In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from pathlib import Path
import yfinance as yf
from pandas_datareader import data as pdr
import FinanceDataReader as fdr
import pandas_ta  as ta
from pandas.tseries.offsets import DateOffset
import hvplot.pandas
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [3]:
SMAPath = Path("./Data/sp500_sma.csv")
EMAPath = Path("./Data/sp500_ema.csv")
tickersPath = Path("./Data/sp500.csv")

# Read SMA data
#SMA_df = pd.read_csv(SMAPath, header=[0, 1], index_col = 0, parse_dates=True, infer_datetime_format =True)
#SMA_df.index = pd.to_datetime(SMA_df.index, utc=True).strftime('%Y-%m-%d')
# Read EMA data
#EMA_df = pd.read_csv(EMAPath, header=[0, 1], index_col = 0,parse_dates=True, infer_datetime_format =True)
#EMA_df.index = pd.to_datetime(EMA_df.index, utc=True).strftime('%Y-%m-%d')
# Read stocks tickers
tickers_df = pd.read_csv(tickersPath)
tickers = tickers_df['Symbol']
start_date = '2014-01-01'
end_date = '2024-03-01'


In [4]:
def preStrategydata(stock, short, long):
    pricePath = Path("./Data/sp500_closeprice.csv")
    #Read stocks prices data
    price_df = pd.read_csv(pricePath, index_col = 'Date', parse_dates=True,
    infer_datetime_format =True)
    price_df.index = pd.to_datetime(price_df.index, utc=True).strftime('%Y-%m-%d')
    # Read stocks tickers
    tickers = price_df.columns

    # Read economic feature data
    feature_df = pd.read_csv("./Data/eco_features.csv", index_col = 'Date', parse_dates=True,
    infer_datetime_format =True)
    feature_df.index = pd.to_datetime(feature_df.index, utc=True).strftime('%Y-%m-%d')
 
    print(f'Trading Test for {stock}')
#    if stock == 'S&P 500':
#        signals_df = feature_df[['S&P 500']]
#    elif stock in tickers:
#        signals_df= price_df[[stock]]
#    else:
        #start_date = input('Enter Start Date for Historical Data in yyyy-mm-dd:')
    start_date = '2014-01-01'
    end_date = '2024-03-01'
    signals_df= fdr.DataReader(stock, start = start_date, end = end_date)[['Close','Volume']]
    signals_df.columns = [stock,'Volume']
    signals_df['close'] = signals_df[stock]
    signals_df['SMA_Fast'] = signals_df[['close']].ta.sma(length=4)
    signals_df['SMA_Slow'] = signals_df[['close']].ta.sma(length=100)
    #signals_df['vol_21'] = signals_df['close'].rolling(window=21).std()
    #signals_df['rsi_14'] = signals_df[['close']].ta.rsi()
    #signals_df[['ema_12', 'ema_26', 'MACD_signal']] = signals_df.ta.macd()
    #signals_df['OBV_in_million'] = signals_df.ta.obv()/1000000
    signals_df["DailyReturns"] = signals_df['close'].pct_change()
    signals_df.index = pd.to_datetime(signals_df.index, utc=True).strftime('%Y-%m-%d')
    signals_df = signals_df.join(feature_df.drop(columns = ['S&P 500']), how='right')
    
    # Drop all NaN values from the DataFrame
    signals_df = signals_df.dropna()
    signals_df.drop(columns= ['close'], inplace = True)
    return signals_df


In [5]:
def train_test_scaled_split(X,y, training_begin, training_months):
    # Select the start of the training period
    if training_begin < X.index.min():
        training_begin = X.index.min()        
    # Display the training begin date
    print(f'training begins at {training_begin}')
    # Select the ending period for the training data with an offset of training months
    training_end = (pd.to_datetime(training_begin) + pd.DateOffset(months=training_months)).strftime('%Y-%m-%d')
    # Display the training end date
    print(f'training ends at {training_end}')

    # Generate the X_train and y_train DataFrames
    X_train = X.loc[training_begin:training_end]
    y_train = y.loc[training_begin:training_end]

    # Generate the X_test and y_test DataFrames
    test_start = (pd.to_datetime(training_end)+pd.DateOffset(days=1)).strftime('%Y-%m-%d')
    print(f'test starts at {test_start}')
    X_test = X.loc[test_start:]
    #X_test = X_test.iloc[:-1]
    y_test = y.loc[test_start:]
    
    # Create a StandardScaler instance
    scaler = StandardScaler()
    # Apply the scaler model to fit the X-train data
    X_scaler = scaler.fit(X_train)

    # Transform the X_train and X_test DataFrames using the X_scaler
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    #for test
    #if (print(len(X_test_scaled)-len(y_test))!=0):
        #print(f'error:X_test length({len(X_test_scaled)}) is not the same with y_test length({len(y_test)})')
    return (X_train_scaled, X_test_scaled, y_train, y_test) 

In [6]:
# Data Normalizing & Train-Test Split

from sklearn.preprocessing import StandardScaler

def train_test_split_scaled(predictors_df, target_df, train_range_months, test_range_months):
    # Training Data of 27months and Testing Data of 6months alternatively 
    # with latest 33months of testing data
    #train_range_months = 27
    #test_range_months = 6
    split_range = list(range(len(predictors_df)//21//(train_range_months+test_range_months)))
    predictors_train = pd.DataFrame()
    split_index = pd.DataFrame(columns = split_range)
    
    for i in split_range:
        train_df = X.iloc[21*(train_range_months+test_range_months)*i:(21*(train_range_months+test_range_months)*i)+(21*(train_range_months)),:]
        predictors_train = pd.concat([predictors_train,train_df],axis=0)
        split_index[i] = train_df.index
        
    predictors_test = predictors_df.drop(index = predictors_train.index)
    target_train = target_df.drop(index = predictors_test.index)
    target_test = target_df.drop(index = predictors_train.index)

    scaler = StandardScaler()
    #Using StandardScaler to fit predictors train data and transform both train and test data 
    predictors_scaled = scaler.fit(predictors_train)
    predictors_train_scaled = predictors_scaled.transform(predictors_train)
    predictors_test_scaled = predictors_scaled.transform(predictors_test)
    
    return(predictors_train_scaled,predictors_test_scaled,target_train,target_test,split_index)

In [7]:
# Import a new classifier from SKLearn
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

def classifier_prediction(X_train_model, X_test_model, y_train_model, y_test_model):
    # Initiate the model instance
    # From SVM, instantiate SVC classifier model instance
    svm_model = svm.SVC(kernel='sigmoid', degree=30, C=10000)
    #ada_model = AdaBoostClassifier()
    dt_model = DecisionTreeClassifier(class_weight = 'balanced', max_depth =30) #criterion='gini', class_weight = 'balanced', 
    #lg_model = LogisticRegression(class_weight='balanced')
    
    # Fit the model to the data using the training data
    svm_model = svm_model.fit(X_train_model, y_train_model)
    #model_ada = ada_model.fit(X_train_model, y_train_model)
    model_dt = dt_model.fit(X_train_model, y_train_model)
    #model_lg = lg_model.fit(X_train_model, y_train_model)
 
    # Use the testing data to make the model predictions
    svm_pred = svm_model.predict(X_test_model)
    #pred_ada = model_ada.predict(X_test_model)
    pred_dt = model_dt.predict(X_test_model)
    #pred_lg = model_lg.predict(X_test_model)
    
    # Review the model's predicted values for test
    #print(svm_pred)
    #print(pred_ada)
    #print(pred_dt)
    #print(pred_lg)

    # Use a classification report to evaluate the model using the predictions and testing data
    svm_testing_report = classification_report(y_test_model, svm_pred)
    #ada_testing_report = classification_report(y_test_model, pred_ada)
    dt_testing_report = classification_report(y_test_model, pred_dt)
    #lg_testing_report = classification_report(y_test_model, pred_lg)
    # Print the classification report
    print('Classification Report by SVC Prediction Model')
    print(svm_testing_report)
    #print('Classification Report by AdaBoost Prediction Model')
    #print(ada_testing_report)
    print('Classification Report by DecisionTree Model')
    print(dt_testing_report)
    #print('Classification Report by Logistic Regression Model')
    #print(lg_testing_report)

    # Create a predictions DataFrame
    test_start_model = y_test_model.index.min()
    predictions_df = pd.DataFrame(index = y_test_model.loc[test_start_model:].index)

    # Add the models predictions to the DataFrame
    predictions_df['Actual_signal'] = y_test_model
    predictions_df['Predicted_svc'] = svm_pred
    #predictions_df['Predicted_Ada'] = pred_ada
    predictions_df['Predicted_DecisionTree'] = pred_dt
    #predictions_df['Predicted_LogisticRegression'] = pred_lg
    return predictions_df

In [8]:
def returns(s_df, pred_df, startdate):
    df = s_df[['DailyReturns']].loc[pred_df.index]
    # Add the strategy returns to the DataFrame
    #df['Strategy Returns']= df['DailyReturns'] * pred_df['Actual_signal']
    df['Strategy Returns_svc'] = df['DailyReturns'] * pred_df['Predicted_svc']
    #df['Strategy Returns_Ada'] = df['DailyReturns'] * pred_df['Predicted_Ada']
    df['Strategy Returns_DecisionTree'] = df['DailyReturns'] * pred_df['Predicted_DecisionTree']
    #df['Strategy Returns_LogisticRegression'] = = df['DailyReturns'] * pred_df['Predicted_LogisticRegression']
    #cumreturn_df = (1+df).cumprod()
    return df


In [383]:
stock = 'AAPL'
# Set the short window and long window
short_window = 4
long_window = 100
s_df = preStrategydata(stock, short_window, long_window)
#s_df.tail()

# Initialize the new Signal column
s_df['Signal'] = 0.0
s_df['Signal_sma'] = 0.0

#Simple Strategy
# When Actual Returns are greater than or equal to 0, generate signal to buy stock long
s_df.loc[(s_df['DailyReturns'] >= 0), 'Signal'] = 1
# When Actual Returns are less than 0, generate signal to sell stock short
s_df.loc[(s_df['DailyReturns'] < 0), 'Signal'] = -1

#SMA Strategy
# Generate the trading signal 1 or -1,
# where 1 is when the SMA_fast is less than the SMA_slow
# where 0 is when the SMA50 is greater than than the SMA100
s_df['Signal_sma'][short_window:]=np.where(
    s_df['SMA_Fast'][short_window:] < s_df['SMA_Slow'][short_window:], 1, -1)

# Assign a copy of the sma_fast and sma_slow columns to a features DataFrame called X
X = s_df[['SMA_Fast', 'SMA_Slow','Gold','Copper','US_dollar','Crude_oil','VIX','Wheat','M2','10_year_treasury','3_months_treasury','Shiller_index','Fed_fund_effective_rate','Unemployment','Disposable_income','GDP','CPI','Industrial_production','Commercial_loans','Private_investment']].shift().dropna()
#X = s_df.drop(columns = [stock, 'Signal', 'Signal_sma'])
#X = X.shift().dropna()
# Create the target set selecting the Signal column and assiging it to y
y = s_df['Signal'].iloc[1:]
y_sma = s_df['Signal_sma'].iloc[1:]

#Training test simple split
train_begin = '2014-01-01'
train_months = 39
X_train_scaled, X_test_scaled, y_train, y_test=train_test_scaled_split(X,y, train_begin,train_months)
X_sma_train_scaled, X_sma_test_scaled, y_sma_train, y_sma_test=train_test_scaled_split(X,y_sma,train_begin,train_months)

#sectional training test splits
train_range_months = 27
test_range_months = 6
X1_train_scaled, X1_test_scaled, y1_train, y1_test, split_index = train_test_split_scaled(X,y, train_range_months, test_range_months)
X1_sma_train_scaled, X1_sma_test_scaled, y1_sma_train, y1_sma_test, split1_index = train_test_split_scaled(X,y_sma, train_range_months, test_range_months)

# Generate the predictions from different classifiers for different strategies from simple training test split
pred_df= classifier_prediction(X_train_scaled, X_test_scaled, y_train, y_test)
pred_sma_df= classifier_prediction(X_sma_train_scaled, X_sma_test_scaled, y_sma_train, y_sma_test)
# Get the actual returns from predictions
test_start = y_test.index.min()
returns_df = returns(s_df,pred_df, test_start)
returns_df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)
test_start = y_sma_test.index.min()
returns_sma_df = returns(s_df,pred_sma_df, test_start)
returns_sma_df['SMA'] = returns_sma_df['DailyReturns']*s_df['Signal_sma'].loc[test_start:]
returns_sma_df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)

# Generate the predictions from different classifiers for different strategies from sectional training test split
pred1_df= classifier_prediction(X1_train_scaled, X1_test_scaled, y1_train, y1_test)
pred1_sma_df= classifier_prediction(X1_sma_train_scaled, X1_sma_test_scaled, y1_sma_train, y1_sma_test)
# Get the actual returns from predictions
test1_start = y1_test.index.min()
returns1_df = returns(s_df,pred1_df, test1_start)
returns1_df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)
test1_start = y1_sma_test.index.min()
returns1_sma_df = returns(s_df,pred1_sma_df, test1_start)
returns1_sma_df['SMA'] = returns1_sma_df['DailyReturns']*s_df['Signal_sma'].loc[test_start:]
returns1_sma_df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)

Trading Test for AAPL
training begins at 2014-05-28
training ends at 2017-08-28
test starts at 2017-08-29
training begins at 2014-05-28
training ends at 2017-08-28
test starts at 2017-08-29
Classification Report by SVC Prediction Model
              precision    recall  f1-score   support

        -1.0       0.55      0.11      0.18       771
         1.0       0.54      0.92      0.68       865

    accuracy                           0.54      1636
   macro avg       0.54      0.51      0.43      1636
weighted avg       0.54      0.54      0.44      1636

Classification Report by DecisionTree Model
              precision    recall  f1-score   support

        -1.0       0.46      0.15      0.23       771
         1.0       0.53      0.84      0.65       865

    accuracy                           0.52      1636
   macro avg       0.49      0.50      0.44      1636
weighted avg       0.50      0.52      0.45      1636

Classification Report by SVC Prediction Model
              precis

In [385]:
cum_return = (1 + returns_df.iloc[:15]).cumprod()
cum_sma_return = (1 + returns_sma_df.iloc[:15]).cumprod()
print('Simple Strategy with simple training split:')
print(cum_return.iloc[-1])
print('SMA Strategy with simple training split:')
print(cum_sma_return.iloc[-1])

Simple Strategy with simple training split:
Buy_Hold                         0.983031
Strategy Returns_svc             0.983031
Strategy Returns_DecisionTree    1.053379
Name: 2017-09-19, dtype: float64
SMA Strategy with simple training split:
Buy_Hold                         0.983031
Strategy Returns_svc             0.983031
Strategy Returns_DecisionTree    1.016080
SMA                              1.016080
Name: 2017-09-19, dtype: float64


In [386]:
cum_return1 = (1 + returns1_sma_df.iloc[:15]).cumprod()
cum_sma_return1 = (1 + returns1_sma_df.iloc[:15]).cumprod()
print('Simple Strategy with sectional training split:')
print(cum_return1.iloc[-1])
print('SMA Strategy with sectional training split:')
print(cum_sma_return1.iloc[-1])

Simple Strategy with sectional training split::
Buy_Hold                         1.069795
Strategy Returns_svc             0.930140
Strategy Returns_DecisionTree    1.117631
SMA                                   NaN
Name: 2016-09-15, dtype: float64
SMA Strategy with sectional training split:
Buy_Hold                         1.069795
Strategy Returns_svc             0.930140
Strategy Returns_DecisionTree    1.117631
SMA                                   NaN
Name: 2016-09-15, dtype: float64


In [387]:
# Plot the actual returns versus the different strategies returns
plt10 = (1 + returns_df.iloc[:15]).cumprod().hvplot(title = 'Simple Strategy 15-day Prediction Returns with Simple Split',
                                                   legend = 'top', 
                                                   rot= 90)
plt11 = (1 + returns1_df.iloc[:15]).cumprod().hvplot(legend = 'top',
                                                     rot=90,
                                                     title = 'Simple Strategy 15-day Prediction Returns with Sectional Split')
plt1=plt10+plt11
plt1.opts(opts.Layout(shared_axes=False)).cols(1)
#plt1.opts(opts.Overlay()
#hvplot.save(plt1, './Picture/Simple Strategy 15-day Prediction Returns.png')
plt1

In [394]:
plt2 = (1 + returns_df).cumprod().hvplot(legend = 'top',
                                         title = 'Simple Strategy Prediction Returns with Simple Split')
#hvplot.save(plt2, './Picture/Simple Strategy Prediction Returns with Simple Split.png')
plt2

In [379]:
plt121 = (1 + returns1_df.loc[:split1_index[1][0]]).cumprod().hvplot(legend = 'top', rot = 90)
plt122 =(1 + returns1_df.loc[split1_index[1][len(split1_index[1])-1]:split1_index[2][0]]).cumprod().hvplot(legend = 'top', rot = 90) 
plt123 =(1 + returns1_df.loc[split1_index[2][len(split1_index[2])-1]:]).cumprod().hvplot(legend = 'top') 
plt12=plt121+plt122+plt123
plt12.opts(opts.Layout(shared_axes=False)).cols(1)
#hvplot.save(plt12, './Picture/Simple Strategy Prediction Returns with Sectional Split.png')
plt12

In [389]:
# Plot the actual SMA strategy returns versus the different classifier model's prediction results
plt03 = (1 + returns_sma_df.iloc[:15]).cumprod().hvplot(rot = 90,
                                                       legend = 'top',
                                                       title ='SMA Strategy 15-day Prediction Returns with Simple Split')
plt13 = (1 + returns1_sma_df.iloc[:15]).cumprod().hvplot(rot = 90,
                                                       legend = 'top',
                                                       title ='SMA Strategy 15-day Prediction Returns with Sctional Split')

plt3 = plt03+plt13
plt3.opts(opts.Layout(shared_axes=False)).cols(1)
#hvplot.save(plt3, './Picture/SMA Strategy 15-day Prediction Returns.png')
plt3

In [393]:
plt4 = (1 + returns_sma_df).cumprod().hvplot(legend = 'top',
                                         title = 'SMA Strategy Prediction Returns with Simple Split')
#hvplot.save(plt4, './Picture/SMA Strategy Prediction Returns with Simple Split.png')
plt4

In [392]:
from holoviews import opts
plt141 = (1 + returns1_sma_df.loc[:split1_index[1][0]]).cumprod().hvplot(legend = 'top', rot =90)
plt142 =(1 + returns1_sma_df.loc[split1_index[1][len(split1_index[1])-1]:split1_index[2][0]]).cumprod().hvplot(legend = 'top', rot = 90) 
plt143 =(1 + returns1_sma_df.loc[split1_index[2][len(split1_index[2])-1]:]).cumprod().hvplot(legend = 'top') 
plt14=plt141+plt142+plt143
plt14.opts(opts.Layout(shared_axes=False), 
          opts.Overlay(title='SMA Strategy Prediction Returns with Sectional Split')).cols(1)
hvplot.save(plt14, './Picture/SMA Strategy Prediction Returns with Sectional Split.png')
plt14

In [13]:
def classifier_selector(stock, short, long, train_begin, train_month):
    sig_df = preStrategydata(stock, short, long)
    sig_df['Signal'] = 0.0
    sig_df['Signal_sma'] = 0.0
    #print(sig_df)

    #Simple Strategy
    # When Actual Returns are greater than or equal to 0, generate signal to buy stock long
    sig_df.loc[(sig_df['DailyReturns'] >= 0), 'Signal'] = 1
    # When Actual Returns are less than 0, generate signal to sell stock short
    sig_df.loc[(sig_df['DailyReturns'] < 0), 'Signal'] = -1

    #SMA Strategy
    # Generate the trading signal 1 or -1,
    # where 1 is when the SMA_fast is less than the SMA_slow
    # where -1 is when the SMA50 is greater than than the SMA100
    sig_df['Signal_sma'][short:]=np.where(
    sig_df['SMA_Fast'][short:] < sig_df['SMA_Slow'][short:], 1, -1)

    # Assign a copy of the sma_fast and sma_slow columns to a features DataFrame called X
    X = sig_df[['SMA_Fast', 'SMA_Slow','Gold','Copper','US_dollar','Crude_oil','VIX','Wheat','M2','10_year_treasury','3_months_treasury','Shiller_index','Fed_fund_effective_rate','Unemployment','Disposable_income','GDP','CPI','Industrial_production','Commercial_loans','Private_investment']].shift().dropna()
    # Create the target set selecting the Signal column and assiging it to y
    y = sig_df['Signal'].iloc[1:]
    y_sma = sig_df['Signal_sma'].iloc[1:]

    #Training test simple split
    X_train_scaled, X_test_scaled, y_train, y_test=train_test_scaled_split(X,y,train_begin,train_month)
    X_sma_train_scaled, X_sma_test_scaled, y_sma_train, y_sma_test=train_test_scaled_split(X,y_sma,train_begin,train_month)
    # Generate the predictions from different classifiers for different strategies from simple training test split
    pred_df= classifier_prediction(X_train_scaled, X_test_scaled, y_train, y_test)
    pred_sma_df= classifier_prediction(X_sma_train_scaled, X_sma_test_scaled, y_sma_train, y_sma_test)

    #sectional training test splits
    #train_range_months = 27
    #test_range_months = 6
    #X1_train_scaled, X1_test_scaled, y1_train, y1_test, split_index = train_test_split_scaled(X,y, train_range_months, test_range_months)

    # Get the actual returns from predictions
    test_start = y_test.index.min()
    returns_df = returns(sig_df,pred_df, test_start)
    #returns_df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)

    returns_sma_df = returns(sig_df,pred_sma_df, test_start)
    returns_sma_df['SMA'] = returns_sma_df['DailyReturns']*sig_df['Signal_sma'].loc[test_start:]
    returns_sma_df = returns_sma_df.drop(columns = 'DailyReturns')
    returns_sma_df = returns_sma_df.add_suffix('_sma')
    returns_df = pd.concat([returns_df,returns_sma_df], axis = 1)

    #cum_return = (1 + returns_df.iloc[:]).cumprod()
    #cum_sma_return = (1 + returns_sma_df.iloc[:]).cumprod()
    #cum_sma_return=cum_sma_return.drop(columns = 'Buy_Hold')
    #cum_sma_return = cum_sma_return.add_suffix('_sma')
    #cum_return = pd.concat([cum_return,cum_sma_return], axis = 1)
    return (returns_df,sig_df,X,y,pred_df,pred_sma_df)

In [225]:
from sklearn.metrics import accuracy_score
stock = 'AAPL'
# Set the short window and long window
short_window = 4
long_window = 100
train_begin = '2020-1-1'
train_month = 39
returns_df,s_df,X,y,pred_df,pred_sma_df = classifier_selector(stock, short_window, long_window, train_begin, train_month)
linear_prediction_df = pd.read_csv("./Data/out.csv", index_col = 'Date', parse_dates=True,
    infer_datetime_format =True)
prediction_returns_df = linear_prediction_df['BayesianRidge()_Close'].pct_change()
#s = pred_df['Actual_signal'].diff()/2
acc=accuracy_score(pred_df['Actual_signal'], pred_df['Predicted_svc'])
print(acc)
#accuracy_score(pred_df['Actual_signal'], pred_df['Predicted_DecisionTree'])

Trading Test for AAPL
training begins at 2020-1-1
training ends at 2023-04-01
test starts at 2023-04-02
training begins at 2020-1-1
training ends at 2023-04-01
test starts at 2023-04-02
Classification Report by SVC Prediction Model
              precision    recall  f1-score   support

        -1.0       0.50      0.98      0.66       113
         1.0       0.67      0.03      0.07       116

    accuracy                           0.50       229
   macro avg       0.58      0.51      0.36       229
weighted avg       0.58      0.50      0.36       229

Classification Report by DecisionTree Model
              precision    recall  f1-score   support

        -1.0       0.40      0.07      0.12       113
         1.0       0.50      0.90      0.64       116

    accuracy                           0.49       229
   macro avg       0.45      0.48      0.38       229
weighted avg       0.45      0.49      0.38       229

Classification Report by SVC Prediction Model
              precision 

In [231]:
pred_returns=prediction_returns_df[returns_df.index]
linear_predict_df =pd.DataFrame(pred_returns)
linear_predict_df.columns = ['DailyReturns']
linear_predict_df.index = pred_df.index
linear_predict_df['Predicted_svc'] = linear_predict_df['DailyReturns'] * pred_df['Predicted_svc']
linear_predict_df['Predicted_DecisionTree'] = linear_predict_df['DailyReturns'] * pred_df['Predicted_DecisionTree']

cum_linear_df=(1+linear_predict_df).cumprod()
cum_linear_df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)
cum_linear_df = cum_linear_df.add_suffix('_linear')
pltlinear1=cum_linear_df.hvplot(title = 'Strategy Cumulative Returns for AAPL linear Price Prediction')

hvplot.save(pltlinear1, './Strategy Cumulative Returns for AAPL linear Price Prediction.png')
pltlinear1

In [155]:
s = pred_df.diff()/2
print('Buy/Sell(1/-1) Transaction Times:')
print(s['Actual_signal'].value_counts())
print(s['Predicted_svc'].value_counts())
print(s['Predicted_DecisionTree'].value_counts())

s0=pd.DataFrame(returns_df['DailyReturns']*pred_df['Actual_signal'])
s0['Win']= np.where(s0>0, 1, 0)
if (len(s0)-s0['Win'].sum())==0:
    s1=pd.DataFrame(returns_df['DailyReturns']*pred_df['Predicted_svc'])
    s1['Win'] = np.where(s1>0, 1, 0)
    s1['Loss'] = np.where(s1[0]<0, 1, 0)
    win_ratio_svc=s1['Win'].sum()/s0['Win'].sum()
    win_size_svc = (1+s1[0]*s1['Win']).cumprod()
    loss_size_svc = (1+s1[0]*s1['Loss']).cumprod()
    print(f'SVC Prediction Win times ratio is {win_ratio_svc:.2f} with win cumreturn of {win_size_svc[-1]:.2f} and loss cumretur of {(loss_size_svc[-1]-1):.2f}')
    s2=pd.DataFrame(returns_df['DailyReturns']*pred_df['Predicted_DecisionTree'])
    s2['Win'] = np.where(s2>0, 1, 0)
    s2['Loss'] = np.where(s2[0]<0, 1, 0)
    win_ratio_DecisionTree=s2['Win'].sum()/s0['Win'].sum()
    win_size_DecisionTree = (1+s2[0]*s2['Win']).cumprod()
    loss_size_DecisionTree = (1+s2[0]*s2['Loss']).cumprod()
    print(f'Decision Tree Prediction Win times ratio is {win_ratio_DecisionTree:.2f} with wining cum return of {win_size_DecisionTree[-1]:.2f} and loss cumretur of {loss_size_DecisionTree[-1]-1:.2f}')


Buy/Sell(1/-1) Transaction Times:
Actual_signal
 0.0    63
 1.0    30
-1.0    29
Name: count, dtype: int64
Predicted_svc
 0.0    114
-1.0      4
 1.0      4
Name: count, dtype: int64
Predicted_DecisionTree
 0.0    112
-1.0      5
 1.0      5
Name: count, dtype: int64
SVC Prediction Win times ratio is 0.59 with win cumreturn of 1.52 and loss cumretur of -0.25
Decision Tree Prediction Win times ratio is 0.63 with wining cum return of 1.60 and loss cumretur of -0.22


In [153]:
df=(1+returns_df[:15]).cumprod()
df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)
pltsp1=df.hvplot(title = 'Strategy 15-day Cumulative Returns for S&P500')

hvplot.save(pltsp1, './Picture/Strategy Prediction 15day Returns for S&P500.png')
pltsp1

In [227]:
df=(1+returns_df).cumprod()
df.rename(columns={'DailyReturns':'Buy_Hold'}, inplace=True)
pltsp2=df.hvplot(title = 'Strategy Cumulative Returns for S&P500')
hvplot.save(pltsp2, './Strategy Prediction Returns for S&P500.png')
pltsp2

In [242]:
p1=df[['Buy_Hold','Strategy Returns_svc','Strategy Returns_DecisionTree']].hvplot()
plt_pred_1=pltlinear1*p1
plt_pred_1