In [7]:
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [8]:
ba_volatility = pd.read_csv('../Inertia Trading/ba_weekly_return_volatility.csv')
ba_detailed = pd.read_csv("../Inertia Trading/ba_weekly_return_detailed.csv")

labels = ba_detailed[['Year','Week_Number', 'label']].drop_duplicates()

ba_volatility = ba_volatility.merge(
    labels, 
    on=['Year','Week_Number'], 
    how='inner'
)
ba_volatility.head()

Unnamed: 0,Year,Week_Number,mean_return,volatility,label
0,2020,0,-0.084,0.118794,green
1,2020,1,-0.1612,1.584772,green
2,2020,2,-0.3456,1.269723,green
3,2020,3,-0.05525,2.818341,green
4,2020,4,-0.2888,1.510424,green


In [9]:
train_df = ba_volatility[ba_volatility['Year'] < 2023]
test_df = ba_volatility[ba_volatility['Year'] >= 2023]

In [10]:
features = ['mean_return', 'volatility']
X_train = train_df[features].values
le = LabelEncoder()
Y_train = le.fit_transform(train_df['label'].values)

X_test = test_df[features].values
Y_test = le.transform(test_df['label'].values)

tree_classifier = tree.DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=79)
tree_classifier.fit(X_train, Y_train)
predicted = tree_classifier.predict(X_test)
accuracy = np.mean(predicted == Y_test)
print(accuracy)

1.0


In [11]:
cm = confusion_matrix(Y_test, predicted)
tn, fp, fn, tp = cm.ravel()
print(cm)

tpr = tp / (tp + fn)
tnr = tn / (fp + tn)
print(f"TPR: {tpr:.3f}")
print(f"TNR: {tnr:.3f}")

[[89  0]
 [ 0 16]]
TPR: 1.000
TNR: 1.000


In [12]:
# created functions for strategy comparison to avoid code duplication
def make_weekly_prices(ba_detailed):
    return (
        ba_detailed.groupby(['Year','Week_Number'], as_index=False)
        .agg(Open_w=('Open','first'), Close_w=('Close','last'))
        .sort_values(['Year','Week_Number'])
        .reset_index(drop=True)
    )
    
def buy_and_hold(weekly_prices, initial=100.0):
    wp = weekly_prices.sort_values(['Year','Week_Number']).reset_index(drop=True)
    yearly = (
        wp.groupby('Year', as_index=False)
          .agg(Close_y=('Close_w','last'))
          .sort_values('Year')
          .reset_index(drop=True)
    )
    shares = initial / wp.iloc[0]['Open_w']
    yearly['BuyHold'] = (shares * yearly['Close_y']).round(2)
    return yearly

def trading(df, label_col='predicted_label', green_value='green', initial=100.0):
    cash = initial
    shares = 0
    results = {}
    
    for i in range(len(df)):
        this_week = df.iloc[i]
        next_week = df.iloc[i+1] if i+1 < len(df) else None 
        
        if(shares == 0 and this_week[label_col] == green_value):
            shares = cash / this_week['Open_w']
            cash = 0
            
        if shares > 0 and ((next_week is None) or next_week[label_col] != green_value):
            cash = shares * this_week['Close_w']
            shares = 0

        year_end = (i == len(df)-1) or (this_week['Year'] != next_week['Year'])
        if year_end: #store yearly earnings/losses
            wealth = shares*this_week['Close_w'] if shares > 0 else cash
            results[this_week['Year']] = round(wealth, 2)
    return df[['Year']].drop_duplicates().assign(value=df['Year'].map(results))['value']

def compare_strategies(ba_detailed, labels_df, strategy_name, label_col='predicted_label', green_value='green', initial=100.0):
    weekly_prices = make_weekly_prices(ba_detailed)
    weekly_prices = weekly_prices.merge(labels_df, on=['Year','Week_Number'], how='inner').sort_values(['Year','Week_Number']).reset_index(drop=True)
    
    portfolio = buy_and_hold(weekly_prices, initial=initial)
    trad_str = trading(weekly_prices, label_col=label_col, green_value=green_value, initial=initial)
    trad_str_df = pd.DataFrame({'Year': weekly_prices['Year'].unique(), strategy_name: trad_str})

    portfolio = portfolio.merge(trad_str_df, on='Year', how='left')
    return portfolio

In [14]:
test_df = test_df.copy()
test_df['predicted_label'] = le.inverse_transform(predicted)
# test_df.head()

dt_portfolio = compare_strategies(ba_detailed, test_df, strategy_name="GaussianNB")
print(dt_portfolio)

   Year     Close_y  BuyHold  GaussianNB
0  2023  260.660004   135.09      181.28
1  2024  176.550003    91.50      294.39


In [17]:
# gini
tree_classifier_gini = tree.DecisionTreeClassifier(criterion='gini')
tree_classifier_gini.fit(X_train, Y_train)
predicted_gini = tree_classifier_gini.predict(X_test)
accuracy_gini = np.mean(predicted_gini == Y_test)
print(accuracy_gini)

cm = confusion_matrix(Y_test, predicted_gini)
tn, fp, fn, tp = cm.ravel()
print(cm)

tpr = tp / (tp + fn)
tnr = tn / (fp + tn)
print(f"TPR: {tpr:.3f}")
print(f"TNR: {tnr:.3f}")

test_df = test_df.copy()
test_df['predicted_label_gini'] = le.inverse_transform(predicted_gini)
print(test_df.head())

dt_portfolio = compare_strategies(ba_detailed, test_df, label_col='predicted_label_gini', strategy_name="GaussianNB")
print(dt_portfolio)

1.0
[[89  0]
 [ 0 16]]
TPR: 1.000
TNR: 1.000
     Year  Week_Number  mean_return  volatility  label predicted_label  \
157  2023            1      2.84125    1.618816  green           green   
158  2023            2      0.12040    1.912990  green           green   
159  2023            3     -0.87075    0.492591  green           green   
160  2023            4      0.42620    0.875527  green           green   
161  2023            5     -0.48200    1.689419  green           green   

    predicted_label_gini  
157                green  
158                green  
159                green  
160                green  
161                green  
   Year     Close_y  BuyHold  GaussianNB
0  2023  260.660004   135.09      181.28
1  2024  176.550003    91.50      294.39


In [16]:
tree_classifier_gini

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0
