In [91]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix

In [92]:
# Weekly features (mean return, volatility) and the detailed records that carry the labels.
ba_volatility = pd.read_csv('../Inertia Trading/ba_weekly_return_volatility.csv')
ba_detailed = pd.read_csv("../Inertia Trading/ba_weekly_return_detailed.csv")

# Collapse the detailed rows to the distinct (Year, Week_Number, label) combinations.
labels = ba_detailed.loc[:, ['Year', 'Week_Number', 'label']].drop_duplicates()

# Attach the labels to the weekly features; the inner join keeps only weeks found in both frames.
ba_volatility = pd.merge(ba_volatility, labels, how='inner', on=['Year', 'Week_Number'])
ba_volatility.head()

Unnamed: 0,Year,Week_Number,mean_return,volatility,label
0,2020,0,-0.084,0.118794,green
1,2020,1,-0.1612,1.584772,green
2,2020,2,-0.3456,1.269723,green
3,2020,3,-0.05525,2.818341,green
4,2020,4,-0.2888,1.510424,green


In [93]:
# created functions for strategy comparison to avoid code duplication
def make_weekly_prices(ba_detailed):
    """Collapse detailed rows into one row per (Year, Week_Number).

    For each week, ``Open_w`` is the 'first' aggregation of the 'Open' column and
    ``Close_w`` is the 'last' aggregation of the 'Close' column, both taken in the
    row order of ``ba_detailed``.

    Returns a DataFrame with columns Year, Week_Number, Open_w, Close_w, sorted by
    Year then Week_Number, with a fresh 0..n-1 index.
    """
    week_keys = ['Year', 'Week_Number']
    per_week = ba_detailed.groupby(week_keys, as_index=False).agg(
        Open_w=('Open', 'first'),
        Close_w=('Close', 'last'),
    )
    per_week = per_week.sort_values(week_keys)
    return per_week.reset_index(drop=True)
    
def buy_and_hold(weekly_prices, initial=100.0):
    """Value of a buy-and-hold position at the end of each year.

    ``initial`` is invested at the ``Open_w`` of the earliest (Year, Week_Number)
    row and never sold. For every year the position is valued at that year's last
    weekly close.

    Returns a DataFrame with columns Year, Close_y (last ``Close_w`` of the year)
    and BuyHold (position value rounded to 2 decimals), sorted by Year.
    """
    ordered = weekly_prices.sort_values(['Year', 'Week_Number']).reset_index(drop=True)

    # Last weekly close within each year.
    yearly = ordered.groupby('Year', as_index=False).agg(Close_y=('Close_w', 'last'))
    yearly = yearly.sort_values('Year').reset_index(drop=True)

    # Number of shares is fixed by the very first weekly open.
    entry_price = ordered['Open_w'].iloc[0]
    shares = initial / entry_price

    yearly['BuyHold'] = (yearly['Close_y'] * shares).round(2)
    return yearly

def trading(df, label_col='predicted_label', green_value='green', initial=100.0):
    """Simulate the label-driven strategy and report wealth at the end of each year.

    Rows of ``df`` are processed in their given order, one row per week. When
    nothing is held and the week's label equals ``green_value``, all cash is
    converted to shares at that week's ``Open_w``. A held position is sold at the
    week's ``Close_w`` when the following week's label is not ``green_value`` or
    when there is no following week.

    Returns a Series named 'value' with one entry per distinct Year, indexed by the
    first row of each year in ``df``. Each entry is the wealth, rounded to 2
    decimals, recorded on the last row of that year (cash, or shares valued at
    that row's ``Close_w``).
    """
    cash, shares = initial, 0
    wealth_by_year = {}
    final_idx = len(df) - 1

    for idx in range(len(df)):
        current = df.iloc[idx]
        upcoming = None if idx >= final_idx else df.iloc[idx + 1]

        # Enter: currently flat and this week is labelled green.
        if shares == 0 and current[label_col] == green_value:
            shares, cash = cash / current['Open_w'], 0

        # Exit: holding, and next week is missing or not green.
        if shares > 0 and (upcoming is None or upcoming[label_col] != green_value):
            cash, shares = shares * current['Close_w'], 0

        # Record wealth on the last row of each year (or of the whole frame).
        if idx == final_idx or current['Year'] != upcoming['Year']:
            if shares > 0:
                wealth = shares * current['Close_w']
            else:
                wealth = cash
            wealth_by_year[current['Year']] = round(wealth, 2)

    first_rows = df[['Year']].drop_duplicates()
    return first_rows.assign(value=df['Year'].map(wealth_by_year))['value']

def compare_strategies(ba_detailed, labels_df, strategy_name, label_col='predicted_label', green_value='green', initial=100.0):
    """Build a per-year table comparing buy-and-hold with a label-driven strategy.

    Weekly open/close prices are derived from ``ba_detailed`` and inner-joined with
    ``labels_df`` on (Year, Week_Number), so only weeks present in ``labels_df``
    are simulated. Both strategies start with ``initial``.

    Returns the ``buy_and_hold`` table (Year, Close_y, BuyHold) with one extra
    column named ``strategy_name`` holding the yearly values from ``trading``.
    """
    week_keys = ['Year', 'Week_Number']
    weekly_prices = make_weekly_prices(ba_detailed).merge(labels_df, on=week_keys, how='inner')
    weekly_prices = weekly_prices.sort_values(week_keys).reset_index(drop=True)

    baseline = buy_and_hold(weekly_prices, initial=initial)
    strategy_values = trading(
        weekly_prices,
        label_col=label_col,
        green_value=green_value,
        initial=initial,
    )
    # Years in order of first appearance, alongside the per-year strategy values.
    strategy_df = pd.DataFrame({'Year': weekly_prices['Year'].unique(), strategy_name: strategy_values})

    return baseline.merge(strategy_df, on='Year', how='left')

In [94]:
# Chronological split: weeks before 2023 are used for training, 2023 onward for testing.
train_df = ba_volatility.loc[ba_volatility['Year'].lt(2023)]
test_df = ba_volatility.loc[ba_volatility['Year'].ge(2023)]

## Linear SVM

In [95]:
# Feature matrices for the training and test weeks.
features = ['mean_return', 'volatility']
X_train = train_df[features].to_numpy()
X_test = test_df[features].to_numpy()

# Integer-encode the string labels; the encoder is fitted on the training labels only.
le = LabelEncoder()
Y_train = le.fit_transform(train_df['label'].to_numpy())
Y_test = le.transform(test_df['label'].to_numpy())

# Standardize using statistics learned from the training weeks only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVM.
svm_classifier = svm.SVC(kernel='linear')
svm_classifier.fit(X_train_scaled, Y_train)

predicted = svm_classifier.predict(X_test_scaled)
acc = (predicted == Y_test).mean()
print(f"Testing accuracy: {acc:.3f}")

Testing accuracy: 0.962


In [96]:
# Pin the label order so the matrix is always 2x2. Without `labels`, a class that is
# absent from both Y_test and predicted is dropped and the 4-way unpack below fails.
# The unpacking treats encoded class 1 as "positive" and class 0 as "negative";
# LabelEncoder numbers classes in sorted order, so inspect le.classes_ to see which
# string label each code stands for before interpreting TPR/TNR.
cm = confusion_matrix(Y_test, predicted, labels=[0, 1])

tn, fp, fn, tp = cm.ravel()
tpr = tp/(fn+tp)  # recall on class 1
tnr = tn/(fp+tn)  # recall on class 0

print(f"\nConfusion Matrix:\n{cm}")
print(f"TPR: {tpr:.3f}")
print(f"TNR: {tnr:.3f}")


Confusion Matrix:
[[89  0]
 [ 4 12]]
TPR: 0.750
TNR: 1.000


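The Linear SVM achieved 96.19% accuracy on the test set. Note that `LabelEncoder` encodes 'green' as class 0, so in the confusion matrix above the "negative" class is green (buy) and the "positive" class is the non-green label. TPR = 0.750 therefore means the model caught 75% of the actual non-green weeks, and TNR = 1.000 means every actual green week was labeled green. The 4 errors are non-green weeks misclassified as green, i.e. false buy signals.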

In [97]:
# Store the decoded (string) predictions on a fresh copy of the test frame.
test_df = test_df.assign(pred_linear=le.inverse_transform(predicted))

compare_strategies(ba_detailed, test_df, strategy_name="Linear SVM", label_col='pred_linear')

Unnamed: 0,Year,Close_y,BuyHold,Linear SVM
0,2023,260.660004,135.09,159.58
1,2024,176.550003,91.5,234.92


Compared to the Buy-and-Hold strategy, SVM yields higher final portfolio value—especially evident in 2024, where Linear SVM (234.92) outperforms Buy-and-Hold (91.50) despite market decline, indicating adaptive strength under volatile conditions.

## Gaussian SVM

In [98]:
# Same features and preprocessing as before; only the kernel changes.
features = ['mean_return', 'volatility']

# Integer-encode the labels (encoder fitted on the training labels).
le = LabelEncoder()
Y_train = le.fit_transform(train_df['label'].to_numpy())
Y_test = le.transform(test_df['label'].to_numpy())

# Raw feature matrices, then standardization fitted on the training weeks.
X_train = train_df[features].to_numpy()
X_test = test_df[features].to_numpy()
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Gaussian (RBF) kernel SVM.
svm_classifier = svm.SVC(kernel='rbf')
svm_classifier.fit(X_train_scaled, Y_train)

predicted = svm_classifier.predict(X_test_scaled)
acc = (predicted == Y_test).mean()
print(f"Testing accuracy: {acc:.3f}")

Testing accuracy: 0.971


In [99]:
# Pin the label order so the matrix is always 2x2. Without `labels`, a class that is
# absent from both Y_test and predicted is dropped and the 4-way unpack below fails.
# The unpacking treats encoded class 1 as "positive" and class 0 as "negative";
# LabelEncoder numbers classes in sorted order, so inspect le.classes_ to see which
# string label each code stands for before interpreting TPR/TNR.
cm = confusion_matrix(Y_test, predicted, labels=[0, 1])

tn, fp, fn, tp = cm.ravel()
tpr = tp/(fn+tp)  # recall on class 1
tnr = tn/(fp+tn)  # recall on class 0

print(f"\nConfusion Matrix:\n{cm}")
print(f"TPR: {tpr:.3f}")
print(f"TNR: {tnr:.3f}")


Confusion Matrix:
[[89  0]
 [ 3 13]]
TPR: 0.812
TNR: 1.000


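The Gaussian SVM model achieved an accuracy of 97.14%, slightly outperforming the linear version.
Because `LabelEncoder` encodes 'green' as class 0, the "positive" class here is the non-green label: the model correctly identified 81.2% of the non-green weeks (TPR = 0.812) and labeled every green (buy) week correctly (TNR = 1.0). Its 3 errors are non-green weeks predicted as green, i.e. false buy signals.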

In [100]:
# Store the decoded (string) predictions on a fresh copy of the test frame.
test_df = test_df.assign(pred_gaussian=le.inverse_transform(predicted))

compare_strategies(ba_detailed, test_df, strategy_name="Gaussian SVM", label_col='pred_gaussian')

Unnamed: 0,Year,Close_y,BuyHold,Gaussian SVM
0,2023,260.660004,135.09,159.58
1,2024,176.550003,91.5,245.92


Overall, the model shows strong precision and adaptability, generating higher portfolio returns than the Buy-and-Hold strategy—especially in 2024, where it reached 245.92 compared to 91.50 for Buy-and-Hold. It also outperformed the Linear SVM in 2024 (245.92 vs. 234.92); the two models ended 2023 with the same value (159.58).

## Polynomial SVM (degree=2)

In [101]:
# Same features and preprocessing as before; only the kernel changes.
features = ['mean_return', 'volatility']
X_train, X_test = train_df[features].to_numpy(), test_df[features].to_numpy()

# Integer-encode the labels (encoder fitted on the training labels).
le = LabelEncoder()
Y_train = le.fit_transform(train_df['label'].to_numpy())
Y_test = le.transform(test_df['label'].to_numpy())

# Standardize using statistics learned from the training weeks only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Polynomial kernel of degree 2.
svm_classifier = svm.SVC(kernel='poly', degree=2)
svm_classifier.fit(X_train_scaled, Y_train)

predicted = svm_classifier.predict(X_test_scaled)
acc = (predicted == Y_test).mean()
print(f"Testing accuracy: {acc:.3f}")

Testing accuracy: 0.848


In [102]:
# Pin the label order so the matrix is always 2x2. Without `labels`, a class that is
# absent from both Y_test and predicted is dropped and the 4-way unpack below fails.
# The unpacking treats encoded class 1 as "positive" and class 0 as "negative";
# LabelEncoder numbers classes in sorted order, so inspect le.classes_ to see which
# string label each code stands for before interpreting TPR/TNR.
cm = confusion_matrix(Y_test, predicted, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
tpr = tp/(fn+tp)  # recall on class 1
tnr = tn/(fp+tn)  # recall on class 0

print(f"\nConfusion Matrix:\n{cm}")
print(f"TPR: {tpr:.3f}")
print(f"TNR: {tnr:.3f}")


Confusion Matrix:
[[89  0]
 [16  0]]
TPR: 0.000
TNR: 1.000


The Polynomial SVM (degree = 2) achieved an accuracy of 84.76%, which is notably lower than both linear and Gaussian SVMs.
It predicted class 0 for every week, which is the 'green' (buy) label under `LabelEncoder`'s encoding: all 89 green weeks were labeled correctly (TNR = 1.0), but none of the 16 non-green weeks were detected (TPR = 0.0).

In [103]:
# Store the decoded (string) predictions on a fresh copy of the test frame.
test_df = test_df.assign(pred_poly2=le.inverse_transform(predicted))

compare_strategies(ba_detailed, test_df, strategy_name="Poly SVM (d=2)", label_col='pred_poly2')

Unnamed: 0,Year,Close_y,BuyHold,Poly SVM (d=2)
0,2023,260.660004,135.09,135.09
1,2024,176.550003,91.5,91.5


Because every week is predicted 'green', the strategy stays invested for the whole test period, so its portfolio value matches the Buy-and-Hold strategy exactly and shows no advantage in adapting to market changes.