In [119]:
import numpy as np
import pandas as pd
from finta import TA
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [41]:
# The trading algorithm will be tested and evaluated over three timelines:

# dcb = Dot Com Bubble
dcb_start = '1997-06-01'
dcb_end = '2002-12-01'

# crsh = 2008 Crash
crsh_start = '2007-06-01'
crsh_end = '2012-12-01'

# cvd = COVID-19
cvd_start = '2020-03-01'
cvd_end = '2022-06-01'

short_window = 4
long_window = 100
initial_capital = 100000.0
share_size = 100
start_date = dcb_start
end_date = dcb_end
stock = 'SP 500'

In [146]:
def get_under_over_signals(data=pd.DataFrame):
    
    df = data.drop(columns=['Open', 'Low', 'High'])

    df['Actual Returns'] = df['Close'].pct_change()

    df['Signal'] = 0.0
    df['Signal'] = np.where(
        (df['Actual Returns'] >= 0), 1.0, 0.0
    )

    df = df.drop(columns=['Close', 'Actual Returns'])
    df = df.dropna().sort_index(axis='columns')

    return df

In [95]:
def get_fast_slow_sma(data=pd.DataFrame, short_window=short_window, long_window=long_window):

    df = data.drop(columns=['Open', 'Low', 'High'])

    # Generate the fast and slow simple moving averages
    df['sma_fast'] = (
        df['Close'].rolling(window=short_window).mean()
    )
    df['sma_slow'] = (
        df['Close'].rolling(window=long_window).mean()
    )

    # Sort the index
    df = df.drop(columns='Close').dropna().sort_index(axis='columns')

    return df

In [44]:
# Split any DataFrame into 75/25 train/test split
def get_training_dates(df):

    training_start = df.index.min()

    split_point = int(df.shape[0] * 0.75)
    training_end = df.iloc[split_point].name

    return training_start, training_end

In [140]:
# Always requires a 1D dataframe with a 'Close' column
def train_svm(data=pd.DataFrame):

    df = data.dropna()

    X = get_fast_slow_sma(df)

    X_start = X.iloc[0].name
    X_end = X.iloc[-1].name

    y = get_under_over_signals(df[X_start:X_end])
    y = np.ravel(y)

    X_sc = StandardScaler().fit_transform(X)

    svm = LinearSVC()
    svm = svm.fit(X_sc, y)

    return svm

In [141]:
stock = 'SP 500'

ohlc_df = pd.read_csv('data/markets_ohlc.csv', header=[0,1], index_col=0)
ohlc_df = ohlc_df[stock].copy()

In [147]:
trained_svm = train_svm(ohlc_df)

In [148]:
X_test = get_fast_slow_sma(ohlc_df)
X_test = X_test[dcb_start:dcb_end].copy()
X_test_sc = StandardScaler().fit_transform(X_test)

y_test = get_under_over_signals(ohlc_df[dcb_start:dcb_end])

y_pred = trained_svm.predict(X_test)



In [149]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.52      0.41      0.46       690
         1.0       0.51      0.62      0.56       694

    accuracy                           0.52      1384
   macro avg       0.52      0.52      0.51      1384
weighted avg       0.52      0.52      0.51      1384

