In [39]:
import numpy as np
import pandas as pd
from finta import TA
from sklearn.svm import LinearSVC, SVR
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
import sklearn.metrics as skm
from joblib import Parallel, delayed
import joblib

In [85]:
# The trading algorithm will be tested and evaluated over three timelines:

# dcb = Dot Com Bubble
dcb_start = '1997-06-01'
dcb_end = '2002-12-01'

# crsh = 2008 Crash
crsh_start = '2007-06-01'
crsh_end = '2012-12-01'

# cvd = COVID-19
cvd_start = '2020-03-01'
cvd_end = '2022-06-01'

short_window = 4
long_window = 100
initial_capital = 100000.0
share_size = 100
start = crsh_start
end = crsh_end
stock = 'S&P 500'
# stock = 'NASDAQ 100'
# stock = 'RUSSELL 2000'

In [41]:
def get_under_over_signals(data=pd.DataFrame):
    
    df = data.drop(columns=['Open', 'Low', 'High'])

    df['Actual Returns'] = df['Close'].pct_change()

    df['Signal'] = 0.0
    df['Signal'] = np.where(
        (df['Actual Returns'] < 0), 1.0, 0.0
    )

    df = df.drop(columns=['Close', 'Actual Returns'])
    df = df.dropna().sort_index(axis='columns')

    return df

In [42]:
def get_fast_slow_sma(data=pd.DataFrame, short_window=short_window, long_window=long_window):

    df = data.drop(columns=['Open', 'Low', 'High'])

    # Generate the fast and slow simple moving averages
    df['sma_fast'] = (
        df['Close'].rolling(window=short_window).mean()
    )
    df['sma_slow'] = (
        df['Close'].rolling(window=long_window).mean()
    )

    # Sort the index
    df = df.drop(columns='Close').dropna().sort_index(axis='columns')

    return df

In [43]:
# Split any DataFrame into 75/25 train/test split
def get_training_dates(df):

    training_start = df.index.min()

    split_point = int(df.shape[0] * 0.75)
    training_end = df.iloc[split_point].name

    return training_start, training_end

In [44]:
# Always requires a 1D dataframe with a 'Close' column
def train_linear_svm(data=pd.DataFrame):

    df = data.dropna()

    X = get_fast_slow_sma(df)

    X_start = X.iloc[0].name
    X_end = X.iloc[-1].name

    y = get_under_over_signals(df[X_start:X_end])
    y = np.ravel(y)

    X_sc = StandardScaler().fit_transform(X)

    model = LinearSVC()
    model = model.fit(X_sc, y)

    return model

In [45]:
# Always requires a 1D dataframe with a 'Close' column
def train_svr(data=pd.DataFrame):

    df = data.dropna()

    X = get_fast_slow_sma(df)

    X_start = X.iloc[0].name
    X_end = X.iloc[-1].name

    y = get_under_over_signals(df[X_start:X_end])
    y = np.ravel(y)

    X_sc = StandardScaler().fit_transform(X)

    model = SVR()
    model = model.fit(X_sc, y)

    return model

In [62]:
def get_metric(model, metric):

    X_test = get_fast_slow_sma(ohlc_df)
    X_test = X_test[start:end].copy()
    X_test_sc = StandardScaler().fit_transform(X_test)

    y_true = get_under_over_signals(ohlc_df[start:end]).values
    y_true = np.ravel(y_true)

    y_pred = model.predict(X_test_sc)
    y_pred = np.ravel(y_pred)

    return 

    # if metric == 'auc': 
    #     fpr, tpr, thresholds = skm.roc_curve(y_true, y_pred, pos_label=2)
    #     skm.auc(fpr, tpr)
    #     return skm.auc(fpr, tpr)
    # elif metric == 'roc_auc_score': return skm.auc(X_test, y_pred)

    return


In [48]:
ohlc_df = pd.read_csv(
    'data/markets_ohlc.csv', 
    header=[0,1], 
    index_col=0, 
    infer_datetime_format=True, 
    parse_dates=True
)

ohlc_df = ohlc_df[stock].copy()

In [36]:
trained_linear_svm = train_linear_svm(ohlc_df)
joblib.dump(trained_linear_svm, f'models/linear_svm_{stock}.pkl')

['models/linear_svm_S&P 500.pkl']

In [49]:
trained_svr = train_svr(ohlc_df)
joblib.dump(trained_svr, f'models/svr_{stock}.pkl')

['models/svr_S&P 500.pkl']

In [None]:
# def get_svc_metrics(model, metric_type):

#     X_test = get_fast_slow_sma(ohlc_df)
#     X_test = X_test[start:end].copy()
#     X_test_sc = StandardScaler().fit_transform(X_test)

#     y_true = get_under_over_signals(ohlc_df[start:end]).values
#     y_true = y_true.flatten()

#     y_pred = model.predict(X_test_sc)
#     y_pred = y_pred.flatten()

    

#     if metric_type == 'auc': 
#         return skm.mean_squared_error(y_true, y_pred)
    # elif metric_type == 'roc_auc_score': return skm.auc(X_test, y_pred)

In [83]:
def get_svr_metrics(model, metric_type):

    X_test = get_fast_slow_sma(ohlc_df)
    X_test = X_test[start:end].copy()
    X_test_sc = StandardScaler().fit_transform(X_test)

    y_true = get_under_over_signals(ohlc_df[start:end]).values
    y_true = y_true.flatten()

    y_pred = model.predict(X_test_sc)
    y_pred = y_pred.flatten()
    

    if metric_type == 'mse': 
        return skm.mean_squared_error(y_true, y_pred)
    if metric_type == 'mae': 
        return skm.mean_absolute_error(y_true, y_pred)
    if metric_type == 'evs':
        return skm.explained_variance_score(y_true, y_pred)
    # elif metric_type == 'roc_auc_score': return skm.auc(X_test, y_pred)

In [86]:
get_svr_metrics(trained_svr, 'mae')

0.46926917996594925

In [64]:
X_test = get_fast_slow_sma(ohlc_df)
X_test = X_test[start:end].copy()
X_test_sc = StandardScaler().fit_transform(X_test)

y_test = get_under_over_signals(ohlc_df[start:end])

y_pred = trained_linear_svm.predict(X_test_sc)

# print(skm.classification_report(y_test, y_pred))
trained_svr.score(X_test, y_test)



-0.19005662755277863

In [63]:
trained_svr.score()



nan