In [1]:
# Import dependencies

import joblib
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import streamlit as st
from joblib import Parallel, delayed
from pandas.tseries.offsets import DateOffset
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from finta import TA

In [36]:
# Import data
ohlc_df = pd.read_csv('data/markets_ohlc.csv', header=[0,1], index_col=0)

In [2]:
# Import trained models
svm_SP500 = joblib.load('models/linear_svm_SP 500.pkl')
svm_NASDAQ100 = joblib.load('models/linear_svm_NASDAQ 100.pkl')
svm_RUSSELL2000 = joblib.load('models/linear_svm_RUSSELL 2000.pkl')  

In [3]:
# Declare Constants

# dcb = Dot Com Bubble
dcb_start = '1997-06-01'
dcb_end = '2002-12-01'

# crsh = 2008 Crash
crsh_start = '2007-06-01'
crsh_end = '2012-12-01'

# cvd = COVID-19
cvd_start = '2020-03-01'
cvd_end = '2022-06-01'

short_window = 4
long_window = 100
initial_capital = 100000.0
share_size = 100
start_date = dcb_start
end_date = dcb_end
stock = 'SP 500'

In [35]:
def get_under_over_signals(data=ohlc_df):
    """
    Create a signal based on the current day's closing price being higher or
    lower than yesterdays.

    Returns a date-indexed single column dataframe of the signal.
    """
    df = data.copy()

    df['Actual Returns'] = df['Close'].pct_change()

    df['Signal'] = 0.0
    df['Signal'] = np.where(
        (df['Actual Returns'] >= 0), 1.0, 0.0
    )

    df = df.drop(
        columns=['Close', 'Open', 'Low', 'High', 'Actual Returns']
    )
    df = df.dropna().sort_index(axis='columns')

    return df

In [27]:
def get_fast_slow_sma(data=pd.DataFrame, short_window=short_window, long_window=long_window):
    """
    Create a signal based on the current day's closing price being higher or
    lower than yesterdays.

    Returns a date-indexed dataframe with SMA Fast, and SMA Slow columns
    """

    df = data.drop(columns=['Open', 'Low', 'High'])

    # Generate the fast and slow simple moving averages
    df['SMA Fast'] = (
        df['Close'].rolling(window=short_window).mean()
    )
    df['SMA Slow'] = (
        df['Close'].rolling(window=long_window).mean()
    )

    # Sort the index
    df = df.drop(columns='Close').dropna().sort_index(axis='columns')

    return df

In [12]:
def get_ohlc_data(df=ohlc_df, start=dcb_start, end=dcb_end, stock=stock):
    """
    Takes a single dimension OHLC dataframe and returns a copy of it within the 
    boundaries of start and end.
    """

    df = df[stock].copy()
    df = df[start:end]
    
    return df

In [50]:
def make_svm_predictions(start=dcb_start, end=dcb_end, stock=stock):

    # Get the appropriate feature set
    X_data = get_ohlc_data(start=dcb_start, end=dcb_end, stock=stock)
    X = get_fast_slow_sma(X_data)

    # Use the feature set to predict the target
    y_pred = svm_NASDAQ100.predict(X)

    # get the boundary datestrings of X's date-index
    X_start = X.iloc[0].name
    X_end = X.iloc[-1].name

    # Get the y_true values corresponding to the predicted set
    y_true = get_under_over_signals(X_data[X_start:X_end]).values
    y_true = np.ravel(y_true)

    df = pd.DataFrame({
        'y_pred': y_pred,
        'y_true': y_true
    }, index=X.index)

    return df


In [51]:
predictions = make_svm_predictions(start=dcb_start, end=dcb_end, stock=stock)



In [52]:
predictions

Unnamed: 0_level_0,y_pred,y_true
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1997-10-21,0.0,0.0
1997-10-22,0.0,0.0
1997-10-23,0.0,0.0
1997-10-24,0.0,0.0
1997-10-27,0.0,0.0
...,...,...
2002-11-22,0.0,0.0
2002-11-25,0.0,1.0
2002-11-26,0.0,0.0
2002-11-27,0.0,1.0
