In [103]:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import datetime as dt
import yfinance as yf
from sklearn import preprocessing
import matplotlib.pyplot as plt 
plt.rc("font", size=14)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns

In [104]:
from fredapi import Fred
fred = Fred(api_key = 'ec682064ba501918755568565884325e')
start_date = ('01/01/2000')
end_date = ('12/31/2017')

eco_unc = pd.Series((fred.get_series('WLEMUINDXD', start_date, end_date)), name = 'eco_unc')
tbill = pd.Series((fred.get_series('DTB3', start_date, end_date)), name = 'tbill')
wti = pd.Series((fred.get_series('DCOILWTICO', start_date, end_date)), name = 'wti')
effr = pd.Series((fred.get_series('EFFR', start_date, end_date)), name = 'effr')


SPX = yf.download('^GSPC', start=start_date, end=end_date)
RUA = yf.download('^RUA', start=start_date, end=end_date)
# price is already adjusted
SPX = SPX.drop(['Adj Close'], axis=1)
RUA = RUA.drop(['Adj Close'], axis=1)
# 10/200 MA
SPX['MA200'] = SPX['Close'].rolling(window=200).mean()
RUA['MA200'] = RUA['Close'].rolling(window=200).mean()
SPX['MA10'] = SPX['Close'].rolling(window=10).mean()
RUA['MA10'] = RUA['Close'].rolling(window=10).mean()
# ATR 10 for volatility
# SPX['ATR10'] = atr(high=SPX['High'], low=SPX['Low'], close=SPX['Close'], window=10)
# RUA['ATR10'] = atr(high=RUA['High'], low=RUA['Low'], close=RUA['Close'], window=10)
# Daily returns
SPX['Return'] = SPX['Close'].pct_change()
RUA['Return'] = RUA['Close'].pct_change()
# StDev of daily returns
SPX['StDev'] = SPX['Return'].rolling(10).std()
RUA['StDev'] = RUA['Return'].rolling(10).std()
# Side column if MA10 > MA200 1 (long), else -1 (short)
SPX['Side'] = np.where(SPX['MA10'] > SPX['MA200'], 1, -1) 
RUA['Side'] = np.where(RUA['MA10'] > RUA['MA200'], 1, -1)
# Shift side column forward by 1 so that we don't commit lookahead bias
SPX['Side'] = SPX['Side'].shift(1)
RUA['Side'] = RUA['Side'].shift(1)

SPX['StratReturns'] = 0
RUA['StratRetiuns'] = 0

# drop values for first 200 days since no MA200
SPX.dropna(inplace=True)
RUA.dropna(inplace=True)


In [105]:
data = pd.concat([eco_unc, tbill, wti, effr], axis =1).dropna()
len(effr)

4565

In [106]:
st_date = dt.datetime(2000, 1, 1)
ed_date = dt.datetime(2017, 12, 31)
SNP500 = yf.download('^GSPC', st_date, ed_date)
SNP500 = SNP500['Adj Close'].to_frame()
SNP500.fillna(method = 'bfill', inplace=True)
SNP500['SMA 5'] = SNP500['Adj Close'].rolling(5).mean()
SNP500['SMA 200'] = SNP500['Adj Close'].rolling(200).mean()
conditions = [SNP500['SMA 5'] >= SNP500['SMA 200'],
              SNP500['SMA 5'] < SNP500['SMA 200']]
choices = [1, 0]
SNP500['outcome'] = np.select(conditions, choices, 0)
# SNP500 = SNP500['outcome'].dropna()


[*********************100%***********************]  1 of 1 completed


In [107]:
features = pd.merge(data, SNP500, left_index = True, right_index = True).dropna()
X = features[['eco_unc', 'tbill', 'wti', 'effr']] 
y = features['outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.999, random_state=911)

In [101]:
vif = pd.DataFrame()
vif["ft"] = data.columns
vif["VIF"] = [variance_inflation_factor(data.values, i)
                         for i in range(len(data.columns))]
vif

Unnamed: 0,ft,VIF
0,eco_unc,1.29264
1,tbill,104.428563
2,wti,1.511427
3,effr,107.554187


In [102]:
log_regression = LogisticRegression()
log_regression.fit(X_train,y_train)

y_pred = log_regression.predict(X_test)

print('score', log_regression.score(X_test, y_test))   #Return the mean accuracy on the given test data and labels.

from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

score 0.756200280767431
              precision    recall  f1-score   support

           0       0.62      0.66      0.64      1390
           1       0.83      0.80      0.82      2884

    accuracy                           0.76      4274
   macro avg       0.72      0.73      0.73      4274
weighted avg       0.76      0.76      0.76      4274



In [109]:
features.to_csv('features.csv')