In [182]:
%matplotlib inline

import requests
import pandas as pd 
import matplotlib.pyplot as plt

from pandas.plotting import scatter_matrix

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split

START_DATE = '2011-01-03'
END_DATE = '2019-04-03'

In [183]:
# import files and convert to dataframe
file_name = "data/momentum-features.csv"
df = pd.read_csv(file_name)

In [184]:
df.set_index(['Symbol', 'Date'], inplace=True)

In [185]:
df.reset_index(inplace=True)

In [186]:
# initialize percent change positive/negative binary
# copy values from percent change daily before data manipulation
df['Pct_Change_Class'] = df['Pct_Change_Daily']

In [187]:
# if percent positive, assign 1; else assign 0
df['Pct_Change_Class'].where(df['Pct_Change_Class'] < 0, other=1, inplace=True)
df['Pct_Change_Class'].where(df['Pct_Change_Class'] > 0, other=0, inplace=True)
df.head()

Unnamed: 0,Symbol,Date,High,Low,Open,Close,Volume,AdjClose,Pct_Change_Daily,Pct_Change_Monthly,Pct_Change_Yearly,RSI,Volatility,Yearly_Return_Rank,Monthly_Return_Rank,Pct_Change_Class
0,A,2011-01-03,30.143061,29.620888,29.728184,29.957081,4994000.0,27.591616,,,,,,,,1.0
1,A,2011-01-04,30.114449,29.456366,30.035765,29.678112,5017200.0,27.334681,-0.009312,,,,,,,0.0
2,A,2011-01-05,29.849785,29.32761,29.513592,29.613733,4519000.0,27.275387,-0.002169,,,,,,,0.0
3,A,2011-01-06,29.928469,29.477825,29.592276,29.670958,4699000.0,27.328091,0.001932,,,,,,,1.0
4,A,2011-01-07,29.899857,29.356224,29.699572,29.771101,3810900.0,27.420322,0.003375,,,,,,,1.0


In [188]:
# set index on symbol
df.set_index('Symbol', inplace=True)
df.head()

Unnamed: 0_level_0,Date,High,Low,Open,Close,Volume,AdjClose,Pct_Change_Daily,Pct_Change_Monthly,Pct_Change_Yearly,RSI,Volatility,Yearly_Return_Rank,Monthly_Return_Rank,Pct_Change_Class
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
A,2011-01-03,30.143061,29.620888,29.728184,29.957081,4994000.0,27.591616,,,,,,,,1.0
A,2011-01-04,30.114449,29.456366,30.035765,29.678112,5017200.0,27.334681,-0.009312,,,,,,,0.0
A,2011-01-05,29.849785,29.32761,29.513592,29.613733,4519000.0,27.275387,-0.002169,,,,,,,0.0
A,2011-01-06,29.928469,29.477825,29.592276,29.670958,4699000.0,27.328091,0.001932,,,,,,,1.0
A,2011-01-07,29.899857,29.356224,29.699572,29.771101,3810900.0,27.420322,0.003375,,,,,,,1.0


In [189]:
# initialize new rolling average features
df['Rolling_Yearly_Mean_Positive_Days'] = df['Pct_Change_Class']
df['Rolling_Monthly_Mean_Positive_Days'] = df['Pct_Change_Class']
df['Rolling_Monthly_Mean_Price'] = df['AdjClose']
df['Rolling_Yearly_Mean_Price'] = df['AdjClose']

In [190]:
# use pandas rolling method to calculate moving averages on selected featurs on a monthly and yearly basis
YEARLY_TRADING_DAYS = 252
MONTHLY_TRADING_DAYS = 21
rolling_monthly_up_days = df.groupby(level=0)['Rolling_Monthly_Mean_Positive_Days'].rolling(MONTHLY_TRADING_DAYS, min_periods=MONTHLY_TRADING_DAYS).mean()
rolling_yearly_up_days = df.groupby(level=0)['Rolling_Yearly_Mean_Positive_Days'].rolling(YEARLY_TRADING_DAYS, min_periods=YEARLY_TRADING_DAYS).mean()
monthly_rolling_average_price = df.groupby(level=0)['Rolling_Monthly_Mean_Price'].rolling(MONTHLY_TRADING_DAYS, min_periods=MONTHLY_TRADING_DAYS).mean()
yearly_rolling_average_price = df.groupby(level=0)['Rolling_Yearly_Mean_Price'].rolling(YEARLY_TRADING_DAYS, min_periods=YEARLY_TRADING_DAYS).mean()

In [191]:
# copy values into the working stocks dataframe
df['Rolling_Monthly_Mean_Positive_Days'] = rolling_monthly_up_days.values
df['Rolling_Yearly_Mean_Positive_Days'] = rolling_yearly_up_days.values
df['Rolling_Monthly_Mean_Price'] = monthly_rolling_average_price.values
df['Rolling_Yearly_Mean_Price'] = yearly_rolling_average_price.values

In [192]:
# Convert to csv
df.to_csv("data/moving-avg-momentum.csv", encoding='utf-8', index=True)
