In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime,timedelta
from scipy import stats
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import sklearn as sk
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.models import load_model
from keras.wrappers.scikit_learn import KerasRegressor
from tensorflow import keras
import tensorflow as tf
import yfinance as yf
import seaborn as sn
import pandas_datareader.data as reader

In [None]:
# Get data: download daily prices for a user-supplied ticker and derive
# simple, percentage-point and log daily returns from the Close column.
ticker = input('Please input ticker').strip()  # input() already returns a str
equity_data = yf.download(
    ticker,
    start='2005-01-01',
    end='2020-09-01',
    interval='1d',
    auto_adjust=True,  # boolean flag (the string 'True' only worked by being truthy)
)
print(equity_data.columns)

# Simple one-day return; the first row is NaN because it has no predecessor.
equity_data['Pct_Returns'] = equity_data['Close'].pct_change(1)
# Same return expressed in percentage points.
equity_data['Raw_Returns'] = equity_data['Pct_Returns'] * 100
# Log return: ln(P_t) - ln(P_{t-1}).
equity_data['Log_Returns'] = np.log(equity_data['Close']) - np.log(equity_data['Close'].shift(1))
equity_data.tail()


In [None]:
# Fama-French research factors merged with the ticker's monthly returns.
factors = reader.DataReader('F-F_Research_Data_Factors', 'famafrench',
                            start='2005-01-01', end='2020-08-31')
factors = factors[0]  # entry 0 is used here as the monthly factor table

# Compound the daily simple returns inside each calendar month.
equity_returns_mt = equity_data['Pct_Returns'].resample('M').agg(lambda x: (x + 1).prod() - 1)

# Align on the calendar month itself instead of copying the factor index by
# position: a positional copy raises when the two series differ in length
# (e.g. a ticker listed after 2005) and could silently mislabel months.
equity_returns_mt.index = equity_returns_mt.index.to_period('M')
if not isinstance(factors.index, pd.PeriodIndex):
    factors.index = factors.index.to_period('M')

print(factors.head())
print(equity_returns_mt.head())
print(factors.shape)
print(equity_returns_mt.shape)

# Inner join on the shared 'Date' index level; months missing on either side are dropped.
full_dataset = pd.merge(factors, equity_returns_mt, on='Date')
factor_columns = ['Mkt-RF', 'SMB', 'HML', 'RF']
full_dataset[factor_columns] = full_dataset[factor_columns] / 100  # percent -> decimal
# Excess return = asset return - risk-free rate.
full_dataset['Excess_Returns'] = full_dataset['Pct_Returns'] - full_dataset['RF']
full_dataset = full_dataset.rename(columns={'Pct_Returns': ticker + ' Pct_Returns'})
full_dataset.shape

In [None]:
# Daily log-return series of the selected ticker over time.
plt.plot(equity_data['Log_Returns'])
plt.title(f'{ticker} Log Returns')
plt.xlabel('Date')
plt.ylabel("Log Returns")
plt.show()

In [None]:
# Render the merged factor/returns table; only the final expression of a
# cell is displayed, so nothing else is evaluated here.
full_dataset

In [None]:
# Hold-out configuration.
# NOTE(review): `forecast` looks like the number of trailing rows reserved for
# forecasting - confirm; it is only used for the ratio below.
forecast = 12
# Fraction of rows left once `forecast` rows are set aside (uses the constant
# instead of repeating the literal 12).
training_close_length = (len(full_dataset) - forecast) / len(full_dataset)
# Row count equal to 5% of the dataset, rounded up.
print(math.ceil((1 / 20) * len(full_dataset)))

In [None]:
# Features are the first three columns of the merged table; the target is
# the excess return.
x_values = full_dataset.iloc[:, :3]
y_values = full_dataset["Excess_Returns"]

# Unshuffled split, so row order is preserved; the test set holds
# ceil(5% of the rows) observations.
x_train, x_test, y_train, y_test = train_test_split(
    x_values,
    y_values,
    test_size=math.ceil((1 / 20) * len(full_dataset)),
    shuffle=False,
)
x_train

In [None]:
# Scaling: learn the min/max of each feature from the training split only and
# apply those same parameters to the test split. Re-fitting on the test split
# would leak test information and put the two splits on different scales.
sc = MinMaxScaler((-1, 1))
x_train = sc.fit_transform(x_train)  # returns a NumPy array
x_test = sc.transform(x_test)        # may fall slightly outside [-1, 1]; that is expected

In [None]:
# Targets as single-column 2-D arrays, which is the layout the scaler expects.
y_train, y_test = np.array(y_train).reshape(-1, 1), np.array(y_test).reshape(-1, 1)
# `sc` is re-fitted here on the training target only, so from this point on it
# is the target scaler; the later sc.inverse_transform(...) calls on model
# predictions rely on that. The test target reuses the training parameters
# instead of being fitted on its own range.
y_train = sc.fit_transform(y_train)
y_test = sc.transform(y_test)
len(y_train)

In [None]:
#MODEL
def define_model(lr=.01,
                 nodes_2=64,
                 nodes_3=36,
                 drop_rate_1=.3,
                 drop_rate_2=.3,
                 activation_2='sigmoid', activation_3='sigmoid',
                 input_dim=3):
    """Build and compile a small fully-connected regression network.

    Architecture: Dense(nodes_2) -> Dropout -> Dense(nodes_3) -> Dropout -> Dense(1, linear).

    Parameters
    ----------
    lr : float
        Learning rate passed to the Adam optimizer.
    nodes_2, nodes_3 : int
        Unit counts of the first and second hidden Dense layers.
    drop_rate_1, drop_rate_2 : float
        Dropout rates applied after the first and second hidden layers.
    activation_2, activation_3 : str
        Activation names of the first and second hidden layers.
    input_dim : int
        Number of input features. Defaults to 3, the value that was
        previously hard-coded.

    Returns
    -------
    A compiled Sequential model using mean-squared-error loss and reporting
    'mse' as a metric.
    """
    model = Sequential()
    model.add(Dense(nodes_2, input_dim=input_dim, activation=activation_2))
    model.add(Dropout(rate=drop_rate_1))
    model.add(Dense(nodes_3, activation=activation_3))
    model.add(Dropout(rate=drop_rate_2))
    # Single linear output unit for regression.
    model.add(Dense(1, activation="linear"))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss='mean_squared_error',
                  metrics=['mse'])
    return model

In [None]:
# Build the network that is trained further down and show its layer summary.
model = define_model(lr=.0001,
                     nodes_2=64,
                     nodes_3=36,  # the missing comma here made the cell a SyntaxError
                     activation_2='tanh',
                     activation_3='sigmoid',
                     drop_rate_1=.3,
                     drop_rate_2=.3)
model.summary()

In [None]:
# Wrap define_model as a scikit-learn style regressor. The keyword values
# below are forwarded to define_model; every argument not listed keeps
# define_model's own default.
model_tuning = KerasRegressor(
    build_fn=define_model,
    lr=.0001,
    drop_rate_1=.2,
    drop_rate_2=.2,
)

In [None]:
#HyperParameter Optimization
# Candidate values for each tuned argument of define_model.
lr = [.001, .005, .0001]
drop_rate_1 = [.2, .3, .4]
drop_rate_2 = [.2, .3, .4]

# 3 x 3 x 3 = 27 combinations are evaluated.
param_grid = {
    'drop_rate_1': drop_rate_1,
    'drop_rate_2': drop_rate_2,
    'lr': lr,
}

grid = GridSearchCV(estimator=model_tuning, param_grid=param_grid)
grid_result = grid.fit(x_train, y_train)

In [None]:
# Persist the full cross-validation table and report the winning combination.
results = pd.DataFrame(grid_result.cv_results_)
results.to_csv('FF_Dense_HyperTuning')
print("Best: %s" % (grid_result.best_params_,))

In [None]:
EPOCHS = 130            # number of full passes over the training data
BATCH_SIZE = 64         # samples processed before each weight update
VALIDATION_SPLIT = .2   # fraction of the training set held out for validation
call_backs = [
    # Write per-epoch metrics to a CSV file (overwritten on each run).
    tf.keras.callbacks.CSVLogger('FF_Dense.log',
                                 separator=',',
                                 append=False),
    # Stop once the validation loss has not improved for 30 epochs and roll
    # back to the best weights. Monitoring the training 'mse' instead would
    # ignore the validation split and keep the most over-fitted weights.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                     patience=30,
                                     restore_best_weights=True),
]

model.fit(x_train, y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          callbacks=call_backs,
          validation_split=VALIDATION_SPLIT)


In [None]:
# Validation and training loss per epoch, read back from the 'FF_Dense.log' CSV.
train_log = pd.read_csv('FF_Dense.log', index_col='epoch')
train_log['val_loss'].plot(legend=True)
train_log['loss'].plot(legend=True)

In [None]:
# Persist the trained network.
# NOTE(review): the file name is fixed to AAPL although `ticker` is user input - confirm this is intended.
saved_model_path = 'AAPL_Dense_MODEL'
model.save(saved_model_path)

In [None]:
#PREDICTION
# NOTE(review): this loads 'Citi_Dense_MODEL', a file this notebook never
# writes in the visible cells - confirm it exists on disk.
import_model = load_model('Citi_Dense_MODEL') #load model

# Raw network outputs; despite the "_unscaled" suffix these are still in the
# scaled target space until inverse-transformed below.
train_prediction_unscaled = import_model.predict(x_train)
test_prediction_unscaled = import_model.predict(x_test)

# Map predictions back through `sc`. Assumes `sc` was last fitted on the
# target column - verify against the scaling cells.
# (A typo in the assignment target previously left train_prediction undefined.)
train_prediction = sc.inverse_transform(train_prediction_unscaled) #PREDICTIONS USING TRAIN DATA
test_prediction = sc.inverse_transform(test_prediction_unscaled) #PREDICTIONS USING TEST DATA

Citi_Predictions_Train = pd.DataFrame(train_prediction, columns=['Citi_Predictions'])
Citi_Predictions_Test = pd.DataFrame(test_prediction, columns=['Citi_Predictions'])
#Citi_Predictions_Test.to_csv("Citi_Predictions_Test")
#Citi_Predictions_Train.to_csv("Citi_Prediction_Train")

In [None]:
#RMSE
def _rmse(predicted, actual):
    """Root-mean-square error: sqrt(mean((predicted - actual) ** 2))."""
    residuals = np.asarray(predicted, dtype=float) - np.asarray(actual, dtype=float)
    # Square each residual BEFORE averaging; squaring the mean residual
    # instead yields |mean error|, where positive and negative errors cancel.
    return np.sqrt(np.mean(residuals ** 2))


# y_train / y_test are in the scaled target space at this point, so they are
# compared with the raw network outputs (the "*_prediction_unscaled" arrays),
# not with the inverse-transformed predictions, to keep both sides in the
# same units. The reported errors are therefore in scaled units.
rmse_test = _rmse(test_prediction_unscaled, y_test)
rmse_train = _rmse(train_prediction_unscaled, y_train)
print(('RMSE for the Test set is ') + str(rmse_test))
print(('RMSE for the Train set is ') + str(rmse_train))

In [None]:
#Googl_to_CSV
# NOTE(review): the exported files are labelled GOOGL but the model loaded is
# 'AAPL_Dense_MODEL' - confirm which model is intended.
import_model = load_model('AAPL_Dense_MODEL') #load model

# Raw network outputs for both splits, then mapped back through `sc`.
train_prediction_unscaled = import_model.predict(x_train)
test_prediction_unscaled = import_model.predict(x_test)
train_prediction = sc.inverse_transform(train_prediction_unscaled) #PREDICTIONS USING TRAIN DATA
test_prediction = sc.inverse_transform(test_prediction_unscaled) #PREDICTIONS USING TEST DATA

# One-column frames, written to disk with the default CSV settings.
GOOGL_Predictions_Train = pd.DataFrame(train_prediction, columns=['GOOGL_Predictions'])
GOOGL_Predictions_Test = pd.DataFrame(test_prediction, columns=['GOOGL_Predictions'])
GOOGL_Predictions_Test.to_csv("GOOGL_Predictions_Test")
GOOGL_Predictions_Train.to_csv("GOOGL_Prediction_Train")
test_prediction

In [None]:
#AAPL_to_CSV
import_model = load_model('AAPL_Dense_MODEL') #load model

# Raw network outputs for both splits, then mapped back through `sc`.
train_prediction_unscaled = import_model.predict(x_train)
test_prediction_unscaled = import_model.predict(x_test)
train_prediction = sc.inverse_transform(train_prediction_unscaled) #PREDICTIONS USING TRAIN DATA
test_prediction = sc.inverse_transform(test_prediction_unscaled) #PREDICTIONS USING TEST DATA

# One-column frames, written to disk with the default CSV settings.
AAPL_Predictions_Train = pd.DataFrame(train_prediction, columns=['AAPL_Predictions'])
AAPL_Predictions_Test = pd.DataFrame(test_prediction, columns=['AAPL_Predictions'])
AAPL_Predictions_Test.to_csv("AAPL_Predictions_Test")
AAPL_Predictions_Train.to_csv("AAPL_Prediction_Train")
test_prediction

In [None]:
#COKE_to_CSV
# NOTE(review): the exported files are labelled Coke but the model loaded is
# 'Citi_Dense_MODEL' - confirm which model is intended.
import_model = load_model('Citi_Dense_MODEL') #load model

# Raw network outputs for both splits, then mapped back through `sc`.
train_prediction_unscaled = import_model.predict(x_train)
test_prediction_unscaled = import_model.predict(x_test)
train_prediction = sc.inverse_transform(train_prediction_unscaled) #PREDICTIONS USING TRAIN DATA
test_prediction = sc.inverse_transform(test_prediction_unscaled) #PREDICTIONS USING TEST DATA

# One-column frames, written to disk with the default CSV settings.
Coke_Predictions_Train = pd.DataFrame(train_prediction, columns=['Coke_Predictions'])
Coke_Predictions_Test = pd.DataFrame(test_prediction, columns=['Coke_Predictions'])
Coke_Predictions_Test.to_csv("Coke_Predictions_Test")
Coke_Predictions_Train.to_csv("Coke_Prediction_Train")

In [None]:
#Tesla_to_CSV
# NOTE(review): the output is labelled Tesla but the model loaded is
# 'Citi_Dense_MODEL' and the column is named 'Citi_Estimates' - confirm.
import_model = load_model('Citi_Dense_MODEL') #load model
train_prediction_unscaled = import_model.predict(x_train)
test_prediction_unscaled = import_model.predict(x_test)

# Inverse-transform this cell's own outputs instead of reusing whatever
# `test_prediction` an earlier cell happened to leave in the kernel.
test_prediction = sc.inverse_transform(test_prediction_unscaled)

# `prediction_time_interval` was not defined anywhere in the notebook. The
# train/test split is unshuffled, so the test rows are taken to be the last
# len(test_prediction) rows of full_dataset and its index labels are reused.
# NOTE(review): confirm this is the intended date range for the export.
prediction_time_interval = full_dataset.index[-len(test_prediction):]

Tesla_predictions = pd.DataFrame(test_prediction, index=prediction_time_interval)
Tesla_predictions.columns = ['Citi_Estimates']
Tesla_predictions.to_csv("Tesla_Prediction_01-01-2020_onwards")
Tesla_predictions.plot()
Tesla_predictions.describe()  # call the method; the bare attribute only showed its repr