In [2]:
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
import os
from os import walk
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
from datetime import datetime,timedelta
import talib as ta
from sklearn.preprocessing import MinMaxScaler
from math import floor
import sys
from sklearn.metrics import mean_squared_error as MSE

In [3]:
def list_company_files(directory):
    """Return the sorted names of the ``.csv`` files directly inside ``directory``.

    Only the top level of ``directory`` is inspected; sub-directories are not
    descended into. Files without a ``.csv`` suffix (for example hidden
    operating-system files) are skipped, because downstream code derives the
    ticker symbol by stripping that extension from each name.

    Args:
        directory: Path of the folder holding one CSV per company.

    Returns:
        list[str]: File names (not full paths) in sorted order. The list is
        empty when the directory does not exist or holds no CSV files.
    """
    # walk() yields nothing for a missing directory, so fall back to an
    # empty listing instead of raising.
    _, _, filenames = next(walk(directory), (directory, [], []))
    return sorted(name for name in filenames if name.endswith(".csv"))


data_path = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs/"
companies = list_company_files(data_path)

In [4]:
_DEFAULT_DATA_PATH = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs"


def _build_feature_frame(rawFrame, rsiPeriod, adxPeriod, fillMissing):
    """Return a new frame with normalised column names plus indicator columns.

    Args:
        rawFrame: Price history as read from the CSV. It is not modified.
        rsiPeriod: Look-back period passed to ``ta.RSI``.
        adxPeriod: Look-back period passed to ``ta.ADX``.
        fillMissing: When true, missing ``Close``, ``RSI`` and ``ADX`` values
            are replaced by 0 (the preparation used for fitting the scalers).
            When false they are left as NaN (the preparation used for the
            model inputs).

    Returns:
        pandas.DataFrame with ``Close``, ``RSI``, ``ADX``, ``fastd``,
        ``fastk`` and ``macd`` columns added or updated.
    """
    # rename() returns a new frame, so the assignments below never touch rawFrame.
    frame = rawFrame.rename(columns={'CLOSE': 'Close', 'HIGH': 'High',
                                     'LOW': 'Low', 'VOLUME': 'Volume'})
    frame['Datetime'] = pd.to_datetime(frame['Datetime'])

    closePrices = frame['Close'].astype(float)
    if fillMissing:
        closePrices = closePrices.fillna(0)
    frame['Close'] = closePrices

    rsi = ta.RSI(frame['Close'], rsiPeriod)
    adx = ta.ADX(frame['High'], frame['Low'], frame['Close'], adxPeriod)
    if fillMissing:
        rsi = rsi.fillna(0)
        adx = adx.fillna(0)
    frame['RSI'] = rsi
    frame['ADX'] = adx

    fastk, fastd = ta.STOCHF(frame['High'], frame['Low'], frame['Close'])
    frame['fastd'] = fastd
    frame['fastk'] = fastk

    # Only the MACD line itself is used as a feature.
    macd, _macdSignal, _macdHist = ta.MACD(frame['Close'])
    frame['macd'] = macd
    return frame


def company(stock, plot=False, dataPath=_DEFAULT_DATA_PATH, modelDir="models"):
    """Score the saved model for ``stock`` against its own price history.

    The function loads ``<dataPath>/<stock>.csv``, derives the indicator
    features, fits the min-max scalers on that history, runs the saved Keras
    model over sliding windows of the scaled features and compares the
    predictions with the actual closing prices.

    Note that the scalers are fitted on the same CSV that is then used for
    prediction, so the reported error is not an out-of-sample measure.

    Args:
        stock: Ticker symbol; used to build both the CSV and the model path.
        plot: When true, draw actual versus predicted prices with matplotlib.
        dataPath: Directory containing ``<stock>.csv``. Defaults to the
            location originally hard-coded in this notebook.
        modelDir: Directory containing ``<stock>-model.json/saved_model.pb``.

    Returns:
        float: Mean squared error between the actual and predicted close
        prices, or NaN when no saved model exists for ``stock``. NaN is used
        (rather than a tuple placeholder) so that every code path yields a
        scalar and missing models are easy to filter out of a numeric column.
    """
    model = "{}/{}-model.json".format(modelDir, stock)
    if not os.path.isfile("{}/saved_model.pb".format(model)):
        return float("nan")

    rsiPeriod = 14
    adxPeriod = 14
    shift = 1        # the Close feature is lagged by this many rows
    leftshift = 33   # presumably skips the indicator warm-up rows; TODO confirm
    window = 60      # number of consecutive rows fed to the model per prediction
    variablesToInclude = ['Close', 'Volume', "RSI", "ADX", "fastd", "fastk", "macd"]
    numberOfFeatures = len(variablesToInclude)

    # The same file backs both the scaler fit and the prediction run, so read it once.
    rawData = pd.read_csv("{}/{}.csv".format(dataPath, stock))

    # --- Feature scaling -------------------------------------------------
    dataTrain = _build_feature_frame(rawData, rsiPeriod, adxPeriod, fillMissing=True)
    newTrain = dataTrain[variablesToInclude].copy()
    newTrain['Close'] = newTrain['Close'].shift(shift)

    # Min-max normalisation: (x - min(x)) / (max(x) - min(x)), giving values in [0, 1].
    sc = MinMaxScaler(feature_range=(0, 1)).fit(newTrain.values)
    # The target scaler is fitted on the unshifted close price.
    ySC = MinMaxScaler(feature_range=(0, 1)).fit(
        dataTrain['Close'].values.reshape(-1, 1))

    regressor = tf.keras.models.load_model(model)

    # --- Model inputs ----------------------------------------------------
    dataTest = _build_feature_frame(rawData, rsiPeriod, adxPeriod, fillMissing=False)
    firstTarget = window + leftshift
    realStockPrice = dataTest['Close'].values[firstTarget:]

    newTest = dataTest[variablesToInclude].copy()
    newTest['Close'] = newTest['Close'].shift(shift)
    inputs = sc.transform(newTest.values)

    # One window of `window` consecutive rows per predicted price.
    xTest = np.array([inputs[i - window:i] for i in range(firstTarget, len(inputs))])
    # (batch size, window length, number of features)
    xTest = np.reshape(xTest, (xTest.shape[0], xTest.shape[1], numberOfFeatures))

    # --- Predict price ---------------------------------------------------
    predictedPrice = regressor.predict(xTest)
    predictedPrice = ySC.inverse_transform(predictedPrice)

    if plot:
        plt.plot(realStockPrice, color='red', label="Real Stock Price")
        plt.plot(predictedPrice, color='blue', label="Predicted Stock Price")
        plt.title("{} Stock Price".format(stock))
        plt.xlabel('Time')
        plt.ylabel("Price")
        plt.legend()
        plt.show()

    mse = MSE(realStockPrice, predictedPrice)
    print("Company: {}\nMSE: {}".format(stock, mse))

    return mse


In [253]:
def normalize(value, sum, length):
    """Return ``value`` expressed as a fraction of ``sum``.

    ``length`` is accepted for the caller's convenience but does not take
    part in the calculation.
    """
    fraction = value / sum
    return fraction


def negativeMSE(value, max):
    """Return ``value`` with its sign flipped.

    ``max`` is accepted but not used by the calculation.
    """
    flipped = value * -1
    return flipped


In [277]:
# NOTE(review): this re-declares negativeMSE, which an earlier cell already
# defines with the same body; one of the two definitions can be dropped.
def negativeMSE(value, max):
    """Negate ``value``; the ``max`` argument is ignored."""
    negated = value * -1
    return negated

In [255]:
# Score every company and build the results table in one go.
# The previous version called DataFrame.append inside the loop and discarded
# its return value, so the table stayed empty (and DataFrame.append no longer
# exists in pandas 2.x). Collecting plain records and constructing the frame
# once avoids both problems.
records = []
for filename in companies:
    # Drop the file extension to recover the ticker symbol.
    ticker = os.path.splitext(filename)[0]
    mse = company(ticker)
    # company() may hand back a non-scalar placeholder when it has no model
    # for the ticker; store NaN instead so the MSE column stays numeric.
    if np.ndim(mse) != 0:
        mse = float("nan")
    records.append({'Company': ticker, 'MSE': mse})

weights = pd.DataFrame(records, columns=['Company', 'MSE'])

KeyboardInterrupt: 

In [303]:
# Load the previously saved per-company scores from the working directory.
weights = pd.read_csv(filepath_or_buffer="Weights.csv")

In [316]:
# Keep only companies whose MSE is below 500. The explicit copy makes the
# result an independent frame, so columns added to it in later cells do not
# trigger pandas' chained-assignment (SettingWithCopy) behaviour.
weights = weights[weights['MSE'] < 500].copy()
weights.describe()

Unnamed: 0,MSE,Neg MSE,Transformed MSE,Weight
count,341.0,341.0,341.0,341.0
mean,106.801851,-106.801851,0.891541,0.002791
std,124.016522,124.016522,0.126147,0.000395
min,0.175284,-499.515904,0.492079,0.00154
25%,8.285399,-171.420232,0.825813,0.002585
50%,55.657653,-55.657653,0.943564,0.002954
75%,171.420232,-8.285399,0.991751,0.003105
max,499.515904,-0.175284,1.0,0.003131


In [317]:
# Largest MSE among the remaining companies, and how many companies remain.
maxMSE = weights['MSE'].max()
length = len(weights.index)
maxMSE

499.5159038371706

In [318]:
# Negate the MSE so that a smaller error becomes a larger value, then rescale
# the negated column to the [0, 1] range. The negation is applied to the whole
# column at once instead of row by row; the result is the same but it avoids a
# Python-level loop and also behaves on an empty frame.
weights["Neg MSE"] = negativeMSE(weights['MSE'], maxMSE)
# fit_transform returns an (n, 1) array; flatten it before storing it as a column.
weights['Transformed MSE'] = MinMaxScaler().fit_transform(
    weights['Neg MSE'].values.reshape(-1, 1)).ravel()

In [319]:
# Summary statistics of the numeric columns, to inspect the rescaled values.
weights.describe()

Unnamed: 0,MSE,Neg MSE,Transformed MSE,Weight
count,341.0,341.0,341.0,341.0
mean,106.801851,-106.801851,0.786465,0.002791
std,124.016522,124.016522,0.248361,0.000395
min,0.175284,-499.515904,0.0,0.00154
25%,8.285399,-171.420232,0.657058,0.002585
50%,55.657653,-55.657653,0.888889,0.002954
75%,171.420232,-8.285399,0.983758,0.003105
max,499.515904,-0.175284,1.0,0.003131


In [320]:
# Turn each transformed score into its share of the total, so that the shares
# add up to 1. The division is done on the whole column at once rather than
# row by row through DataFrame.apply.
sums = weights['Transformed MSE'].sum()
print(sums)
weights['Weight'] = normalize(weights['Transformed MSE'], sums, length)
weights


268.1846549053848


Unnamed: 0,Company,MSE,Neg MSE,Transformed MSE,Weight
0,AAL,1.896661,-1.896661,0.996553,0.003716
3,ABBV,45.799721,-45.799721,0.908631,0.003388
4,ABC,414.449656,-414.449656,0.170357,0.000635
6,ABT,204.418710,-204.418710,0.590974,0.002204
9,ADI,179.118160,-179.118160,0.641642,0.002393
...,...,...,...,...,...
495,XRAY,294.357118,-294.357118,0.410859,0.001532
496,XYL,183.157489,-183.157489,0.633552,0.002362
497,YUM,314.671135,-314.671135,0.370178,0.001380
498,ZBH,67.259732,-67.259732,0.865654,0.003228


In [321]:
# Sanity check: every weight is a share of the same total, so they should sum to about 1.
weights.loc[:, 'Weight'].sum()

0.9999999999999999

In [322]:
# End-to-end recomputation of the weights from the saved scores.
weights = pd.read_csv("Weights.csv")
# Keep companies with MSE below 500; copy so that the column assignments below
# operate on an independent frame rather than on a slice of the loaded one.
weights = weights[weights['MSE'] < 500].copy()

length = len(weights)
maxMSE = weights['MSE'].max()

# Negate the error (smaller error -> larger value) and rescale it to [0, 1].
# Both steps work on whole columns instead of row-wise DataFrame.apply.
weights["Neg MSE"] = negativeMSE(weights['MSE'], maxMSE)
weights['Transformed MSE'] = MinMaxScaler().fit_transform(
    weights['Neg MSE'].values.reshape(-1, 1)).ravel()
sums = weights['Transformed MSE'].sum()

# Each weight is the company's share of the summed transformed scores.
weights['Weight'] = normalize(weights['Transformed MSE'], sums, length)
weights['Weight'].sum()

0.9999999999999999

In [323]:
# Ticker symbols of the companies that passed the MSE filter.
weights.loc[:, 'Company']

0       AAL
3      ABBV
4       ABC
6       ABT
9       ADI
       ... 
495    XRAY
496     XYL
497     YUM
498     ZBH
500    ZION
Name: Company, Length: 341, dtype: object