In [1]:
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
import os
from os import walk
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
from datetime import datetime,timedelta
import talib as ta
from sklearn.preprocessing import MinMaxScaler
from math import floor
import sys
from sklearn.metrics import mean_squared_error as MSE

In [2]:
data_path = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs/"

# walk() yields the top-level directory first; only that entry is consumed,
# so files that live in sub-directories are never listed.
companies = []
top_level = next(walk(data_path), None)
if top_level is not None:
    dirpath, dirnames, filenames = top_level
    companies.extend(filenames)

In [15]:
def _add_indicators(raw, rsiPeriod, adxPeriod, fillMissing):
    """Return a copy of ``raw`` with canonical column names and indicator columns.

    Parameters
    ----------
    raw : pandas.DataFrame
        Price table with a ``Datetime`` column and Close/High/Low/Volume
        columns (either capitalised like ``Close`` or upper-case like ``CLOSE``).
    rsiPeriod, adxPeriod : int
        Look-back periods passed to ``ta.RSI`` and ``ta.ADX``.
    fillMissing : bool
        When true, missing values in ``Close``, ``RSI`` and ``ADX`` are
        replaced by 0; when false they are left as NaN.

    Returns
    -------
    pandas.DataFrame
        New frame (``raw`` is not modified) with the added columns
        ``RSI``, ``ADX``, ``fastd``, ``fastk`` and ``macd``.
    """
    # rename() without inplace returns a new frame, so `raw` stays untouched.
    frame = raw.rename(columns={'CLOSE': 'Close', 'HIGH': 'High',
                                'LOW': 'Low', 'VOLUME': 'Volume'})
    # The parsed timestamps are not read again here; the conversion is kept so
    # that a missing or malformed 'Datetime' column still fails early.
    frame['Datetime'] = pd.to_datetime(frame['Datetime'])

    close = frame['Close'].astype(float)
    if fillMissing:
        close = close.fillna(0)
    frame['Close'] = close

    rsi = ta.RSI(frame['Close'], rsiPeriod)
    adx = ta.ADX(frame['High'], frame['Low'], frame['Close'], adxPeriod)
    if fillMissing:
        rsi = rsi.fillna(0)
        adx = adx.fillna(0)
    frame['RSI'] = rsi
    frame['ADX'] = adx

    fastk, fastd = ta.STOCHF(frame['High'], frame['Low'], frame['Close'])
    frame['fastd'] = fastd
    frame['fastk'] = fastk

    # Only the MACD line itself is a model feature; signal/histogram are unused.
    macd, _macdsignal, _macdhist = ta.MACD(frame['Close'])
    frame['macd'] = macd
    return frame


def company(stock, plot=False,
            dataPath="/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs",
            modelDir="models"):
    """Score the saved model of one stock against its own price history.

    Loads ``<modelDir>/<stock>-model.json`` with Keras, rebuilds the feature
    windows from ``<dataPath>/<stock>.csv``, predicts the close price for every
    window and compares the predictions with the real close prices.

    Parameters
    ----------
    stock : str
        Name used to build both the CSV file name and the model directory name.
    plot : bool, optional
        When true, plot real versus predicted prices with matplotlib.
    dataPath : str, optional
        Directory containing ``<stock>.csv``.
    modelDir : str, optional
        Directory containing the ``<stock>-model.json`` model directories.

    Returns
    -------
    float or tuple
        The mean squared error between real and predicted close prices.
        If ``<model>/saved_model.pb`` does not exist the tuple ``(0, 0, 0, 0)``
        is returned instead.
        NOTE(review): the two return shapes differ; callers must handle both.

    Raises
    ------
    IndexError
        If the CSV has too few rows to build a single prediction window.
    """
    model = "{}/{}-model.json".format(modelDir, stock)
    if not os.path.isfile("{}/saved_model.pb".format(model)):
        return 0,0,0,0

    rsiPeriod = 14
    adxPeriod = 14
    # First predicted row is window + leftshift.
    # NOTE(review): 33 presumably skips the indicator warm-up rows (NaN at the
    # start of the MACD series) - confirm against the training notebook.
    leftshift = 33
    window = 60

    variablesToInclude = ['Close','Volume',"RSI","ADX","fastd","fastk","macd"]
    numberOfFeatures = len(variablesToInclude)

    raw = pd.read_csv("{}/{}.csv".format(dataPath,stock))

    # ---- Feature scaling -------------------------------------------------
    # The scalers are fitted on the zero-filled variant of the data, the
    # predictions are made on the unfilled variant; both variants are kept
    # distinct because the fill changes the fitted minimum of each feature.
    dataTrain = _add_indicators(raw, rsiPeriod, adxPeriod, fillMissing=True)
    dataTest = _add_indicators(raw, rsiPeriod, adxPeriod, fillMissing=False)

    # .copy() makes the feature table independent before a column is replaced.
    newTrain = dataTrain[variablesToInclude].copy()
    # The 'Close' feature of a row is the close of the previous row.
    newTrain["Close"] = newTrain['Close'].shift(1)

    # Min-max normalization: (x - min(x)) / (max(x) - min(x)), i.e. values in [0, 1].
    sc = MinMaxScaler(feature_range=(0,1))
    sc.fit(newTrain.values)
    # Separate scaler for the (unshifted) target so predictions can be mapped
    # back to prices.
    ySC = MinMaxScaler(feature_range=(0,1))
    ySC.fit(dataTrain['Close'].values.reshape(-1, 1))

    regressor = tf.keras.models.load_model(model)

    # ---- Build prediction windows ---------------------------------------
    newTest = dataTest[variablesToInclude].copy()
    newTest["Close"] = newTest['Close'].shift(1)

    start = window + leftshift
    realStockPrice = dataTest['Close'].values[start:]
    inputs = sc.transform(newTest.values)

    if len(inputs) <= start:
        # Same exception type the reshape below would raise on an empty window
        # list, but with a message that names the actual problem.
        raise IndexError(
            "{} has {} rows; more than {} are needed to build a prediction window".format(
                stock, len(inputs), start))

    # One sample per predicted row: the `window` rows immediately before it.
    xTest = np.array([inputs[i-window:i] for i in range(start, len(inputs))])
    xTest = np.reshape(xTest, (xTest.shape[0],xTest.shape[1],numberOfFeatures)) #batchsize, inputSize, numberOfFeatures

    # ---- Predict price ---------------------------------------------------
    predictedPrice = regressor.predict(xTest)
    predictedPrice = ySC.inverse_transform(predictedPrice)

    if plot:
        plt.plot(realStockPrice, color = 'red', label = "Real Stock Price")
        plt.plot(predictedPrice, color = 'blue', label = "Predicted Stock Price")
        plt.title("{} Stock Price".format(stock))
        plt.xlabel('Time')
        plt.ylabel("Price")
        plt.legend()
        plt.show()

    mse = MSE(realStockPrice,predictedPrice)
    print("Company: {}\nMSE: {}".format(stock,mse))

    return mse


In [16]:
def normalize(value, sum, length):
    """Return ``value`` expressed as a fraction of ``sum``.

    ``length`` is accepted but does not take part in the computation.
    Note that the parameter ``sum`` shadows the built-in of the same name
    inside this function.
    """
    share = value / sum
    return share


def negativeMSE(value, max):
    """Return ``value`` with its sign flipped.

    ``max`` is accepted but does not take part in the computation.
    """
    flipped = -1 * value
    return flipped


In [17]:
# This cell defines negativeMSE again; running it rebinds the name to this
# definition.
def negativeMSE(value, max):
    """Negate ``value``; the ``max`` argument is ignored."""
    negated = -1 * value
    return negated

In [21]:
# Load the per-company results table; the cells below read its 'Company' and
# 'MSE' columns.
weights = pd.read_csv("Weights.csv")

In [30]:
# Keep only the rows whose MSE is below 10. The explicit .copy() makes the
# filtered table an independent frame, so the columns added to it in later
# cells are plain assignments rather than writes to a slice of the loaded frame.
weights = weights[weights['MSE'] < 10].copy()
weights.describe()

Unnamed: 0,MSE
count,95.0
mean,3.942435
std,2.866912
min,0.175284
25%,1.418893
50%,3.774146
75%,6.147478
max,9.933051


In [31]:
# Show the row(s) whose MSE equals the column minimum (ties are all shown).
weights.loc[weights['MSE'].eq(weights['MSE'].min())]

Unnamed: 0,Company,MSE
355,PBCT,0.175284


In [32]:
# Largest MSE and row count of the filtered table; the trailing bare name
# displays the maximum as the cell output.
maxMSE = weights['MSE'].max()
length = weights.shape[0]
maxMSE

9.933051159801265

In [33]:
# negativeMSE is plain arithmetic, so it is applied to the whole column at once
# instead of once per row through DataFrame.apply.
weights["Neg MSE"] = negativeMSE(weights['MSE'], maxMSE)
# Min-max scale the negated errors so the lowest MSE maps to the largest value.
# fit_transform returns an (n, 1) array; ravel() gives the 1-D shape of a column.
weights['Transformed MSE'] = MinMaxScaler().fit_transform(
    weights['Neg MSE'].values.reshape(-1, 1)).ravel()

In [34]:
# Summary statistics of the numeric columns, now including "Neg MSE" and
# "Transformed MSE".
weights.describe()

Unnamed: 0,MSE,Neg MSE,Transformed MSE
count,95.0,95.0,95.0
mean,3.942435,-3.942435,0.613933
std,2.866912,2.866912,0.293808
min,0.175284,-9.933051,0.0
25%,1.418893,-6.147478,0.387955
50%,3.774146,-3.774146,0.63118
75%,6.147478,-1.418893,0.872552
max,9.933051,-0.175284,1.0


In [35]:
# Total of the transformed scores; each weight is a score's share of this total.
sums = weights['Transformed MSE'].sum()
print(sums)
# normalize is a plain division, so it is applied to the whole column at once
# instead of once per row through DataFrame.apply.
weights['Weight'] = normalize(weights['Transformed MSE'], sums, length)
weights


58.32364369904557


Unnamed: 0,Company,MSE,Neg MSE,Transformed MSE,Weight
0,AAL,1.896661,-1.896661,0.823589,0.014121
13,AEE,6.544320,-6.544320,0.347285,0.005954
15,AES,0.727853,-0.727853,0.943371,0.016175
27,AMCR,0.204176,-0.204176,0.997039,0.017095
53,BAC,2.030901,-2.030901,0.809832,0.013885
...,...,...,...,...,...
484,WMB,7.036169,-7.036169,0.296880,0.005090
487,WRK,6.381988,-6.381988,0.363922,0.006240
489,WU,0.241728,-0.241728,0.993191,0.017029
490,WY,3.880638,-3.880638,0.620266,0.010635


In [36]:
# Sanity check: every weight is a value divided by the column total, so the
# weights should add up to 1 (up to floating-point rounding).
weights['Weight'].sum()

1.0

0.9999999999999999

In [14]:
# NOTE(review): this cell's execution count (14) is lower than that of the
# cells above it, and its stored output lists 341 rows while the filtered table
# above has 95 - the output appears to come from an earlier kernel state.
# Re-run after a kernel restart to confirm.
weights['Company']

0       AAL
3      ABBV
4       ABC
6       ABT
9       ADI
       ... 
495    XRAY
496     XYL
497     YUM
498     ZBH
500    ZION
Name: Company, Length: 341, dtype: object

In [None]:
# End-to-end weight computation: load, filter, negate, min-max scale, normalize.
# NOTE(review): this cell keeps rows with MSE < 500, whereas the earlier
# filtering cell uses a cutoff of 10 - confirm which threshold is intended.
weights = pd.read_csv("Weights.csv")
# .copy() makes the filtered table independent before columns are added to it.
weights = weights[weights['MSE'] < 500].copy()

length = len(weights)
maxMSE = weights['MSE'].max()

# Both helpers are plain arithmetic, so they are applied column-wise rather
# than once per row through DataFrame.apply.
weights["Neg MSE"] = negativeMSE(weights['MSE'], maxMSE)
# fit_transform returns an (n, 1) array; ravel() gives the 1-D shape of a column.
weights['Transformed MSE'] = MinMaxScaler().fit_transform(
    weights['Neg MSE'].values.reshape(-1, 1)).ravel()
sums = weights['Transformed MSE'].sum()

weights['Weight'] = normalize(weights['Transformed MSE'], sums, length)
weights['Weight'].sum()
