In [1]:
import warnings
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
import os
from os import walk
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
from datetime import datetime,timedelta
import talib as ta
from sklearn.preprocessing import MinMaxScaler
from math import floor
import sys
from sklearn.metrics import mean_squared_error as MSE

In [2]:
companies = []
data_path = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs/"
for (dirpath, dirnames, filenames) in walk(data_path):
    companies.extend(filenames)
    break 

In [3]:
def company(stock,plot=False):
    model = "Best/Models/{}-model.json".format(stock)
    if not os.path.isfile("{}/saved_model.pb".format(model)):
        return 0,0,0,0


    dataPath = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs"
    dataTrain = pd.read_csv("{}/{}.csv".format(dataPath,stock)) #import csv
    dataTrain['Datetime']= pd.to_datetime(dataTrain['Datetime'])
    rsiPeriod = 14
    adxPeriod = 14
    shift = 1
    leftshift = 33
    window = 60

    # %%
    variablesToInclude = ['Close','Volume',"RSI","ADX","fastd","fastk","macd"]
    numberOfFeatures = len(variablesToInclude)
    
    trainingDataPoints = round(len(dataTrain) * .4)
    
    dataTrain.rename(columns = {'CLOSE':'Close'}, inplace = True)
    dataTrain.rename(columns = {'HIGH':'High'}, inplace = True)
    dataTrain.rename(columns = {'LOW':'Low'}, inplace = True)
    dataTrain.rename(columns = {'VOLUME':'Volume'}, inplace = True)


    # %%

    dataTrain['Close'] = dataTrain['Close'].astype(float).fillna(0)
    data = dataTrain['Close']

    dataTrain["RSI"] = ta.RSI(dataTrain['Close'],rsiPeriod).fillna(0)
    dataTrain["ADX"] = ta.ADX(dataTrain['High'],dataTrain['Low'],dataTrain['Close'],adxPeriod).fillna(0)
    fastk, fastd = ta.STOCHF(dataTrain['High'],dataTrain['Low'],dataTrain['Close'])
    dataTrain['fastd'] = fastd
    dataTrain['fastk'] = fastk
    macd, macdsignal, macdhist = ta.MACD(dataTrain['Close'])
    dataTrain['macd'] = macd
    dataTrain['macdsignal'] = macdsignal
    dataTrain['macdhist'] = macdhist
    upper,middle,lower = ta.BBANDS(dataTrain['Close'])
    dataTrain['bb_lowerband'] = lower
    dataTrain['bb_middleband'] = middle
    dataTrain['bb_upperband'] = upper

    # %%
    newTrain = pd.DataFrame()
    newTrain = dataTrain[variablesToInclude]
    newTrain["Close"] = newTrain['Close'].shift(1)

    # %%

    trainingSet = newTrain.iloc[:,0:numberOfFeatures].values #convert to numpy to train RNN
    ySet = data.astype(float).values.reshape(-1, 1)

    # %% [markdown]
    # ## Feature Scaling

    # %%
    # Use normalization x - min(x) / max(min) - min(x)
    sc = MinMaxScaler(feature_range=(0,1)) # all values between 0 and 1
    ySC = MinMaxScaler(feature_range=(0,1))
    ySCSet = ySC.fit_transform(ySet)
    scaleTrainingSet = sc.fit_transform(trainingSet)


    
    regressor=tf.keras.models.load_model(model)
    dataTest = pd.read_csv("{}/{}.csv".format(dataPath,stock)) #import csv
    dataTest.rename(columns = {'CLOSE':'Close'}, inplace = True)
    dataTest.rename(columns = {'HIGH':'High'}, inplace = True)
    dataTest.rename(columns = {'LOW':'Low'}, inplace = True)
    dataTest.rename(columns = {'VOLUME':'Volume'}, inplace = True)

    # %%
    dataTest['Datetime']= pd.to_datetime(dataTest['Datetime'])
    dataTest['Close'] = dataTest['Close'].astype(float)
    dataTest["RSI"] = ta.RSI(dataTest['Close'],rsiPeriod)
    dataTest["ADX"] = ta.ADX(dataTest['High'],dataTest['Low'],dataTest['Close'],adxPeriod)
    fastk, fastd = ta.STOCHF(dataTest['High'],dataTest['Low'],dataTest['Close'])
    dataTest['fastd'] = fastd
    dataTest['fastk'] = fastk
    macd, macdsignal, macdhist = ta.MACD(dataTest['Close'])
    dataTest['macd'] = macd
    dataTest['macdsignal'] = macdsignal
    dataTest['macdhist'] = macdhist
    upper,middle,lower = ta.BBANDS(dataTest['Close'])
    dataTest['bb_lowerband'] = lower
    dataTest['bb_middleband'] = middle
    dataTest['bb_upperband'] = upper
    

    
    # %%
    newTest = pd.DataFrame()
    newTest = dataTest[variablesToInclude]
    
    # %%
    realStockPrice = dataTest['Close'].values #convert to numpy to train RNN
    newTest["Close"] = newTest['Close'].shift(1)
    trainingSet = newTest.iloc[:,0:numberOfFeatures].values #convert to numpy to train RNN
    realStockPrice = realStockPrice[window+leftshift:]

    # %% [markdown]
    # ## Predict price

    # %%

    inputs = trainingSet
    inputs = sc.transform(inputs)

    # %%
    xTest = []
    for i in range(window+leftshift,len(inputs)):
        xTest.append(inputs[i-window:i])
    xTest = np.array(xTest)
    xTest = np.reshape(xTest, (xTest.shape[0],xTest.shape[1],numberOfFeatures)) #batchsize, inputSize, numberOfFeatures

    # %%
    predictedPrice = regressor.predict(xTest)
    predictedPrice = ySC.inverse_transform(predictedPrice)

    
    
    if plot:
        plt.plot(realStockPrice, color = 'red', label = "Real Stock Price")
        plt.plot(predictedPrice, color = 'blue', label = "Predicted Stock Price")
        plt.title("{} Stock Price".format(stock))
        plt.xlabel('Time')
        plt.ylabel("Price")
        plt.legend()
        plt.show()

    mse = MSE(realStockPrice,predictedPrice)
    print("Company: {}\nMSE: {}".format(stock,mse))

    return mse


In [4]:
def normalize(value, sum,length):
    
    return (value / sum)


def negativeMSE(value, max):
    return (-1 * value)


In [5]:
def negativeMSE(value, max):
    return (-1 * value)

In [6]:
weights = pd.read_csv("Weights.csv")

In [7]:
weights = weights[weights['MSE'] < 50]
weights.describe()

Unnamed: 0,MSE
count,166.0
mean,14.002035
std,14.227171
min,0.175284
25%,2.95993
50%,7.574376
75%,22.895811
max,49.892501


In [8]:
weights[weights['MSE']==weights['MSE'].max()]

Unnamed: 0,Company,MSE
366,PHM,49.892501


In [21]:
length = len(weights) 
maxMSE = weights['MSE'].max()
maxMSE

832117.7572192006

In [22]:
weights["Neg MSE"] = weights.apply(lambda row: negativeMSE(row['MSE'], maxMSE), axis=1)
weights['Transformed MSE'] = MinMaxScaler().fit_transform(weights['Neg MSE'].values.reshape(-1,1))

In [23]:
weights.describe()

Unnamed: 0,MSE,Neg MSE,Transformed MSE
count,25.0,25.0,25.0
mean,99368.057938,-99368.057938,0.891809
std,193685.369872,193685.369872,0.235729
min,10473.615912,-832117.757219,0.0
25%,14259.469832,-79993.163315,0.91539
50%,16931.093874,-16931.093874,0.992141
75%,79993.163315,-14259.469832,0.995392
max,832117.757219,-10473.615912,1.0


In [13]:
sums = weights['Transformed MSE'].sum()
print(sums)
weights['Weight'] = weights.apply(lambda row: normalize(row['Transformed MSE'], sums, length), axis=1)
weights


36.46892390557873


Unnamed: 0,Company,MSE,Neg MSE,Transformed MSE,Weight
0,AAL,1.896661,-1.896661,0.642378,0.017614
15,AES,0.727853,-0.727853,0.885202,0.024273
27,AMCR,0.204176,-0.204176,0.993998,0.027256
53,BAC,2.030901,-2.030901,0.614489,0.016850
62,BK,1.100395,-1.100395,0.807805,0.022151
...,...,...,...,...,...
461,USB,4.447808,-4.447808,0.112369,0.003081
473,VZ,1.131016,-1.131016,0.801444,0.021976
489,WU,0.241728,-0.241728,0.986196,0.027042
490,WY,3.880638,-3.880638,0.230200,0.006312


In [14]:
weights['Weight'].sum()

1.0

0.9999999999999999

In [14]:
weights['Company']

0       AAL
3      ABBV
4       ABC
6       ABT
9       ADI
       ... 
495    XRAY
496     XYL
497     YUM
498     ZBH
500    ZION
Name: Company, Length: 341, dtype: object

In [None]:
weights = pd.read_csv("Weights.csv")
weights = weights[weights['MSE'] < 500]

length = len(weights)
maxMSE = weights['MSE'].max()

weights["Neg MSE"] = weights.apply(
    lambda row: negativeMSE(row['MSE'], maxMSE), axis=1)
weights['Transformed MSE'] = MinMaxScaler().fit_transform(
    weights['Neg MSE'].values.reshape(-1, 1))
sums = weights['Transformed MSE'].sum()

weights['Weight'] = weights.apply(lambda row: normalize(
    row['Transformed MSE'], sums, length), axis=1)
weights['Weight'].sum()


In [9]:
# %% [markdown]
# # Part1 - Preprocessing

# %% [markdown]


# %%
from tqdm import tqdm
import shutil
from pandas.core.common import SettingWithCopyWarning
import talib as ta
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
import imp
import absl.logging
import warnings
import csv
import random
import os
from os import walk
from eth_utils import combine_argument_formatters
from sklearn.metrics import mean_squared_error
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
absl.logging.set_verbosity(absl.logging.ERROR)


padding = "--------------------------------"
# %% [markdown]
# ## Import training set


def rnn(stock, startDate="1982-3-12", endDate="2022-02-1", lr=0.01, layer1=50, layer2=50, layer3=50, layer4=50):  # %%
    rsiPeriod = 14
    adxPeriod = 14
    bollingerBandWindow = 20
    shift = 1
    leftshift = 33
    window = 60

    # %%
    variablesToInclude = ['Close', 'Volume',
                          "RSI", "ADX", "fastd", "fastk", "macd"]
    numberOfFeatures = len(variablesToInclude)
    numberOfFeatures

    dataPath = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs"

    # %%
    dataTrain = pd.read_csv("{}/{}.csv".format(dataPath, stock))  # import csv
    temp = dataTrain
    
    
   

    trainingDataPoints = round(len(dataTrain) * .4)
    if trainingDataPoints < 93:
        return 0
    dataTrain.rename(columns={'CLOSE': 'Close'}, inplace=True)
    dataTrain.rename(columns={'HIGH': 'High'}, inplace=True)
    dataTrain.rename(columns={'LOW': 'Low'}, inplace=True)
    dataTrain.rename(columns={'VOLUME': 'Volume'}, inplace=True)

    # %%

    dataTrain['Close'] = dataTrain['Close'].astype(float).fillna(0)
    data = dataTrain['Close']

    dataTrain["RSI"] = ta.RSI(dataTrain['Close'], rsiPeriod).fillna(0)
    dataTrain["ADX"] = ta.ADX(
        dataTrain['High'], dataTrain['Low'], dataTrain['Close'], adxPeriod).fillna(0)
    fastk, fastd = ta.STOCHF(
        dataTrain['High'], dataTrain['Low'], dataTrain['Close'])
    dataTrain['fastd'] = fastd
    dataTrain['fastk'] = fastk
    macd, macdsignal, macdhist = ta.MACD(dataTrain['Close'])
    dataTrain['macd'] = macd
    dataTrain['macdsignal'] = macdsignal
    dataTrain['macdhist'] = macdhist
    upper, middle, lower = ta.BBANDS(dataTrain['Close'])
    dataTrain['bb_lowerband'] = lower
    dataTrain['bb_middleband'] = middle
    dataTrain['bb_upperband'] = upper

    # %%
    newTrain = pd.DataFrame()
    newTrain = dataTrain[variablesToInclude]
    newTrain["Close"] = newTrain['Close'].shift(1)

    # %%

    # convert to numpy to train RNN
    trainingSet = newTrain.iloc[:, 0:numberOfFeatures].values
    ySet = data.astype(float).values.reshape(-1, 1)

    # %% [markdown]
    # ## Feature Scaling

    # %%
    # Use normalization x - min(x) / max(min) - min(x)
    sc = MinMaxScaler(feature_range=(0, 1))  # all values between 0 and 1
    ySC = MinMaxScaler(feature_range=(0, 1))
    ySCSet = ySC.fit_transform(ySet)
    scaleTrainingSet = sc.fit_transform(trainingSet)

    # %% [markdown]
    # ## Create a data structure woth 60 timesteps and 1 output

    # %%
    # Look at the 60 previous timesteps to predict this timestep
    xTrain = []
    yTrain = []
    for i in range(window+leftshift, trainingDataPoints):
        xTrain.append(scaleTrainingSet[i-window:i])
        yTrain.append(ySCSet[i])

    # %%
    # convert xtrain and yTrain to numpy for RNN
    xTrain, yTrain = np.array(xTrain), np.array(yTrain)

    # %% [markdown]
    # ## Reshaping the data

    # %%

    # batchsize, inputSize, numberOfFeatures
    xTrain = np.reshape(
        xTrain, (xTrain.shape[0], xTrain.shape[1], numberOfFeatures))

    # %% [markdown]
    # # Part 2 - Build RNN

    # %% [markdown]HHhH
    model = "Best/Models/{}-model.json".format(stock)
    if False:
        regressor = tf.keras.models.load_model(model)
        print("loaded")

    else:
        # %%
        regressor = Sequential()

        regressor.add(LSTM(units=layer1, return_sequences=True,
                           input_shape=(xTrain.shape[1], numberOfFeatures)))
        regressor.add(Dropout(rate=0.2))
        #regressor.add(Dense(units=16,activation = 'relu',input_shape = (xTrain.shape[1],numberOfFeatures)))

        regressor.add(LSTM(units=layer2))
        regressor.add(Dropout(rate=0.2))

        # Last Layer
        # output layer, default since this is regression not classfition
        regressor.add(Dense(units=1))

        # %% [markdown]
        # ## Adding output layer

        # %%
        optimizer = Adam(learning_rate=lr)
        regressor.compile(optimizer=optimizer, loss='mean_squared_error',
                          metrics='accuracy')

        # %%
        regressor.fit(xTrain, yTrain, epochs=20, batch_size=32, verbose=0)

        regressor.save(
            "Models-Testing/{}-{}-{}-model.json".format(stock, layer1, layer2))
    # %% [markdown]
    # ## Part 3 - Predictions and visualing the results

    # %%
    dataTest = temp
    dataTest.rename(columns={'CLOSE': 'Close'}, inplace=True)
    dataTest.rename(columns={'HIGH': 'High'}, inplace=True)
    dataTest.rename(columns={'LOW': 'Low'}, inplace=True)
    dataTest.rename(columns={'VOLUME': 'Volume'}, inplace=True)

    # %%
    dataTest['Datetime'] = pd.to_datetime(dataTest['Datetime'])
    startDate = datetime.strptime(str(startDate), "%Y-%m-%d")
    endDate = datetime.strptime(str(endDate), "%Y-%m-%d")
    if startDate < dataTest.iloc[0, :]['Datetime']:
        startDate = dataTest.iloc[0, :]['Datetime']
    
    dataTest['Close'] = dataTest['Close'].astype(float)
    dataTest["RSI"] = ta.RSI(dataTest['Close'], rsiPeriod)
    dataTest["ADX"] = ta.ADX(
        dataTest['High'], dataTest['Low'], dataTest['Close'], adxPeriod)
    fastk, fastd = ta.STOCHF(
        dataTest['High'], dataTest['Low'], dataTest['Close'])
    dataTest['fastd'] = fastd
    dataTest['fastk'] = fastk
    macd, macdsignal, macdhist = ta.MACD(dataTest['Close'])
    dataTest['macd'] = macd
    dataTest['macdsignal'] = macdsignal
    dataTest['macdhist'] = macdhist
    upper, middle, lower = ta.BBANDS(dataTest['Close'])
    dataTest['bb_lowerband'] = lower
    dataTest['bb_middleband'] = middle
    dataTest['bb_upperband'] = upper

    # %%
    newTest = pd.DataFrame()
    newTest = dataTest[variablesToInclude]

    # %%

    realStockPrice = dataTest['Close'].values  # convert to numpy to train RNN
    newTest["Close"] = newTest['Close'].shift(1)
    # convert to numpy to train RNN
    trainingSet = newTest.iloc[:, 0:numberOfFeatures].values
    realStockPrice = realStockPrice[window+leftshift:]

    # %% [markdown]
    # ## Predict price

    # %%

    inputs = trainingSet
    inputs = sc.transform(inputs)

    # %%
    xTest = []
    for i in range(window+leftshift, len(inputs)):
        xTest.append(inputs[i-window:i])
    xTest = np.array(xTest)
    # batchsize, inputSize, numberOfFeatures
    xTest = np.reshape(
        xTest, (xTest.shape[0], xTest.shape[1], numberOfFeatures))

    # %%
    predictedPrice = regressor.predict(xTest)
    predictedPrice = ySC.inverse_transform(predictedPrice)

    # %% [markdown]
    # # Visualsing the data

    # %%
    plt.plot(realStockPrice, color='red', label="Real Stock Price")
    plt.plot(predictedPrice, color='blue', label="Predicted Stock Price")
    plt.title("{} Stock Price".format(stock))
    plt.xlabel('Time')
    plt.ylabel("Price")
    plt.legend()
    plt.savefig('Graphs-Testing/{}-{}-{}.png'.format(stock, layer1, layer2))
    plt.figure()

    MSE = mean_squared_error(realStockPrice, predictedPrice)
    stocksOwned = {}
    liquidValue = 500000/505
    startingValue = liquidValue
    sold = 0
    for i in range(len(predictedPrice)):
        if realStockPrice[i] < predictedPrice[i] and 'GOOGL' not in stocksOwned:
            stocksOwned['GOOGL'] = (
                realStockPrice[i], liquidValue/realStockPrice[i])
            liquidValue -= liquidValue / \
                realStockPrice[i] * realStockPrice[i]
        elif 'GOOGL' in stocksOwned and stocksOwned['GOOGL'][0] < realStockPrice[i]:
            liquidValue += stocksOwned['GOOGL'][1] * realStockPrice[i]
            percentGain = (
                realStockPrice[i] - stocksOwned['GOOGL'][0]) / stocksOwned['GOOGL'][0]
            sold += 1

            stocksOwned.pop('GOOGL')
        if 'GOOGL' in stocksOwned and i == len(predictedPrice) - 1:
            liquidValue += stocksOwned['GOOGL'][1] * realStockPrice[i]
            percentGain = (
                realStockPrice[i] - stocksOwned['GOOGL'][0]) / stocksOwned['GOOGL'][0]
            stocksOwned.pop('GOOGL')
            sold += 1
    profit = liquidValue - startingValue
    return MSE



In [10]:

companies = []
stocks = {}
profit = 0
data_path = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/Datasets/30y_stock_csvs/"
for (dirpath, dirnames, filenames) in walk(data_path):
    companies.extend(filenames)
    break
companyLayers = {}
for company in tqdm(companies):
    company = company[:-4]
    if os.path.exists("Best/Graphs/{}.png".format(company)):
        continue
    print(padding)
    print(company)
    mse = float('inf')
    layers = ()
    year = random.randint(0, 82)
    leapyear = False
    if (year % 400 == 0) and (year % 100 == 0) or ((year % 4 == 0) and (year % 100 != 0)):
        leapyear = True
    if year > 21:
        year = "19{}".format(year)
    else:
        if year < 10:
            year = year = "200{}".format(year)
        else:
            year = "20{}".format(year)
    month = str(random.randint(1, 12))
    possibleDays = {'1': 31, '2': 28, '3': 31, '4': 30, '5': 31,
                    '6': 30, '7': 31, '8': 31, '9': 30, '10': 31, '11': 30, '12': 31}
    day = str(random.randint(1, possibleDays[month]))
    if leapyear and month is '2':
        day = str(random.randint(1, 29))

    startDate = '{}-{}-{}'.format(year, month, day)

    startDate = datetime.strptime(startDate, "%Y-%m-%d")

    days = timedelta(76)
    endDate = startDate + days
    startDate = str(startDate)[:10]
    endDate = str(endDate)[:10]
    for layer1 in range(1, 21):
        if company is 'CSCO':
            layers = (10, 1)
            break
        tempMse = rnn(company, startDate = startDate, endDate=endDate,layer1=layer1, layer2=1)
        if tempMse == 0:
            continue
        if tempMse < mse:
            layers = (layer1, 1)
            mse = tempMse
        print(tempMse)
    if not layers:
        continue
    layer1 = layers[0]
    for layer2 in range(1, 21):
        tempMse = rnn(company, startDate=startDate, endDate=endDate, layer1=layer1, layer2=layer2)
        if tempMse == 0:
            continue
        if tempMse < mse:
            layers = (layer1, layer2)
            mse = tempMse
        print(tempMse)
    layer1, layer2 = layers
    if mse == float('inf'):
        continue
    os.rename("Models-Testing/{}-{}-{}-model.json".format(company,
                                                          layer1, layer2), "Best/Models/{}-model.json".format(company))
    os.rename('Graphs-Testing/{}-{}-{}.png'.format(company, layer1,
                                                   layer2), "Best/Graphs/{}.png".format(company))
    dir = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/IshanCode/Models-Testing"
    for f in os.listdir(dir):
        path = os.path.join(dir, f)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)
    dir = "/Users/ishan/Coding/Wpi/StockMarketSimulationIQP/IshanCode/Graphs-Testing"
    for f in os.listdir(dir):
        path = os.path.join(dir, f)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)

# %%


  0%|          | 0/505 [00:00<?, ?it/s]


--------------------------------
OGN


TypeError: '<' not supported between instances of 'datetime.datetime' and 'str'