In [1]:
## This notebook trains models that attempt to forecast price movement
## using data from 2016 through Jan 31st, 2022. The first 1, 2, 5, and 10 trading days
## of February will be predicted.

In [2]:
import pandas as pd

import glob

from pathlib import Path
import csv

import glob



In [3]:
# Machine-learning specific imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [4]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    produced by this project.
    """
    with open(path, mode='rb') as handle:
        obj = pickle.load(handle)
    return obj
    
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` (overwriting it) with the highest protocol."""
    with open(path, mode='wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [5]:
import datetime

def _next_weekday(date):
    """Return the first Monday-Friday calendar day strictly after ``date``.

    Market holidays are not taken into account.
    """
    following = date + datetime.timedelta(days=1)
    while following.weekday() >= 5:  # 5 = Saturday, 6 = Sunday
        following = following + datetime.timedelta(days=1)
    return following


def get_data_updated(symbol,n_days):
    """Load one symbol's data and align features with the close ``n_days`` rows later.

    The pickle at ``../FilesExport_Updated_DFs_01_31/<symbol>_ti_df_no_ftd.pkl``
    is loaded and indexed by ``symbol`` to obtain a DataFrame that must contain
    ``close`` and ``adjClose`` columns (its index is presumably named ``Date``
    -- the rename below and the caller's ``drop('Date')`` rely on that).

    Parameters
    ----------
    symbol : str
        Ticker symbol; used both in the file name and as the key into the pickle.
    n_days : int
        Forecast horizon in rows. Must be >= 0.

    Returns
    -------
    features_df : DataFrame
        ``import_df`` with its index reset and ``close``/``adjClose`` removed.
    new_close_df : DataFrame
        Columns ``Close_Date`` and ``close``. Row ``j`` holds the close found
        ``n_days`` rows after feature row ``j``. The final ``n_days`` rows have
        no real future close, so they are padded with the last known price and
        dated on the weekdays following the last known date, which keeps both
        frames the same length.

    Raises
    ------
    ValueError
        If ``n_days`` is negative.
    IndexError
        If the symbol has no more than ``n_days`` rows, so nothing is left to
        pad from.
    """
    if n_days < 0:
        raise ValueError('n_days must be >= 0, got ' + str(n_days))

    import_path = Path('../FilesExport_Updated_DFs_01_31/'+symbol+'_ti_df_no_ftd.pkl')
    data = load_obj(import_path)
    import_df = data[symbol]

    df_close = import_df[['close']]
    df_close = df_close.reset_index().rename(columns={"Date": "Close_Date"})

    features_df = import_df.reset_index().drop(columns=['close','adjClose'])

    # Shift the targets: dropping the first n_days closes lines feature row j
    # up with the close n_days rows later.
    new_close_df = df_close.iloc[n_days: , :].reset_index(drop=True)

    if n_days == 0:
        # Nothing was shifted, so there is nothing to pad.
        return features_df, new_close_df

    if new_close_df.empty:
        raise IndexError(
            'not enough rows for ' + str(symbol) + ' to shift by n_days=' + str(n_days)
        )

    last_date = new_close_df.iloc[-1]['Close_Date']
    last_price = new_close_df.iloc[-1]['close']

    # Future weekdays, starting with the first one AFTER the last known date.
    pad_dates = []
    next_date = last_date
    for _ in range(n_days):
        next_date = _next_weekday(next_date)
        pad_dates.append(next_date)

    # Build the padding once and concatenate once (DataFrame.append no longer
    # exists in pandas >= 2.0).
    pad_df = pd.DataFrame({'Close_Date': pad_dates, 'close': [last_price] * n_days})
    new_close_df = pd.concat([new_close_df, pad_df], ignore_index=True)

    return features_df, new_close_df

## Prepare Data for ML
def prepare_data_updated(symbol,n_days,return_data=False):
    """Build the scaled feature matrix and the target vector for one symbol.

    Parameters
    ----------
    symbol : str
        Ticker symbol forwarded to ``get_data_updated``.
    n_days : int
        Forecast horizon forwarded to ``get_data_updated``.
    return_data : bool, default False
        When equal to ``True`` the two source DataFrames are returned as well.

    Returns
    -------
    (X, y) or (X, y, features_df, new_close_df)
        ``X`` is every feature column except ``Date`` after passing through a
        ``StandardScaler`` fitted on that same matrix; ``y`` is the ``close``
        column of the shifted close frame.
    """
    features_df, new_close_df = get_data_updated(symbol,n_days)

    raw_features = features_df.drop(columns={'Date'}).values
    y = new_close_df['close'].values

    # The scaler is fitted on the complete matrix and then applied to it.
    scaler = StandardScaler()
    scaler.fit(raw_features)
    X = scaler.transform(raw_features)

    # Equality with True (not plain truthiness) selects the long return form.
    if not (return_data == True):
        return X, y
    return X, y, features_df, new_close_df

In [6]:
## Takes one symbol, trains the models for it and saves them to disk.
## Call the function once for each individual symbol. Nothing is returned.

def _save_model_artifacts(nn, model_summary, export_path_prefix):
    """Write one model's architecture, weights and summary using ``export_path_prefix``.

    Produces ``<prefix>_model_data.json``, ``<prefix>_model_weights.h5`` and
    ``<prefix>_model_summary.pkl``, overwriting files that already exist.
    """
    # Create the target folder if needed so a missing directory does not make
    # every save fail.
    Path(export_path_prefix).parent.mkdir(parents=True, exist_ok=True)

    # Save model data
    file_path = Path(export_path_prefix+'_model_data.json')
    with open(file_path, "w") as json_file:
        json_file.write(nn.to_json())

    # Save weights
    nn.save_weights(export_path_prefix+'_model_weights.h5')

    ## Save model summary
    save_obj(model_summary, export_path_prefix+'_model_summary.pkl')


def mean_squared_model(
    symbol,
    export_path,  ## Require export path to help avoiding re-writing models
    n_days = 5, ## Default value 5, but should be tried with 1-30
    model_count = 5, ## Number of times model runs before saving the best one.
    validation_split_value=0.3, ## Default 0.3 for 70/30 split
    epochs_value=400,
    units1 = 8,
    units2 = 8,
    num_of_inputs = 20,
    num_of_outputs= 1,
    model_type = 'NN'
    ):
    """Train ``model_count`` small dense networks for one symbol and keep the extremes.

    Each network has two ReLU hidden layers and a linear output, is compiled
    with mean-squared-error loss and the Adam optimizer, and is then evaluated
    on the same ``X``/``y`` it was fitted on. The value called "accuracy"
    throughout is the MSE metric returned by ``nn.evaluate``, so LOWER is
    better: the ``Low_Acc/`` files hold the run with the lowest MSE and the
    ``High_Acc/`` files the run with the highest MSE.

    Side effects
    ------------
    * Writes ``<export_path>{Low_Acc,High_Acc}/<symbol>_<model_type>_<n_days>_*``
      (architecture JSON, weights ``.h5`` and summary pickle).
    * Updates the notebook-level dict ``symbol_accuracy_dict`` under the key
      ``'<symbol>_<n_days>'``; that dict must exist before this function runs.
      NOTE(review): the entry is overwritten whenever EITHER extreme changes,
      so it may end up describing the highest-MSE run -- confirm that this is
      what is wanted.

    Parameters
    ----------
    symbol : str
        Ticker symbol to load data for.
    export_path : str
        Folder prefix (with trailing slash) that receives the saved models.
    n_days : int
        Forecast horizon passed to ``prepare_data_updated``.
    model_count : int
        Number of independently initialised networks to train.
    validation_split_value : float
        ``validation_split`` passed to ``fit``.
    epochs_value : int
        Number of training epochs.
    units1, units2 : int
        Sizes of the first and second hidden layers.
    num_of_inputs : int
        ``input_dim`` of the first layer.
    num_of_outputs : int
        Size of the output layer.
    model_type : str
        Label used only in the exported file names.

    Returns
    -------
    None
    """
    X, y = prepare_data_updated(symbol,n_days)

    n_days_string = str(n_days) ## For exporting because can't concat 'int'
    accuracy_dict_symbol = str(symbol)+'_'+n_days_string
    export_path_prefix_lowAcc = export_path+'Low_Acc/'+symbol+'_'+model_type+'_'+n_days_string
    export_path_prefix_highAcc = export_path+'High_Acc/'+symbol+'_'+model_type+'_'+n_days_string

    # Lowest / highest MSE seen so far; both are set by the first run.
    model_accuracy_low = None
    model_accuracy_high = None

    for i in range(model_count):
        # Define the model - deep neural network with two layers
        nn = Sequential()

        # First hidden layer
        nn.add(Dense(units=units1, input_dim=num_of_inputs, activation="relu"))

        # Second hidden layer
        nn.add(Dense(units=units2, activation="relu"))

        # Output layer
        nn.add(Dense(units=num_of_outputs, activation="linear"))

        # Compile the model
        nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

        # Fit the model
        nn.fit(X, y,
               validation_split=validation_split_value,
               epochs=epochs_value,
               verbose=0)

        # evaluate returns [loss, mse]; both are MSE for this configuration.
        model_loss, model_accuracy = nn.evaluate(X, y, verbose=0)

        model_summary = {
            'model_accuracy':model_accuracy,
            'n_days':n_days,
            'validation_split_value':validation_split_value,
            'epochs_value':epochs_value,
                        }

        # The first run seeds both extremes; later runs only replace the one
        # they beat.
        is_first_run = (i == 0)
        is_new_low = is_first_run or model_accuracy < model_accuracy_low
        is_new_high = is_first_run or model_accuracy > model_accuracy_high

        if is_new_low or is_new_high:
            symbol_accuracy_dict[accuracy_dict_symbol] = {
                'model_accuracy':model_accuracy,
                'n_days':n_days
            }

        if is_new_low:
            model_accuracy_low = model_accuracy
            _save_model_artifacts(nn, model_summary, export_path_prefix_lowAcc)

        if is_new_high:
            model_accuracy_high = model_accuracy
            _save_model_artifacts(nn, model_summary, export_path_prefix_highAcc)
    ## End of for loop
    ## Return nothing

In [7]:
## Import symbol list 
# Unpickle the collection of symbols that the training loop iterates over.
path = Path('../Resources/Updated_01_DF_success_list_01_31.pkl')
symbol_list = load_obj(path)
# Bare last expression: the notebook displays how many symbols were loaded.
len(symbol_list)

767

In [8]:
## Create empty dict for accuracy data - only really needed when
## looping multiple symbols or different models.
## mean_squared_model writes into symbol_accuracy_dict as a side effect,
## so it has to exist before the loop starts.
symbol_accuracy_dict = {}
error_dict = {}
error_count = 0

for index, symbol in enumerate(symbol_list):
    try:
        ## Train one set of models per forecast horizon. The first failure
        ## skips the remaining horizons for this symbol.
        for i in (1, 2, 5, 10):
            mean_squared_model(symbol,
                               export_path='../Model_Data/Feb2022_NN_models/',
                               n_days=i,
                               epochs_value=200,
                               validation_split_value=0.1,
                               num_of_inputs=19
                              )
    except Exception as exc:
        ## Catch Exception rather than everything so Ctrl-C / kernel interrupt
        ## still stops the run, and keep the reason for later inspection.
        error_dict[error_count] = {
            'index':index,
            'symbol':symbol,
            'i':i,
            'error':repr(exc)
        }
        error_count += 1

## Export symbol_accuracy_dict
path = Path('../Resources/Feb2022_symbol_accuracy_dict.pkl')
save_obj(symbol_accuracy_dict,path)

In [9]:
# Show the collected results. Keys are '<symbol>_<n_days>'; 'model_accuracy' is the
# MSE value returned by nn.evaluate inside mean_squared_model, not a classification accuracy.
symbol_accuracy_dict 

{'AAL_1': {'model_accuracy': 1.00552499294281, 'n_days': 1},
 'AAL_2': {'model_accuracy': 1.990746021270752, 'n_days': 2},
 'AAL_5': {'model_accuracy': 3.8584377765655518, 'n_days': 5},
 'AAL_10': {'model_accuracy': 7.194328308105469, 'n_days': 10},
 'AAU_1': {'model_accuracy': 0.001575897797010839, 'n_days': 1},
 'AAU_2': {'model_accuracy': 0.0029736601281911135, 'n_days': 2},
 'AAU_5': {'model_accuracy': 0.006354318931698799, 'n_days': 5},
 'AAU_10': {'model_accuracy': 0.010364985093474388, 'n_days': 10},
 'AAXJ_1': {'model_accuracy': 1.2384130954742432, 'n_days': 1},
 'AAXJ_2': {'model_accuracy': 1.6987038850784302, 'n_days': 2},
 'AAXJ_5': {'model_accuracy': 3.548926591873169, 'n_days': 5},
 'AAXJ_10': {'model_accuracy': 6.953153133392334, 'n_days': 10},
 'ABEO_1': {'model_accuracy': 0.17640310525894165, 'n_days': 1},
 'ABEO_2': {'model_accuracy': 0.37196213006973267, 'n_days': 2},
 'ABEO_5': {'model_accuracy': 0.7272997498512268, 'n_days': 5},
 'ABEO_10': {'model_accuracy': 1.0951

In [10]:
# ## EYEG messes up when predicting for n_days = 10 
# ## Adding try loop to code and continuing 

# for index in range(235,len(symbol_list)):
#     symbol = symbol_list[index] 
    
#     try:
#         i = 1
#         mean_squared_model(symbol,
#                            export_path='../Model_Data/Date_Test_NN_noFTD_all/',
#                            n_days=i,
#                            epochs_value=200,
#                            validation_split_value=0.1,
#                            num_of_inputs=19
#                           )
#         i = 2
#         mean_squared_model(symbol,
#                            export_path='../Model_Data/Date_Test_NN_noFTD_all/',
#                            n_days=i,
#                            epochs_value=200,
#                            validation_split_value=0.1,
#                            num_of_inputs=19
#                           )
#         i = 5
#         mean_squared_model(symbol,
#                            export_path='../Model_Data/Date_Test_NN_noFTD_all/',
#                            n_days=i,
#                            epochs_value=200,
#                            validation_split_value=0.1,
#                            num_of_inputs=19
#                           )
#         i = 10
#         mean_squared_model(symbol,
#                            export_path='../Model_Data/Date_Test_NN_noFTD_all/',
#                            n_days=i,
#                            epochs_value=200,
#                            validation_split_value=0.1,
#                            num_of_inputs=19
#                           )
#     except: 
#         continue

# ## Export symbol_accuracy_dict
# path = Path('../Resources/NN_noFTDall_symbol_list.pkl')
# save_obj(symbol_accuracy_dict,path)

In [11]:
# ## load model data 
# file_path = Path('../Model_Data/Date_Test_NN/'+symbol+'_NN_model_data.json')
# with open(file_path, "r") as json_file:
#     model_json = json_file.read()
# loaded_model = model_from_json(model_json)

# # load weights into new model
# file_path = Path('../Model_Data/Date_Test_NN/'+symbol+'_NN_model_weights.h5')
# loaded_model.load_weights(file_path)

# # Load model summary
# file_path = Path('../Model_Data/Date_Test_NN/'+symbol+'_model_summary.pkl')
# model_summary = load_obj(file_path)

# X , y, features, close_df = prepare_data(symbol,n_days,return_data=True)


# close_df["predicted"] = loaded_model.predict(X)

In [12]:
# export_path='../Model_Data/Date_Test_NN/'
# model_type = 'NN'

# model_summary_list = [] 

# for i in range(1,11):
#     # Load model summary
#     n_days_string = str(i)
#     file_path = Path('_model_summary.pkl')
#     model_summary = load_obj(file_path)
    
#     model_summary_list.append(model_summary)
# model_summary_list

In [13]:
# n_days = 1
# model_type = 'NN'

# export_path='../Model_Data/Date_Test_NN_noFTD/'
# export_path_prefix = export_path+symbol+'_'+model_type+'_'+n_days_string
# n_days_string = str(n_days)

# file_path = Path(export_path_prefix+'_model_data.json')
# with open(file_path, "r") as json_file:
#     model_json = json_file.read()
# loaded_model = model_from_json(model_json)

# # load weights into new model
# file_path = Path(export_path_prefix+'_model_weights.h5')
# loaded_model.load_weights(file_path)

# X , y, features, close_df = prepare_data(symbol,n_days,return_data=True)


# close_df["predicted"] = loaded_model.predict(X)
# close_df.set_index('Close_Date',inplace=True)
# close_df[['close','predicted']].tail(30).plot(use_index=True)