In [1]:
import pandas as pd
import numpy as np

from pathlib import Path

In [2]:
# Machine-learning specific imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json

import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [3]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    Note: unpickling executes code embedded in the file, so only use this on
    files this project produced itself.
    """
    with open(path, mode='rb') as handle:
        loaded = pickle.load(handle)
    return loaded
    
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` (overwriting it) with the highest protocol."""
    with open(path, mode='wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# Load the per-symbol accuracy table, expose the former index as a 'symbol'
# column, and order the rows by ascending 'acc_wo_ftd_low'.
path = Path('../Resources/date_shift_acc_df_post_ML.pkl')
acc_df = (
    load_obj(path)
    .reset_index()
    .rename(columns={'index':'symbol'})
    .sort_values('acc_wo_ftd_low')
    .reset_index(drop=True)
)

In [16]:
# Display the accuracy table (rows are ordered by ascending 'acc_wo_ftd_low').
acc_df

Unnamed: 0,symbol,acc_w_ftd_low,acc_w_ftd_high,acc_wo_ftd_low,acc_wo_ftd_high
0,XPL,2.665433e-03,3.245822e-03,2.729351e-03,4.247193e-03
1,DHY,3.253384e-03,4.763971e-03,4.023974e-03,4.892987e-03
2,AUMN,3.963259e-03,4.876432e-03,4.049390e-03,6.080447e-03
3,CPHI,4.751523e-03,6.046605e-03,5.088973e-03,6.425967e-03
4,UAMY,5.359292e-03,6.494775e-03,5.740455e-03,6.685135e-03
...,...,...,...,...,...
760,NAKD,4.276503e+05,4.565947e+05,4.266976e+05,4.569624e+05
761,NSPR,5.070218e+05,5.513179e+05,5.419123e+05,5.518128e+05
762,AYTU,1.276814e+06,1.361455e+06,1.254663e+06,1.349234e+06
763,PSHG,1.148574e+07,1.241018e+07,1.155760e+07,1.250438e+07


In [17]:
# Symbol stored in the first row of the sorted table.
acc_df.iloc[0].symbol

'XPL'

In [18]:
## Get top 20 symbols
# Collects the 'symbol' value of the first 20 rows of acc_df, in row order.
top_symbols = []

# NOTE(review): `i` stays defined at notebook scope after this loop (value 19),
# so any later cell that reads a bare `i` silently picks up this value.
for i in range(20):
    top_symbols.append(acc_df.iloc[i].symbol)

In [20]:
#top_symbols

In [21]:
## Retrain neural networks using no validation split. 
## Train right up to Dec 31st. Then attempt predictions into January and compare. 

In [22]:
# Use the fourth entry of top_symbols as the single symbol to experiment with.
test_symbol = top_symbols[3]
test_symbol

'CPHI'

In [23]:
def get_data(symbol, n_days):
    """Load one symbol's feature frame and its close prices shifted by ``n_days`` rows.

    The historical pickle is expected to hold a mapping whose ``symbol`` entry is a
    DataFrame containing at least the ``close`` and ``adjClose`` columns.
    The close series is built by dropping the first ``n_days`` historical closes and
    appending the first ``n_days`` closes of the newer (January 2022) data.

    Parameters
    ----------
    symbol : str
        Ticker; used both in the file names and as the key into the loaded mapping.
    n_days : int
        Number of rows to shift the close series by.

    Returns
    -------
    features_df : pandas.DataFrame
        The historical frame with its index reset and ``close``/``adjClose`` removed.
    new_close_df : pandas.DataFrame
        Historical closes from row ``n_days`` onward followed by at most ``n_days``
        rows of the newer data, with a fresh 0..N-1 index.

    Raises
    ------
    FileNotFoundError
        If the historical pickle for ``symbol`` does not exist.
    """
    historical_path = Path('../FilesExport_Complete_DFs_TI_noShift/'+symbol+'_TI_DF_no_shift.pkl')
    df = load_obj(historical_path)[symbol]

    # The reset index column is renamed on the assumption that the index is called "Date".
    df_close = df[['close']].reset_index().rename(columns={"Date": "Close_Date"})
    features_df = df.reset_index().drop(columns=['close','adjClose'])

    # Targets start n_days rows later than the features.
    new_close_df = df_close.iloc[n_days:, :].reset_index(drop=True)

    ## Prevent multiple API calls each time, but use API when needed.
    cached_api_path = Path('../FilesExport_Updated_API_data/'+symbol+'_jan_2022.pkl')
    try:
        api_df = load_obj(cached_api_path)
    except FileNotFoundError:
        # Only a missing cache file triggers the API fallback; any other failure
        # (e.g. a corrupt pickle) is surfaced instead of being silently swallowed.
        # NOTE(review): get_FMP_historical_data is not defined in the visible part
        # of this notebook - confirm it is defined/imported before this runs.
        api_df = get_FMP_historical_data(symbol)

    new_data = api_df[['close']].reset_index().rename(columns={"Date": "Close_Date"})
    new_data = new_data.iloc[0:n_days]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
    new_close_df = pd.concat([new_close_df, new_data], ignore_index=True)

    return features_df, new_close_df

## Dropping QUANTITY_FAILS from dataframe before machine learning 


def prepare_data(symbol,n_days,return_data=False):
    """Return the standardized feature matrix and close-price targets for ``symbol``.

    The 'Date' and 'QUANTITY_FAILS' columns are removed from the features, and the
    remaining columns are scaled with a StandardScaler fitted on that same data.

    Returns ``(X, y)``; when ``return_data`` equals True the underlying
    ``features_df`` and ``new_close_df`` frames are appended to the tuple.
    """
    features_df, new_close_df = get_data(symbol, n_days)

    raw_features = features_df.drop(columns=['Date', 'QUANTITY_FAILS']).values
    X = StandardScaler().fit(raw_features).transform(raw_features)
    y = new_close_df['close'].values

    if return_data == True:
        return X, y, features_df, new_close_df
    return X, y

In [27]:
# Scaled features and close-price targets for the test symbol, using n_days=10.
X, y = prepare_data(test_symbol,10)

In [28]:
# Inspect the standardized feature matrix.
X

array([[-1.21192453, -1.21804103, -1.24309245, ..., -0.57986621,
         0.6730346 , -1.28621835],
       [-1.26176113, -1.17210147, -1.24309245, ..., -0.34304963,
         0.41134332, -0.46718735],
       [-1.21192453, -1.17210147, -1.18975889, ..., -0.18742731,
         0.32357639, -0.74329128],
       ...,
       [ 0.5821928 ,  0.43578293,  0.51691518, ...,  1.19287504,
         0.60600062, -1.06347904],
       [ 0.53235621,  0.57360159,  0.62358231, ...,  1.43194702,
         0.67611777, -0.48342876],
       [ 0.48251961,  0.48172248,  0.46358162, ...,  1.47254415,
         0.79882278, -0.33029548]])

In [31]:
## Trains and exports models for a single symbol.
## Call the function once for each individual symbol. It returns nothing.

def _save_model_artifacts(nn, model_summary, export_prefix):
    """Write architecture JSON, weights (.h5) and pickled summary under ``export_prefix``."""
    # Create the target folder on demand so a fresh export path does not fail on open().
    Path(export_prefix).parent.mkdir(parents=True, exist_ok=True)

    # Save model data
    with open(Path(export_prefix+'_model_data.json'), "w") as json_file:
        json_file.write(nn.to_json())

    # Save weights
    nn.save_weights(export_prefix+'_model_weights.h5')

    ## Save model summary
    save_obj(model_summary, export_prefix+'_model_summary.pkl')


def mean_squared_model(
    symbol,
    export_path,  ## Require export path to help avoiding re-writing models 
    n_days = 5, ## Default value 5, but should be tried with 1-30  
    model_count = 5, ## Number of times model runs before saving the best one. 
    #validation_split_value=0.3, ## Default 0.3 for 70/30 split 
    epochs_value=400,
    units1 = 8,
    units2 = 8,
    num_of_inputs = 20,
    num_of_outputs= 1,
    model_type = 'NN'
    ):    
    """Train ``model_count`` networks for ``symbol`` and export the extreme ones.

    Each run builds a fresh two-hidden-layer dense network, fits it on the full
    data returned by ``prepare_data(symbol, n_days)`` (no validation split) and
    evaluates it on that same data. The value called "accuracy" here is the
    ``mse`` metric, so a lower value means a closer fit to the training data.

    The run with the lowest value is exported under ``export_path + 'Low_Acc/'``
    and the run with the highest value under ``export_path + 'High_Acc/'``; the
    first run is written to both. Every export consists of
    ``<symbol>_<model_type>_<n_days>_model_data.json``, ``..._model_weights.h5``
    and ``..._model_summary.pkl``.

    Side effects: writes the files above and updates the module-level
    ``symbol_accuracy_dict`` under the key ``'<symbol>_<n_days>'``. That entry is
    overwritten whenever a run sets either a new low or a new high, so it holds
    the most recent record-setting run, not necessarily the lowest one.

    Parameters
    ----------
    symbol : str
        Ticker to train on.
    export_path : str
        Folder prefix for the exports; it is concatenated directly, so it must
        end with a path separator.
    n_days : int
        Shift passed through to ``prepare_data``.
    model_count : int
        Number of independent training runs.
    epochs_value : int
        Epochs per run.
    units1, units2 : int
        Sizes of the first and second hidden layers.
    num_of_inputs : int
        Input dimension; must match the number of feature columns in ``X``.
    num_of_outputs : int
        Units in the linear output layer.
    model_type : str
        Only used as part of the exported file names.

    Returns
    -------
    None
    """
    # Resolve the module-level results dict before training so that a missing
    # definition raises NameError immediately rather than after the first fit.
    accuracy_results = symbol_accuracy_dict

    X, y = prepare_data(symbol,n_days)

    n_days_string = str(n_days) ## For exporting because can't concat 'int'
    accuracy_dict_symbol = str(symbol)+'_'+n_days_string
    export_path_prefix_lowAcc = export_path+'Low_Acc/'+symbol+'_'+model_type+'_'+n_days_string 
    export_path_prefix_highAcc = export_path+'High_Acc/'+symbol+'_'+model_type+'_'+n_days_string 

    # Running extremes; both are set by the first run.
    model_accuracy_low = None
    model_accuracy_high = None

    for i in range(model_count):
        ## Create Neural Network 

        # Define the model - deep neural network with two layers
        nn = Sequential()

        # First hidden layer
        nn.add(Dense(units=units1, input_dim=num_of_inputs, activation="relu"))

        # Second hidden layer
        nn.add(Dense(units=units2, activation="relu"))

        # Output layer
        nn.add(Dense(units=num_of_outputs, activation="linear"))

        # Compile the model
        nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

        # Fit the model on all rows (no validation split)
        nn.fit(X, y, 
               epochs=epochs_value, 
               verbose=0)

        # evaluate() yields the loss followed by the compiled metric; both are
        # mean squared error here, measured on the training data itself.
        _, model_accuracy = nn.evaluate(X, y, verbose=0)
        
        model_summary = {
            'model_accuracy':model_accuracy,
            'n_days':n_days,
            #'validation_split_value':validation_split_value,
            'epochs_value':epochs_value,
                        }

        first_run = i == 0

        ## Save first model, afterwards rewrite saved model if the value is lower
        if first_run or model_accuracy < model_accuracy_low:
            model_accuracy_low = model_accuracy
            accuracy_results[accuracy_dict_symbol] = {
                'model_accuracy':model_accuracy,
                'n_days':n_days
            }
            _save_model_artifacts(nn, model_summary, export_path_prefix_lowAcc)

        ## Save first model, afterwards rewrite saved model if the value is higher
        if first_run or model_accuracy > model_accuracy_high:
            model_accuracy_high = model_accuracy
            accuracy_results[accuracy_dict_symbol] = {
                'model_accuracy':model_accuracy,
                'n_days':n_days
            }
            _save_model_artifacts(nn, model_summary, export_path_prefix_highAcc)
    ## End of for loop 
    ## Return nothing 

In [34]:
symbol = test_symbol
symbol_accuracy_dict = {}  # filled in by mean_squared_model

# n_days is given explicitly. It used to be `n_days=i`, where `i` was only the
# leftover index of the earlier top-symbols loop, so the value depended on which
# cells had been run. 10 matches the prepare_data(test_symbol, 10) call above.
mean_squared_model(symbol,
                   export_path='../Model_Data/Model_Test_Improve_NN_no_FTD/',
                   n_days=10,
                   epochs_value=200,
                   num_of_inputs=19
                  )