In [1]:
import pandas as pd

import glob

from pathlib import Path
import csv

In [2]:
# Initial imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json

import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [3]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Deserialize and return the object stored in the pickle file at ``path``.

    The file is opened in binary mode and is closed again before the
    restored object is handed back to the caller.

    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(path, 'rb') as pickle_file:
        restored = pickle.load(pickle_file)
    return restored
    
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` with the highest pickle protocol.

    The file is opened in binary write mode, so an existing file at ``path``
    is overwritten. Nothing is returned.
    """
    with open(path, 'wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
## Import symbol list: unpickle the short-listed symbols used by the rest of the notebook
path = Path('../Resources/short_list_for_ML.pkl')
key_list = load_obj(path)
len(key_list)  # bare last expression, so the cell displays how many symbols were loaded

379

In [5]:
# Work on the first 25 symbols only, then display that subset.
test_list = key_list[:25]
test_list

['AAA',
 'ACOR',
 'ACWI',
 'ACWX',
 'ADMP',
 'AEZS',
 'AHPI',
 'AINC',
 'AMCX',
 'AMSC',
 'ANY',
 'ARAV',
 'ARCT',
 'ARDX',
 'ASTC',
 'ATHE',
 'ATNX',
 'ATXI',
 'AYRO',
 'AZRE',
 'AZRX',
 'BCLI',
 'BEEM',
 'BEST',
 'BGFV']

In [6]:
## Load the non-shifted data together with its saved model
def get_no_shift_model(symbol):
    """Load the non-shifted data frame and the saved Keras model for ``symbol``.

    Parameters
    ----------
    symbol : str
        Used to build the file names and as the key into the pickled dict.

    Returns
    -------
    tuple
        ``(loaded_model, df, X, y)`` where ``loaded_model`` is the model
        rebuilt from its JSON architecture and ``.h5`` weights and then
        compiled, ``df`` is a copy of the symbol's DataFrame, ``X`` holds the
        standardised values of every column except ``close`` and
        ``adjClose``, and ``y`` holds the ``close`` values.

    NOTE(review): the scaler is fitted on the data loaded here rather than
    restored from training -- confirm this matches how the model was trained.
    """
    frames_by_symbol = load_obj(
        Path('../FilesExport_DFs_with_TI_pkl/'+symbol+'_data_dict_with_technicals.pkl')
    )
    df = frames_by_symbol[symbol].copy()

    # Target is the 'close' column; features are everything but close/adjClose.
    y = df['close'].values
    raw_features = df.drop(columns=['close', 'adjClose']).values

    # Standardise the features with a scaler fitted on these same rows.
    feature_scaler = StandardScaler().fit(raw_features)
    X = feature_scaler.transform(raw_features)

    # Rebuild the architecture from its JSON description ...
    architecture_path = Path('../Model_Data_low_loss/'+symbol+'_model_data.json')
    with open(architecture_path, "r") as architecture_file:
        architecture_json = architecture_file.read()
    loaded_model = model_from_json(architecture_json)

    # ... and restore the saved weights into it.
    weights_path = Path('../Model_Data_low_loss/'+symbol+'_model_weights.h5')
    loaded_model.load_weights(weights_path)

    # Compile so the caller can evaluate it (MSE as loss and as metric);
    # nothing is scored or printed here.
    loaded_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

    return loaded_model, df, X, y

In [7]:
#noshift_model, df , X , y = get_no_shift_model(test_list[2])

In [8]:
# score = noshift_model.evaluate(X, y, verbose=0)
# score[0]
# print("%s: %.2f%%" % (noshift_model.metrics_names[1], score[1]*100))
# print()

# ## take dataframe copy and compare predicted values to actual values 
# df2 = df.copy()
# df2['predicted'] = noshift_model.predict(X)
# df_pred = df2[['close','predicted']]
# df_pred

In [9]:
## Load the shifted data together with its saved model
def get_with_shift_model(symbol):
    """Load the "shifted" data frame and the saved Keras model for ``symbol``.

    Parameters
    ----------
    symbol : str
        Used to build the file names and as the key into the pickled dict.

    Returns
    -------
    tuple
        ``(loaded_model, df, X, y)`` where ``loaded_model`` is the model
        rebuilt from its JSON architecture and ``.h5`` weights and then
        compiled, ``df`` is a copy of the symbol's DataFrame, ``X`` holds the
        standardised values of every column except ``close``, and ``y`` holds
        the ``close`` values.

    NOTE(review): the scaler is fitted on the data loaded here rather than
    restored from training -- confirm this matches how the model was trained.
    """
    frames_by_symbol = load_obj(
        Path('../FilesExport_DFs_with_TI_shifted/'+symbol+'_df_with_shifted_TI.pkl')
    )
    df = frames_by_symbol[symbol].copy()

    # Target is the 'close' column; every other column is a feature.
    y = df['close'].values
    raw_features = df.drop(columns=['close']).values

    # Standardise the features with a scaler fitted on these same rows.
    feature_scaler = StandardScaler().fit(raw_features)
    X = feature_scaler.transform(raw_features)

    # Rebuild the architecture from its JSON description ...
    architecture_path = Path('../Model_Data_shift_low_loss/'+symbol+'_model_data.json')
    with open(architecture_path, "r") as architecture_file:
        architecture_json = architecture_file.read()
    loaded_model = model_from_json(architecture_json)

    # ... and restore the saved weights into it.
    weights_path = Path('../Model_Data_shift_low_loss/'+symbol+'_model_weights.h5')
    loaded_model.load_weights(weights_path)

    # Compile so the caller can evaluate it (MSE as loss and as metric);
    # nothing is scored or printed here.
    loaded_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

    return loaded_model, df, X, y

In [10]:
# shift_model, df , X , y = get_with_shift_model(test_list[2])
# score = shift_model.evaluate(X, y, verbose=0)
# score[0]

In [11]:
# Module-level results table: symbol -> {'model_type': ..., 'acc': ...}
model_dict_info = {}

def evaluate_models(symbol):
    """Record in ``model_dict_info`` which saved model scores lower for ``symbol``.

    Both the non-shifted and the shifted model are loaded and evaluated on
    the data returned by their own loader. The first value returned by
    ``evaluate`` is compared; the model with the smaller value wins, and a
    tie goes to the non-shifted model.

    Side effect: ``model_dict_info[symbol]`` is set to a dict with the keys
    ``'model_type'`` (``'shift'`` or ``'no_shift'``) and ``'acc'`` (the
    winning score). Despite its name, ``'acc'`` holds the value at index 0
    of ``evaluate``'s result -- the loss for a Keras model compiled with
    metrics -- so lower is better. Nothing is returned.
    """
    plain_model, _, X, y = get_no_shift_model(symbol)
    shifted_model, _, X_shift, y_shift = get_with_shift_model(symbol)

    # Only index 0 of each evaluate() result takes part in the comparison.
    noshift_score = plain_model.evaluate(X, y, verbose=0)[0]
    shift_score = shifted_model.evaluate(X_shift, y_shift, verbose=0)[0]

    # Strict '<': equal scores (or a NaN comparison) select the non-shifted model.
    if shift_score < noshift_score:
        winner, winning_score = 'shift', shift_score
    else:
        winner, winning_score = 'no_shift', noshift_score

    model_dict_info[symbol] = {
        'model_type' : winner,
        'acc' : winning_score
    }

In [12]:
# Rebind the results dict to a fresh one so re-running this cell does not keep
# entries from an earlier run, then fill it for every symbol in the subset.
model_dict_info = {} 
for key in test_list:
    evaluate_models(key)
len(model_dict_info)  # bare last expression: displays how many symbols were recorded

25

In [32]:
# One row per symbol (index) with columns 'model_type' and 'acc'.
# 'acc' holds the evaluate() score stored by evaluate_models, so ascending
# order puts the lowest-scoring (best) models first.
model_info_df = (
    pd.DataFrame.from_dict(model_dict_info, orient='index')
    .sort_values('acc')
)
model_info_df

Unnamed: 0,model_type,acc
BCLI,no_shift,0.00312487
AMSC,shift,0.003275977
AEZS,no_shift,0.005173338
ATHE,no_shift,0.02336701
ADMP,shift,0.02370457
BGFV,no_shift,0.03680585
ATXI,no_shift,0.07067446
AHPI,shift,0.07649102
ASTC,shift,0.08908951
ARDX,shift,0.2543498


In [34]:
# Which model type was recorded for the symbol 'BEST'?
model_info_df.loc['BEST', 'model_type']

'no_shift'

In [30]:
## Do predictions
def predictions_df(symbol):
    """Return the actual and predicted ``close`` values for ``symbol``.

    The model type recorded for ``symbol`` in the notebook-level
    ``model_info_df`` decides which loader is used: ``'shift'`` selects
    ``get_with_shift_model`` and ``'no_shift'`` selects
    ``get_no_shift_model``. The loaded model then predicts on the scaled
    features returned by that loader.

    Parameters
    ----------
    symbol : str
        Row label in ``model_info_df``; also passed on to the loader.

    Returns
    -------
    pandas.DataFrame
        The ``close`` and ``predicted`` columns of the loaded frame.

    Raises
    ------
    KeyError
        If ``symbol`` has no row in ``model_info_df`` (raised by ``.loc``).
    ValueError
        If the recorded model type is neither ``'shift'`` nor ``'no_shift'``.
    """
    model_type = model_info_df.loc[symbol].model_type

    if model_type == 'shift':
        loader = get_with_shift_model
    elif model_type == 'no_shift':
        loader = get_no_shift_model
    else:
        # Without this branch an unknown type would only fail later with an
        # UnboundLocalError on `loaded_model`, which hides the real cause.
        raise ValueError(
            f"Unknown model_type {model_type!r} for symbol {symbol!r}; "
            "expected 'shift' or 'no_shift'"
        )

    # The target array returned by the loader is not needed for prediction.
    loaded_model, df, X, _ = loader(symbol)

    # `df` is the copy returned by the loader, so adding a column here does
    # not touch the pickled data.
    df['predicted'] = loaded_model.predict(X)
    return df[['close', 'predicted']]

In [38]:
# Spot-check one symbol: 'close' next to the model's 'predicted' value for 'BCLI'.
test_df = predictions_df('BCLI')
test_df  # bare last expression so the notebook renders the frame

Unnamed: 0_level_0,close,predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-02-02,2.490,2.472308
2016-02-03,2.360,2.360104
2016-02-04,2.370,2.371912
2016-02-05,2.330,2.347617
2016-02-08,2.270,2.263826
...,...,...
2021-10-25,3.060,3.055218
2021-10-26,2.970,2.967572
2021-10-27,2.930,2.935076
2021-10-28,2.870,2.869040
