In [1]:
import pandas as pd

from pathlib import Path

In [2]:
# # Initial imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import model_from_json

# import matplotlib.pyplot as plt

# #from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# %matplotlib inline

In [3]:
## Load pickle for exports and imports of data  
import pickle 
def load_obj(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at files from a trusted source.
    """
    with open(path, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded
    
def save_obj(obj, path):
    """Pickle ``obj`` into the file at ``path`` with the highest pickle protocol.

    The file is opened in binary write mode, so an existing file at ``path``
    is overwritten.
    """
    with open(path, mode='wb') as pickle_file:
        pickle.dump(obj, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
## Return the dataframe for a symbol, loaded from either the shifted or the non-shifted export
## Defaults to the non-shifted data (shift=False)
def return_dataframe(symbol, shift=False):
    """Load the pickled export for ``symbol`` and return a copy of its data.

    Parameters
    ----------
    symbol : str
        Symbol name. It is used both to build the pickle file name and as the
        key into the unpickled mapping.
    shift : bool, default False
        When truthy, read from the ``FilesExport_DFs_with_TI_shifted`` export;
        otherwise read from ``FilesExport_DFs_with_TI_pkl``. Any falsy value
        (including ``None``) selects the non-shifted export instead of
        failing with an unbound local variable.

    Returns
    -------
    object
        ``data_import[symbol].copy()`` -- the callers in this notebook treat
        it as a pandas DataFrame.
    """
    # Only the file location differs between the two variants; the load and
    # copy steps are shared.
    if shift:
        path = Path('../FilesExport_DFs_with_TI_shifted/' + symbol + '_df_with_shifted_TI.pkl')
    else:
        path = Path('../FilesExport_DFs_with_TI_pkl/' + symbol + '_data_dict_with_technicals.pkl')

    data_import = load_obj(path)
    # Copy so callers can mutate the result without touching the loaded mapping.
    return data_import[symbol].copy()

In [5]:
## Import symbol list
## Unpickle the symbol collection from the Resources folder; the cell displays how many entries it holds.
path = Path('../Resources/short_list_for_ML.pkl')
key_list = load_obj(path)
len(key_list)

379

In [6]:
## For testing: only work with the first `length_` entries of key_list
length_ = 190
test_list = key_list[0:length_]
# Display the last symbol included in the subset
test_list[length_-1]

'INDP'

In [7]:
## Load non-shifted data 
def get_no_shift_model(symbol):
    """Rebuild the saved non-shifted model for ``symbol`` together with its data.

    Returns
    -------
    tuple
        ``(loaded_model, df, X, y)`` where ``loaded_model`` is the model
        restored from ``../Model_Data_low_loss/`` and compiled with
        mean-squared-error loss, ``df`` is the result of
        ``return_dataframe(symbol, shift=False)``, ``X`` holds the
        standardized values of every column except ``close`` and
        ``adjClose``, and ``y`` holds the ``close`` values.

    NOTE(review): the scaler is fitted here on the loaded data; whether that
    matches the scaling used when the model was trained cannot be told from
    this notebook.
    """
    model_dir = '../Model_Data_low_loss/'

    df = return_dataframe(symbol, shift=False)

    # Features are everything but the price columns; the target is the close price.
    features = df.drop(columns=['close', 'adjClose']).values
    y = df['close'].values
    X = StandardScaler().fit(features).transform(features)

    # Architecture comes from the JSON description ...
    architecture_path = Path(model_dir + symbol + '_model_data.json')
    with open(architecture_path, "r") as json_file:
        architecture_json = json_file.read()
    loaded_model = model_from_json(architecture_json)

    # ... and the weights from the matching HDF5 file.
    weights_path = Path(model_dir + symbol + '_model_weights.h5')
    loaded_model.load_weights(weights_path)

    # Compile so the model can be evaluated (loss and metric are both MSE).
    loaded_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

    return loaded_model, df, X, y

In [8]:
## Load shifted data 
def get_with_shift_model(symbol):
    """Rebuild the saved shifted model for ``symbol`` together with its data.

    Returns
    -------
    tuple
        ``(loaded_model, df, X, y)`` where ``loaded_model`` is the model
        restored from ``../Model_Data_shift_low_loss/`` and compiled with
        mean-squared-error loss, ``df`` is the result of
        ``return_dataframe(symbol, shift=True)``, ``X`` holds the
        standardized values of every column except ``close``, and ``y``
        holds the ``close`` values.

    NOTE(review): the scaler is fitted here on the loaded data; whether that
    matches the scaling used when the model was trained cannot be told from
    this notebook.
    """
    model_dir = '../Model_Data_shift_low_loss/'

    df = return_dataframe(symbol, shift=True)

    # Features are every column except the target close price.
    features = df.drop(columns=['close']).values
    y = df['close'].values
    X = StandardScaler().fit(features).transform(features)

    # Architecture comes from the JSON description ...
    architecture_path = Path(model_dir + symbol + '_model_data.json')
    with open(architecture_path, "r") as json_file:
        architecture_json = json_file.read()
    loaded_model = model_from_json(architecture_json)

    # ... and the weights from the matching HDF5 file.
    weights_path = Path(model_dir + symbol + '_model_weights.h5')
    loaded_model.load_weights(weights_path)

    # Compile so the model can be evaluated (loss and metric are both MSE).
    loaded_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

    return loaded_model, df, X, y

In [9]:
# Per-symbol record of the better-scoring model, filled in by evaluate_models().
model_dict_info = {}


def evaluate_models(symbol):
    """Score the shifted and non-shifted models for ``symbol`` and record the better one.

    Both saved models are evaluated on their own data. The one with the
    strictly lower loss wins, and a tie goes to the non-shifted model. The
    result is stored in the module-level ``model_dict_info`` as
    ``{'model_type': 'shift' | 'no_shift', 'acc': <winning loss>}``; nothing
    is returned. Note that ``'acc'`` holds the first value returned by
    ``evaluate`` (the loss), so lower is better.
    """
    plain_model, _, plain_X, plain_y = get_no_shift_model(symbol)
    shifted_model, _, shifted_X, shifted_y = get_with_shift_model(symbol)

    # evaluate() returns the loss followed by the metrics; keep only the loss.
    plain_loss = plain_model.evaluate(plain_X, plain_y, verbose=0)[0]
    shifted_loss = shifted_model.evaluate(shifted_X, shifted_y, verbose=0)[0]

    if shifted_loss < plain_loss:
        winner, winning_loss = 'shift', shifted_loss
    else:
        winner, winning_loss = 'no_shift', plain_loss

    model_dict_info[symbol] = {'model_type': winner, 'acc': winning_loss}

In [10]:
## Generate predictions for a symbol using its selected model
def return_predictions_df(symbol):
    """Return the actual and predicted ``close`` values for ``symbol``.

    The model variant to use is read from the ``model_type`` column of the
    module-level ``model_info_df`` (indexed by symbol). The matching saved
    model and its data are loaded, predictions are made on the standardized
    features, and they are stored in a new ``predicted`` column.

    Parameters
    ----------
    symbol : str
        Symbol to predict for; must be present in ``model_info_df``.

    Returns
    -------
    pandas.DataFrame
        The ``close`` and ``predicted`` columns of the loaded data.

    Raises
    ------
    ValueError
        If the recorded ``model_type`` is neither ``'shift'`` nor
        ``'no_shift'`` (previously this surfaced as an unbound-variable
        error further down).
    """
    model_type = model_info_df.loc[symbol, 'model_type']

    if model_type == 'shift':
        loaded_model, df, X, _ = get_with_shift_model(symbol)
    elif model_type == 'no_shift':
        loaded_model, df, X, _ = get_no_shift_model(symbol)
    else:
        raise ValueError(
            f"Unknown model_type {model_type!r} for symbol {symbol!r}; "
            "expected 'shift' or 'no_shift'"
        )

    df['predicted'] = loaded_model.predict(X)
    return df[['close', 'predicted']]

In [11]:
model_dict_info = {}  # rebind to an empty dict so a re-run of this cell starts clean
# Score both saved models for every symbol in the test subset; evaluate_models()
# records the better one in model_dict_info.
for key in test_list:
    evaluate_models(key)
print(len(model_dict_info))
print()
print()
# One row per symbol (the index), with 'model_type' and 'acc' columns.
model_info_df = pd.DataFrame.from_dict(model_dict_info, orient='index')
model_info_df.sort_values('acc',inplace=True)  ## Ascending sort: 'acc' holds the evaluation loss, so the most accurate models come first
model_info_df

190




Unnamed: 0,model_type,acc
GBR,no_shift,1.303723e-04
EUO,no_shift,2.867345e-04
GALT,shift,1.866510e-03
BCLI,no_shift,3.124870e-03
AMSC,shift,3.275977e-03
...,...,...
DIA,shift,4.472363e+01
BEEM,shift,6.273642e+01
IFF,no_shift,1.750239e+02
FBRX,shift,2.086767e+03


In [12]:
# Inspect positions 80-89 of the table sorted by 'acc'
model_info_df.iloc[80:90]

Unnamed: 0,model_type,acc
EQAL,no_shift,0.134511
EDN,shift,0.14226
CHEK,shift,0.166409
EWO,no_shift,0.170476
CIBR,no_shift,0.196762
CCXI,no_shift,0.207423
ILF,no_shift,0.210487
CEMI,shift,0.212236
FREL,shift,0.216465
GSG,shift,0.231658


In [13]:
# Show the first rows of the sorted table, i.e. the entries with the lowest 'acc'
model_info_df.head()
#model_info_df.iloc[8:18]

Unnamed: 0,model_type,acc
GBR,no_shift,0.00013
EUO,no_shift,0.000287
GALT,shift,0.001867
BCLI,no_shift,0.003125
AMSC,shift,0.003276


In [14]:
# ## Interesting Stocks - AEZS ,  
# interesting_list = ['AEZS','CTIB','EURN']
# test_df = predictions_df('EURN')
# print(test_df)
# print()
# print()
# ## Plot 
# ## Slice the last 30 rows
# test_df2 = test_df.tail(30)
# test_df2.plot()

In [16]:
# Look up the recorded model type and score for a single symbol
model_info_df.loc['GME']

model_type       shift
acc           2.260811
Name: GME, dtype: object

In [17]:
# Persist the per-symbol model selection table as a pickle in the Resources folder
path = Path('../Resources/model_info_df.pkl')
save_obj(model_info_df,path)