In [1]:
import sys
sys.path.append('/home/ak/Documents/Research/PaperCode/singlekernelclf/')
from fileutils import DataLoader as DataLoader
from fileutils import paths
from fileutils import new_feature_utils as nfu
from fileutils.new_feature_utils import CreateMarketFeatures
from clfutils import FitModels
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, precision_score, hamming_loss
import os
import multiprocessing
import psutil
import pickle
from collections import defaultdict
import itertools

In [2]:

def open_pickle_filepath(pickle_file):
    """
    Load and return the object stored in a pickle file.

    :param pickle_file: path of the pickle file to read
    :return: the unpickled object
    """
    # The context manager closes the handle even when unpickling raises;
    # previously the handle returned by open() was never closed explicitly.
    # NOTE: pickle.load can execute arbitrary code - only use on trusted files.
    with open(pickle_file, "rb") as pickle_handle:
        # encoding='latin1' is kept from the original call
        # (presumably needed for pickles written by Python 2 - confirm).
        return pickle.load(pickle_handle, encoding='latin1')


def forwardDates(list_of_keys, current_date):
    """
    Collect every key that lies strictly after the current model date.

    :param list_of_keys: iterable of date keys available out of sample
    :param current_date: date key of the model currently being applied
    :return: list of the keys greater than current_date, in ascending order
    """
    later_keys = [date_key for date_key in list_of_keys if date_key > current_date]
    later_keys.sort()
    return later_keys


def evaluate_predictions(y_true, y_preds):
    """
    Score predicted class labels against the true labels, print a short
    report and return the scores.

    Precision, recall and the "weighted" F1 use average='weighted'; the
    macro and micro F1 scores are reported alongside them.

    :param y_true: true labels
    :param y_preds: predicted labels, aligned with y_true
    :return: dict of metric name -> score rounded to 2 decimals
    """
    accuracy = accuracy_score(y_true, y_preds)
    precision = precision_score(y_true, y_preds, average='weighted')
    recall = recall_score(y_true, y_preds, average='weighted')
    f1_weighted = f1_score(y_true, y_preds, average='weighted')
    f1_macro = f1_score(y_true, y_preds, average='macro')
    f1_micro = f1_score(y_true, y_preds, average='micro')
    hamming_loss_value = hamming_loss(y_true, y_preds)
    # The macro/micro precision scores that used to be computed here were never
    # reported or returned, so those two extra passes over the data were dropped.

    metric_dict = {"accuracy": round(accuracy, 2),
                   "precision": round(precision, 2),
                   "recall": round(recall, 2),
                   "f1- weighted": round(f1_weighted, 2),
                   "f1- micro": round(f1_micro, 2),
                   "f1- macro": round(f1_macro, 2),
                   "Hamming Loss": round(hamming_loss_value, 2)}

    # Printed report uses the unrounded scores formatted to two decimals.
    print(f"Acc: {accuracy * 100:.2f}%")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 score weighted: {f1_weighted:.2f}")
    print(f"F1 score macro: {f1_macro:.2f}")
    print(f"F1 score micro: {f1_micro:.2f}")
    print(f"Hamming Loss Value: {hamming_loss_value:.2f}")

    return metric_dict


In [3]:
# Root of the data volume. Hard-coded absolute path: this cell only works on a
# machine where that location exists.
targetMainPath = '/media/ak/DataOnly/'
# Folder whose entries are named after symbols (e.g. 'NG.L' in the next cell's output).
symbolFeaturesDirectories = os.path.join(targetMainPath, 'SymbolFeatureDirectories')


In [4]:
# Show the full path of one entry of the features folder.
# NOTE: os.listdir returns entries in arbitrary order, so index 2 is not
# guaranteed to pick the same symbol on every machine or run.
os.path.join(symbolFeaturesDirectories,os.listdir(symbolFeaturesDirectories)[2])

'/media/ak/DataOnly/SymbolFeatureDirectories/NG.L'

In [5]:
# alternate_labels_nos = [1, 2, 3, 4, 5, 6, 7]  # we have 7 alternative data types
# Root path returned by the project helper `paths` for the key 'main'.
mainPath = paths('main')
# Position inside alternate_labels_nos of the label set to work on.
label_idx = 0  # to be serialised


# Locations, relative to mainPath, used for fitted models and for out-of-sample predictions.
fittedModelsPath = os.path.join(mainPath, "ExperimentCommonLocs/FittedModels")
oosPredictionsPath = os.path.join(mainPath, "ExperimentCommonLocs/OOSPredictions")

# Sorted so that an index into `symbols` always refers to the same ticker.
symbols = sorted(os.listdir(paths('symbols_features')))
print(symbols) # all symbols


['AAL.L', 'APF.L', 'AV.L', 'AZN.L', 'BARC.L', 'BATS.L', 'BLT.L', 'CCL.L', 'CEY.L', 'CNA.L', 'CPG.L', 'DGE.L', 'HSBA.L', 'IOG.L', 'ITV.L', 'KGF.L', 'LAND.L', 'LGEN.L', 'LLOY.L', 'MAB.L', 'MKS.L', 'NG.L', 'PRU.L', 'PSON.L', 'RB.L', 'RBS.L', 'RDSa.L', 'RDSb.L', 'REL.L', 'RR.L', 'RSA.L', 'RTO.L', 'SDR.L', 'SGE.L', 'SHP.L', 'SMIN.L', 'SPT.L', 'STAN.L', 'TSCO.L', 'ULVR.L', 'UU.L', 'VOD.L', 'WPP.L']


In [6]:
# Directory inspected: /media/ak/DataOnly/ExperimentCommonLocs/FittedModels
# The symbol is the part of each file name before the first underscore;
# np.unique collapses the list to the distinct symbols that have a model file.
np.unique([f.split("_")[0] for f in os.listdir(fittedModelsPath)])

array(['AAL.L', 'AZN.L', 'BATS.L', 'CCL.L', 'LAND.L', 'LLOY.L', 'MKS.L',
       'PRU.L', 'RB.L'], dtype='<U6')

In [8]:
# Raw listing of every file in the fitted-models folder (order is arbitrary).
os.listdir(fittedModelsPath)

['LLOY.L_model_fit_date_20170724_6_SingleKernelSVC.pkl',
 'PRU.L_model_fit_date_20170711_4_SingleKernelSVC.pkl',
 'BATS.L_model_fit_date_20180411_6_SingleKernelSVC.pkl',
 'MKS.L_model_fit_date_20170918_5_SingleKernelSVC.pkl',
 'MKS.L_model_fit_date_20180221_5_SingleKernelSVC.pkl',
 'LAND.L_model_fit_date_20170913_7_SingleKernelSVC.pkl',
 'CCL.L_model_fit_date_20170804_5_SingleKernelSVC.pkl',
 'AZN.L_model_fit_date_20170123_7_SingleKernelSVC.pkl',
 'AAL.L_model_fit_date_20170809_2_SingleKernelSVC.pkl',
 'AAL.L_model_fit_date_20170821_2_SingleKernelSVC.pkl',
 'RB.L_model_fit_date_20180411_3_SingleKernelSVC.pkl',
 'MKS.L_model_fit_date_20170119_5_SingleKernelSVC.pkl',
 'AAL.L_model_fit_date_20170706_3_SingleKernelSVC.pkl',
 'PRU.L_model_fit_date_20170124_4_SingleKernelSVC.pkl',
 'PRU.L_model_fit_date_20170803_4_SingleKernelSVC.pkl',
 'AAL.L_model_fit_date_20170130_2_SingleKernelSVC.pkl',
 'LLOY.L_model_fit_date_20170721_6_SingleKernelSVC.pkl',
 'CCL.L_model_fit_date_20170818_5_SingleKerne

In [7]:
# Index into the sorted `symbols` list; 7 selects 'CCL.L' in the listing printed earlier.
symbol_idx = 7
symbol = symbols[symbol_idx]  # to be serialised so read all the symbols
print(symbol)

CCL.L


In [None]:
# Numbers identifying the alternative label sets; label_idx indexes into this list.
alternate_labels_nos = [1, 2, 3, 4, 5, 6, 7]  # we have 7 alternative data types
mainPath = paths('main')
symbolData = DataLoader(mainPath, symbol)  # initiate a path where all the data should be
# Fitted-model files start with "<symbol>_". Compare that prefix exactly (the same
# split the unique-symbols cell uses) rather than testing for a substring, so a
# ticker whose name occurs inside another file name cannot pull in foreign models.
pickled_models = [f for f in os.listdir(fittedModelsPath)
                  if f.split("_")[0] == str(symbol)]  # list of all the pickled models
print(pickled_models)

In [None]:
# Look at the `cv_results_` attribute of the object stored under 'SVC' for this symbol and model date
# (presumably a fitted scikit-learn search object - confirm).
# NOTE(review): `best_svc` and `model_date` are not defined in any cell visible
# here, so this cell depends on leftover kernel state.
best_svc[str(symbol)][model_date]['SVC'].cv_results_

In [None]:
  print('******* Finished and now saving -*-*-*-')
    
                        pickle_out_filename = os.path.join(oosPredictionsPath, "_".join(
                            (symbol, str("Label_" )+ str(alternate_labels_nos[label_idx]), forwardDateKey, 'OOS_results_dict.pkl')))
                        pickle_out = open(pickle_out_filename, 'wb')
                        pickle.dump(oos_svc_predictions, pickle_out)
                        pickle_out.close()
                        print('saved', pickle_out_filename)

In [None]:
# Only the last expression of a cell is displayed, so this count is evaluated but not shown.
# NOTE(review): `error_dates` is not defined in any cell visible here.
len(np.unique(error_dates)) # actual unique dates
# Name of the label set selected by label_idx, e.g. "Label_1" for label_idx = 0.
str("Label_" )+ str(alternate_labels_nos[label_idx])

In [None]:
# Keys of the `labels_paths` mapping, materialised as a list, followed by how many there are.
# NOTE(review): `labels_paths` is not defined in any cell visible here.
labels_dates =list(labels_paths.keys())
len(labels_dates)

In [None]:
# Number of distinct forward dates that also appear in labels_dates.
# NOTE(review): `forwardDatesList` is not defined in any cell visible here.
len(list(set(forwardDatesList).intersection(labels_dates)))

In [None]:
# Display the folder where out-of-sample prediction pickles are written.
oosPredictionsPath

In [None]:

    #                         # create features - first HMM and second some Market Features!
    #
    #                         hmm_features = nfu.hmm_features_df(open_pickle_filepath(features_paths[forwardDateKey]))
    #
    #                         if hmm_features.isnull().values.all():
    #                             print('Problem: your HMM features did not compute properly')
    #                         else:
    #
    #                             market_features_df = CreateMarketFeatures(
    #                                 CreateMarketFeatures(
    #                                     CreateMarketFeatures(df=CreateMarketFeatures(df=labels).ma_spread_duration())
    #                                         .ma_spread()).chaikin_mf()).obv_calc()  # market features dataframe
    #
    #                             df_concat = pd.DataFrame(
    #                                 pd.concat([hmm_features, market_features_df], axis=1, sort='False').dropna())
    #
    #                             df = df_concat[df_concat[label_name].notna()]
    #                             df_final = df.drop(
    #                                 columns=['TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
    #                                          'Volume', label_name])
    #
    #                             y_test = df[df.columns[df.columns.str.contains(pat='label')]].iloc[:, 0]

In [None]:
# NOTE(review): everything that used to be this loop's body is commented out below.
# A `for` header with no statement is a SyntaxError, so a placeholder keeps the
# cell runnable until the body is restored. `best_svc_key_dates` is not defined
# in any cell visible here.
for best_svc_key_date in best_svc_key_dates:
    pass  # placeholder - the original body is the commented-out block that follows
            #     #
            #     #     # all the various combinations of HMM dates,
            #     #     # features models.
            #     #
            #     #     # all the labels dates that are after the key date that this model was fitted
            #
            #
            #
            #         # all the various paths
            #
            #         features_paths = symbolData.hmm_model_date_feature_list_filepaths(hmm_date)[1]
            #
            #         # for each forward date
            #
            #         for forwardDateKey in forwardDatesList:
            #
            #             if model_date < forwardDateKey:  # simple check that your model date is not after your forward date!
            #                 oos_svc_predictions = defaultdict(dict)
    #
    #                         # get your labels
    #
    #                         labels = pd.read_csv(labels_paths[forwardDateKey])
    #                         label_name = str(labels.columns[labels.columns.str.contains(pat='label')].values[0])
    #
    #                         # create features - first HMM and second some Market Features!
    #
    #                         hmm_features = nfu.hmm_features_df(open_pickle_filepath(features_paths[forwardDateKey]))
    #
    #                         if hmm_features.isnull().values.all():
    #                             print('Problem: your HMM features did not compute properly')
    #                         else:
    #
    #                             market_features_df = CreateMarketFeatures(
    #                                 CreateMarketFeatures(
    #                                     CreateMarketFeatures(df=CreateMarketFeatures(df=labels).ma_spread_duration())
    #                                         .ma_spread()).chaikin_mf()).obv_calc()  # market features dataframe
    #
    #                             df_concat = pd.DataFrame(
    #                                 pd.concat([hmm_features, market_features_df], axis=1, sort='False').dropna())
    #
    #                             df = df_concat[df_concat[label_name].notna()]
    #                             df_final = df.drop(
    #                                 columns=['TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice',
    #                                          'Volume', label_name])
    #
    #                             y_test = df[df.columns[df.columns.str.contains(pat='label')]].iloc[:, 0]
    #
    #                             try:
    #
    #                                 X_test = MinMaxScaler().fit_transform(df_final)
    #
    #                                 y_pred = best_svc[str(symbol)][model_date]['SVC'].predict(X_test)
    #                                 print(evaluate_predictions(y_test, y_pred))
    #                                 # store the results
    #                                 results_predict_alias = "_".join(
    #                                     (symbol, forwardDateKey, str(alternate_labels_nos[label_idx])))
    #                                 oos_svc_predictions[results_predict_alias][forwardDateKey] = evaluate_predictions(
    #                                     y_test, y_pred)
    #
    #                             except ValueError:
    #                                 print('value error here:****************************************')
    #                                 continue
    #
    #                     else:
    #
    #                         pass
    #

In [None]:
# Inspect the entry stored for symbol 'CCL.L' and date key '20170710' (both hard-coded).
# NOTE(review): `best_svc` is not defined in any cell visible here.
best_svc['CCL.L']['20170710']

In [None]:
def iterdict(d):
    """
    Walk a (possibly nested) dictionary and print every leaf as "key : value".

    :param d: dictionary whose values may themselves be dictionaries
    :return: None - the leaves are written to stdout
    """
    for key, value in d.items():
        if not isinstance(value, dict):
            print(key, ":", value)
            continue
        # nested dictionary: descend instead of printing the container itself
        iterdict(value)

In [None]:
# Show all (key, value) pairs of the top level of best_svc.
# NOTE(review): `best_svc` is not defined in any cell visible here.
best_svc.items()