In [14]:

import pandas as pd
import numpy as np
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support
sc = StandardScaler()
import os
import pickle
import fnmatch
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, precision_score

##useful function


def fwd_dates(_dates_list, _key_date):
    # returns a list of dates that are forward from the key_date
    fwd_dates_list = [i for i in _dates_list if i > _key_date]
    return fwd_dates_list

def common_member(a, b): 
      
    a_set = set(a) 
    b_set = set(b) 
      
    # check length  
    if len(a_set.intersection(b_set)) > 0: 
        return(a_set.intersection(b_set))   
    else: 
        return("no common elements") 


def remove_nans(features_tuple, labels, idx=1):
    # not the cleanest but useful
    # function to clean up nans as I seem to use it a lot, so better to have one function
    # combines the features and labels and removes rows with nans across so we dont lose the ordering
    # returns features and labels
    features_df = pd.concat([features_tuple[0], features_tuple[1], features_tuple[2], \
                             features_tuple[3]], axis=1, sort=False)
    labels_only = labels.drop(columns=['ReturnTradedPrice', 'Duration', 'states', 'TradedTime',
                                       'TradedPrice'], axis=1)
    df_concat = pd.concat([features_df, labels_only.iloc[:, 0:idx]], axis=1, sort='False')
    # only using 1st set of labels- but we can re-write this a bit
    df_x_nan = df_concat.dropna()  # dropping all nans
    label_column_loc_ = df_x_nan.shape[1] - 1  # location of labels column in the clean df
    labels_ = df_x_nan.iloc[:, label_column_loc_:label_column_loc_ + 1]  # keep pure labels
    features_ = df_x_nan.drop(df_x_nan.columns[label_column_loc_], axis=1)  # keeping the features only
    return features_, labels_


def prec_recall_report(y_true, y_predict):
    # function to ge the sci-kit learn classification metrics into a pretty DF for csv!
    report = pd.DataFrame(list(precision_recall_fscore_support(y_true, y_predict)),
                          index=['Precision', 'Recall', 'F1-score', 'Support']).T
    # Now add the 'Avg/Total' row
    report.loc['Avg/Total', :] = precision_recall_fscore_support(y_true, y_predict, average='weighted')
    report.loc['Avg/Total', 'Support'] = report['Support'].sum()
    return report


class DataLoader(object):
    def __init__(self, path_main, ticker):
        self.main_path = path_main
        self.ticker = ticker

        self.features_labels_path = os.path.join(self.main_path, 'features_models')
        self.features_path = os.path.join(self.features_labels_path, 'features')
        # collection of per symbol non directional labels
        self.labels_path = os.path.join(self.features_labels_path, 'labels', self.ticker, 'NON_DIRECTIONAL')
        self.symbol_features_path = os.path.join(self.features_labels_path, 'features', self.ticker, 'MODEL_BASED')
        # list of all the model -oos hmm feature dates - each folder is a collection of oos feature dates
        self.hmm_dates_list = os.listdir(self.symbol_features_path)  # each folder are the OOS features from each HMM
        self.compute_date = os.listdir(os.path.join( \
            self.symbol_features_path, \
            os.listdir(self.symbol_features_path)[1]))[1].split("_")[7]

    def ticker_features(self, model_date, date):
        # need to make this a lot more flexible with number of states
        if model_date < date:
            file_name = "_".join(
                (self.ticker, '3', 'states', 'features', 'date:', date, 'now:', self.compute_date, '.pickle'))
            file_loc = os.path.join(self.symbol_features_path, str(model_date), file_name)
            with open(file_loc, 'rb') as handle:
                ticker_features = pickle.load(handle)
        else:
            print('Loading Feature Date which is in-sample. Change your Model Date')
        return ticker_features

    def ticker_labels_csv(self, date):
        file_loc = os.path.join(self.labels_path, str(date) + '.csv')
        ticker_labels = pd.read_csv(file_loc, index_col=0)
        return ticker_labels

    @staticmethod
    def open_pickle_file(path, pickle_file):
        file_loc = os.path.join(path, pickle_file)
        pickle_to_file = pickle.load(open(file_loc, "rb"))
        return pickle_to_file

    @staticmethod
    def get_date_from_file(file_, numb_):
        return os.path.splitext(file_[numb_])[0]

class MarketFeatures(object):
    # a class to be expanded that uses features for base case -market based only-indicators/features
    """"Requires:
    a dataframe that has TradedPrice And Volume columns
    symbol - A stock symbol on which to form a strategy on.
    short_window - Lookback period for short moving average.
    long_window - Lookback period for long moving average.
    """

    def __init__(self, df):
        #         self.ticker = ticker
        self.df = df

    def load_data(self):
        pass

    def ma_spread(self, short_window=5, long_window=20):
        # function that produces the MA spread, which can be used on its own or as an input for MACD
        short_rolling_px = self.df['TradedPrice'].rolling(window=short_window).mean()
        long_rolling_px = self.df['TradedPrice'].rolling(window=long_window).mean()
        px_name = "_".join(('px_indx', str(short_window), str(long_window)))
        self.df[px_name] = long_rolling_px - short_rolling_px
        return self.df

    def ma_spread_duration(self, short_window=5, long_window=20):
        # function that produces the MA spread, which can be used on its own or as an input for MACD
        short_rolling_px = self.df['Duration'].rolling(window=short_window).mean()
        long_rolling_px = self.df['Duration'].rolling(window=long_window).mean()
        dur_name = "_".join(('dur_indx', str(short_window), str(long_window)))
        self.df[dur_name] = long_rolling_px - short_rolling_px
        return self.df

    def obv_calc(self):
        # on balance volume indicator
        self.df['SignedVolume'] = self.df['Volume'] * np.sign(self.df['TradedPrice'].diff()).cumsum()
        self.df['SignedVolume'].iat[1] = 0
        self.df['OBV'] = self.df['SignedVolume']  # .cumsum()
        self.df = self.df.drop(columns=['SignedVolume'])
        return self.df

    def chaikin_mf(self, period=5):
        # Chaikin money flow indicator
        self.df["MF Multiplier"] = (self.df['TradedPrice'] - (self.df['TradedPrice'].expanding(period).min()) \
                                    - (self.df['TradedPrice'].expanding(period).max() \
                                       - self.df['TradedPrice'])) / (
                                           self.df['TradedPrice'].expanding(period).max() - self.df[ \
                                       'TradedPrice'].expanding(period).min())
        self.df["MF Volume"] = self.df['MF Multiplier'] * self.df['Volume']
        self.df['CMF_' + str(period)] = self.df['MF Volume'].sum() / self.df["Volume"].rolling(period).sum()
        self.df = self.df.drop(columns=['MF Multiplier', 'MF Volume'])
        return self.df

In [15]:
os.getenv('FINANCE_DATA')

'/media/ak/WorkDrive/Data/'

In [16]:

# locations

data_dir = os.getenv('FINANCE_DATA')  # main directory referenced in all the code
data_only_drive = '/mnt/usb-Seagate_Expansion_Desk_NA8XEHR6-0:0-part2'  # external date only drive

# this is the central location for all the features/models/predictions
features_models = os.path.join(data_dir,
                               'features_models')
features_models_dod = os.path.join(data_only_drive, 'features_models')
# main path where all the sub-directories are (features, models, labels)

# this is the central location for all the labels
labels = os.path.join(features_models, 'labels')  # label subdirectory
# this is the central location for all the features #feature subdirectory
features = os.path.join(features_models, 'features')

# location to save results
model_save_loc = os.path.join(data_only_drive, 'Data', 'features_models', 'models')

# location where all the models are:

model_save_loc = os.path.join(data_only_drive, 'Data', 'features_models','models')
# from the main directory select all the symbols that are finishing in .L for FTSE
symbols_ftse = [s for s in os.listdir(features) if s.endswith('.L')]

main_path = os.path.join(data_dir, 'features_models')  # main directory

# location to save results
model_loc = os.path.join(data_only_drive, 'Data', 'features_models', 'models')

metrics_loc = os.path.join(data_only_drive, 'Data','features_models','metrics')

features_path = os.path.join(main_path, 'features')  # all the features - same as above -redundant

labels_path = os.path.join(main_path, 'labels')  # all the labels

# same as above- new target directory, where all the models and output is saved
# on the data only drive

model_paths = os.path.join(data_only_drive, 'Data', 'features_models',
                           'models')

# using lambda to make a small function that just takes in the symbol
# and produces the relevant path of all fitted single kernel models
# this path is from the Data Only Drive

def symbol_fitted_models_path(symbol): return os.path.join(model_paths, symbol, 'SINGLE_KERNEL')


# provides a fitted list of above path

def symbol_list_fitted_dates(symbol): return sorted(os.listdir(symbol_fitted_models_path(symbol)))


# fitted model sub-directory- the fitted model is stored in this sub-directory as a pickle

def symbol_fitted_model_date_loc(file_path, model_date_no):
    return os.path.join(file_path, str(symbol_list_fitted_dates(symbol)[model_date_no]))


def symbol_model_date_loc(model_date_path):
    return os.path.join(model_date_path, os.listdir(model_date_path)[0])


def model_pickle_to_svc(model_pickle):

    pickle_to_file = pickle.load(open(model_pickle, "rb"))
    best_estimator = pickle_to_file['SVC'].best_estimator_

    return best_estimator


# test case ##

# symbols to use as a starting point
good_symbols = [
    'RDSa.L', 'PRU.L', 'III.L', 'REL.L', 'CNA.L', 'SHP.L', 'MKS.L',
    'CPI.L', 'ULVR.L', 'ECM.L', 'AV.L', 'GKN.L', 'TSCO.L', 'ITV.L',
    'BARC.L', 'CPG.L', 'AAL.L', 'LGEN.L', 'LAND.L', 'VOD.L', 'HSBA.L',
    'RSA.L', 'DMGOa.L', 'RR.L', 'DGE.L', 'BATS.L', 'MAB.L',
    'KGF.L', 'SPT.L', 'AZN.L'
]



In [76]:
if __name__ == '__main__':


    symbol = good_symbols[0]  # picking PRU as an example

    # test symbol path, which essentially produces the path where all the fitted models are.
    # '/mnt/usb-Seagate_Expansion_Desk_NA8XEHR6-0:0-part2/Data/features_models/models/SPT.L/SINGLE_KERNEL'
    symbol_labels_path = os.path.join(labels_path, symbol, 'NON_DIRECTIONAL')
    labels_dates = [os.listdir(symbol_labels_path)[idx].split(".")[0] 
                    for idx, _ in enumerate(os.listdir(symbol_labels_path))]
    
    # specific symbol features list of directories. so this has all the model-based directories of features
    # each date on this list corresponds to an hmm model, and each date-directory contains all the features 
    # constructed out of sample
    
    symbol_features_dates_path = os.path.join(features, symbol, 'MODEL_BASED')
    symbol_model_path = os.path.join(model_paths,symbol,'SINGLE_KERNEL')
    symbol_model_locations = [os.path.join(symbol_model_path,os.listdir(symbol_model_path)[idx]) for idx,
                              _ in enumerate(os.listdir(symbol_model_path))]
    
    # we construct a list of all the hmm-model-date directories, each containing OOS features
    features_dates = os.listdir(symbol_features_dates_path)
    model_dates = os.listdir(symbol_model_path)
    common_dates= sorted(list(common_member(sorted(list(common_member(features_dates, model_dates)))
              , labels_dates))) #dates common for labels, features, models
    
    
 
    
  

In [111]:
datacls = DataLoader(path_main=data_dir, ticker=symbol)  # test symbol -create class
symbol_compute_date = datacls.compute_date
from collections import defaultdict
all_symbols_d = defaultdict(dict)
symbol_model_dates = dict()
symbol_compute_date

'20181225'

In [188]:
print symbol
from datetime import datetime
from datetime import timedelta
#create a time-delta which will be needed for the common date
daysdelta=timedelta(days=1)



# now lets take all the model directories and locations##

for common_date in common_dates:
    #move the common date (which is the hmm date, and folder name) one dat forward
    common_day_start=datetime.strptime(common_date, '%Y%m%d') +daysdelta
    #convert it back to a string (prob we can do in one go)
    first_oos_day= common_day_start.strftime('%Y%m%d')
    # getting the model
    model_pickle= os.path.join(symbol_model_path,common_date,
                               os.listdir(os.path.join(symbol_model_path,common_date))[0])
    pickle_to_file = pickle.load(open(model_pickle, "rb")) #load your model

    best_estimator = pickle_to_file['SVC'].best_estimator_
    print('Your symbol is:', symbol, 'and the model date is:' ,common_date)
    # set a few OOS dates
    
    fwd_dates_list = sorted([i for i in common_dates if i > first_oos_day])[:3]
    # set up the dictionary for metrics #
    M = len(fwd_dates_list)
    T = 1
    T_2 = 4
    fitted_models_results = {
            'accuracy': np.empty((M,T)),
            'recall': np.empty((M,T)),
            'F1-score': np.empty((M,T)),
            'precision_recall_fscore_support': np.empty((M, T_2))
        }
    
    #get the features
    oos_features_path= os.path.join(symbol_features_dates_path, common_date)
    
    for dic_idx, fwd_date in enumerate(fwd_dates_list):
        print fwd_date
        feature_file = "_".join((symbol,'3_states_features_date:',fwd_date,'now:',symbol_compute_date,'.pickle'))
        features_loc = os.path.join(symbol_features_dates_path,common_date, feature_file)
        features_tuple=pickle.load(open(features_loc, "rb"))
        market_data_oos= pd.read_csv(os.path.join(symbol_labels_path, 
                                                  '.'.join((fwd_date,'csv'))),index_col=0)
        features_df = pd.concat([features_tuple[0], features_tuple[1],
                             features_tuple[2], features_tuple[3]], axis=1, sort=False)
        df_w_market_features = MarketFeatures(df=MarketFeatures(\
                                                                df=MarketFeatures(
                        df=MarketFeatures(df=market_data_oos).obv_calc()).chaikin_mf()).ma_spread()).ma_spread_duration()

        df_concat = pd.concat([features_df, df_w_market_features], axis=1, sort='False').dropna()

        label_name = str(df_concat.columns[df_concat.columns.str.contains(pat='label')].values[0])

        df_final = df_concat.drop(columns=['TradedPrice', 'Duration', 'TradedTime', 'ReturnTradedPrice', \
                                           'Volume', label_name])
        if len(df_final)> 5:
            X = MinMaxScaler().fit_transform(df_final)

            y_labels = df_concat[df_concat.columns[df_concat.columns.str.contains(pat='label')]].iloc[:, 0]
            y_predict = best_estimator.predict(X)
            print accuracy_score(y_labels, y_predict)
            
            fitted_models_results['accuracy'][dic_idx, :] = accuracy_score(y_labels, y_predict)
            fitted_models_results['recall'][dic_idx, :] = recall_score(y_true=y_labels, y_pred=y_predict)
            fitted_models_results['F1-score'][dic_idx, :] =f1_score(y_true= y_labels, y_pred=y_predict)
            fitted_models_results['precision_recall_fscore_support'][dic_idx, :] = precision_recall_fscore_support(y_true=  y_labels, y_pred=y_predict, average='micro')
            results_loc = str(os.path.join(metrics_loc, "_".join((symbol,model_date,"results_metrics.pickle"))))        
        else:
            print ('skipping')
        
        with open(results_loc, 'wb') as f:
            pickle.dump(fitted_models_results, f)
        print fitted_models_results


RDSa.L
('Your symbol is:', 'RDSa.L', 'and the model date is:', '20170818')
20170821
0.9187613843351549
{'precision_recall_fscore_support': array([[0.91876138, 0.91876138, 0.91876138,        nan],
       [0.78137988, 0.78137988, 0.78137988,        nan],
       [0.44346178, 0.44346178, 0.44346178,        nan]]), 'recall': array([[0.54545455],
       [0.99768372],
       [0.99768372]]), 'F1-score': array([[0.02620087],
       [0.99768372],
       [0.99768372]]), 'accuracy': array([[0.91876138],
       [0.99768372],
       [0.99768372]])}
20170822
0.8069102033471297
{'precision_recall_fscore_support': array([[0.91876138, 0.91876138, 0.91876138,        nan],
       [0.8069102 , 0.8069102 , 0.8069102 ,        nan],
       [0.44346178, 0.44346178, 0.44346178,        nan]]), 'recall': array([[0.54545455],
       [0.5       ],
       [0.99768372]]), 'F1-score': array([[0.02620087],
       [0.00740056],
       [0.99768372]]), 'accuracy': array([[0.91876138],
       [0.8069102 ],
       [0.997683

0.9254627313656828
{'precision_recall_fscore_support': array([[0.92546273, 0.92546273, 0.92546273,        nan],
       [0.82221103, 0.82221103, 0.82221103,        nan],
       [0.60130065, 0.60130065, 0.60130065,        nan]]), 'recall': array([[0.55555556],
       [0.82221103],
       [0.60130065]]), 'F1-score': array([[0.03246753],
       [0.82221103],
       [0.60130065]]), 'accuracy': array([[0.92546273],
       [0.82221103],
       [0.60130065]])}
20170901
0.8237259816207184
{'precision_recall_fscore_support': array([[0.92546273, 0.92546273, 0.92546273,        nan],
       [0.82372598, 0.82372598, 0.82372598,        nan],
       [0.60130065, 0.60130065, 0.60130065,        nan]]), 'recall': array([[0.55555556],
       [0.4       ],
       [0.60130065]]), 'F1-score': array([[0.03246753],
       [0.00627943],
       [0.60130065]]), 'accuracy': array([[0.92546273],
       [0.82372598],
       [0.60130065]])}
20170904
0.8328215626066189
{'precision_recall_fscore_support': array([[0.925

skipping
{'precision_recall_fscore_support': array([[0.92546273, 0.92546273, 0.92546273,        nan],
       [0.82372598, 0.82372598, 0.82372598,        nan],
       [0.83282156, 0.83282156, 0.83282156,        nan]]), 'recall': array([[0.99761174],
       [0.9980098 ],
       [0.99641406]]), 'F1-score': array([[0.99761174],
       [0.9980098 ],
       [0.99641406]]), 'accuracy': array([[0.99761174],
       [0.9980098 ],
       [0.99641406]])}
20170913
skipping
{'precision_recall_fscore_support': array([[0.92546273, 0.92546273, 0.92546273,        nan],
       [0.82372598, 0.82372598, 0.82372598,        nan],
       [0.83282156, 0.83282156, 0.83282156,        nan]]), 'recall': array([[0.99761174],
       [0.9980098 ],
       [0.99641406]]), 'F1-score': array([[0.99761174],
       [0.9980098 ],
       [0.99641406]]), 'accuracy': array([[0.99761174],
       [0.9980098 ],
       [0.99641406]])}
20170914
skipping
{'precision_recall_fscore_support': array([[0.92546273, 0.92546273, 0.92546273,

0.9949003245248029
{'precision_recall_fscore_support': array([[0.99656774, 0.99656774, 0.99656774,        nan],
       [0.99490032, 0.99490032, 0.99490032,        nan],
       [0.00262467, 0.00262467, 0.00262467,        nan]]), 'recall': array([[0.        ],
       [0.        ],
       [0.99697153]]), 'F1-score': array([[0.        ],
       [0.        ],
       [0.99697153]]), 'accuracy': array([[0.99656774],
       [0.99490032],
       [0.99697153]])}
20170925
0.9983397897066962
{'precision_recall_fscore_support': array([[0.99656774, 0.99656774, 0.99656774,        nan],
       [0.99490032, 0.99490032, 0.99490032,        nan],
       [0.99833979, 0.99833979, 0.99833979,        nan]]), 'recall': array([[0.],
       [0.],
       [0.]]), 'F1-score': array([[0.],
       [0.],
       [0.]]), 'accuracy': array([[0.99656774],
       [0.99490032],
       [0.99833979]])}
('Your symbol is:', 'RDSa.L', 'and the model date is:', '20170920')
20170922
skipping
{'precision_recall_fscore_support': arr

0.8158697863682605
{'precision_recall_fscore_support': array([[0.81586979, 0.81586979, 0.81586979,        nan],
       [0.53666516, 0.53666516, 0.53666516,        nan],
       [0.05274949, 0.05274949, 0.05274949,        nan]]), 'recall': array([[0.        ],
       [0.59935118],
       [0.9923703 ]]), 'F1-score': array([[0.        ],
       [0.59935118],
       [0.9923703 ]]), 'accuracy': array([[0.81586979],
       [0.59935118],
       [0.9923703 ]])}
20171205
0.7847942192899781
{'precision_recall_fscore_support': array([[0.81586979, 0.81586979, 0.81586979,        nan],
       [0.78479422, 0.78479422, 0.78479422,        nan],
       [0.05274949, 0.05274949, 0.05274949,        nan]]), 'recall': array([[0.        ],
       [0.08333333],
       [0.9923703 ]]), 'F1-score': array([[0.        ],
       [0.00145773],
       [0.9923703 ]]), 'accuracy': array([[0.81586979],
       [0.78479422],
       [0.9923703 ]])}
20171207
0.7798794777368597
{'precision_recall_fscore_support': array([[0.815

0.9516516516516517
{'precision_recall_fscore_support': array([[0.99230769, 0.99230769, 0.99230769,        nan],
       [0.95165165, 0.95165165, 0.95165165,        nan],
       [0.80216346, 0.80216346, 0.80216346,        nan]]), 'recall': array([[0.        ],
       [0.        ],
       [0.85195195]]), 'F1-score': array([[0.        ],
       [0.        ],
       [0.85195195]]), 'accuracy': array([[0.99230769],
       [0.95165165],
       [0.85195195]])}
20171222
0.9923325262308313
{'precision_recall_fscore_support': array([[0.99230769, 0.99230769, 0.99230769,        nan],
       [0.95165165, 0.95165165, 0.95165165,        nan],
       [0.99233253, 0.99233253, 0.99233253,        nan]]), 'recall': array([[0.],
       [0.],
       [0.]]), 'F1-score': array([[0.],
       [0.],
       [0.]]), 'accuracy': array([[0.99230769],
       [0.95165165],
       [0.99233253]])}
('Your symbol is:', 'RDSa.L', 'and the model date is:', '20171218')
20171222
skipping
{'precision_recall_fscore_support': arr

0.5346672466289691
{'precision_recall_fscore_support': array([[0.53466725, 0.53466725, 0.53466725,        nan],
       [0.81573239, 0.81573239, 0.81573239,        nan],
       [0.96878736, 0.96878736, 0.96878736,        nan]]), 'recall': array([[0.62903226],
       [0.67728303],
       [0.88064376]]), 'F1-score': array([[0.01437258],
       [0.67728303],
       [0.88064376]]), 'accuracy': array([[0.53466725],
       [0.67728303],
       [0.88064376]])}
20180208
0.7706695005313496
{'precision_recall_fscore_support': array([[0.53466725, 0.53466725, 0.53466725,        nan],
       [0.7706695 , 0.7706695 , 0.7706695 ,        nan],
       [0.96878736, 0.96878736, 0.96878736,        nan]]), 'recall': array([[0.62903226],
       [0.10526316],
       [0.88064376]]), 'F1-score': array([[0.01437258],
       [0.00369344],
       [0.88064376]]), 'accuracy': array([[0.53466725],
       [0.7706695 ],
       [0.88064376]])}
20180209
0.3646767708011135
{'precision_recall_fscore_support': array([[0.534

skipping
{'precision_recall_fscore_support': array([[0.42097123, 0.42097123, 0.42097123,        nan],
       [0.69369078, 0.69369078, 0.69369078,        nan],
       [0.46101279, 0.46101279, 0.46101279,        nan]]), 'recall': array([[0.42097123],
       [0.69369078],
       [0.46101279]]), 'F1-score': array([[0.42097123],
       [0.69369078],
       [0.46101279]]), 'accuracy': array([[0.42097123],
       [0.69369078],
       [0.46101279]])}
('Your symbol is:', 'RDSa.L', 'and the model date is:', '20180214')
20180216
0.002975557917109458
{'precision_recall_fscore_support': array([[0.00297556, 0.00297556, 0.00297556,        nan],
       [0.39385504, 0.39385504, 0.39385504,        nan],
       [0.458441  , 0.458441  , 0.458441  ,        nan]]), 'recall': array([[1.        ],
       [0.69369078],
       [0.46101279]]), 'F1-score': array([[0.00593346],
       [0.69369078],
       [0.46101279]]), 'accuracy': array([[0.00297556],
       [0.69369078],
       [0.46101279]])}
20180219
0.001598

skipping
{'precision_recall_fscore_support': array([[0.99840102, 0.99840102, 0.99840102,        nan],
       [0.99724409, 0.99724409, 0.99724409,        nan],
       [0.99836986, 0.99836986, 0.99836986,        nan]]), 'recall': array([[0.99840102],
       [0.99724409],
       [0.99836986]]), 'F1-score': array([[0.99840102],
       [0.99724409],
       [0.99836986]]), 'accuracy': array([[0.99840102],
       [0.99724409],
       [0.99836986]])}
20180227
skipping
{'precision_recall_fscore_support': array([[0.99840102, 0.99840102, 0.99840102,        nan],
       [0.99724409, 0.99724409, 0.99724409,        nan],
       [0.99836986, 0.99836986, 0.99836986,        nan]]), 'recall': array([[0.99840102],
       [0.99724409],
       [0.99836986]]), 'F1-score': array([[0.99840102],
       [0.99724409],
       [0.99836986]]), 'accuracy': array([[0.99840102],
       [0.99724409],
       [0.99836986]])}
20180228
skipping
{'precision_recall_fscore_support': array([[0.99840102, 0.99840102, 0.99840102,

0.9978165938864629
{'precision_recall_fscore_support': array([[0.99781659, 0.99781659, 0.99781659,        nan],
       [0.95740608, 0.95740608, 0.95740608,        nan],
       [0.70402756, 0.70402756, 0.70402756,        nan]]), 'recall': array([[0.        ],
       [0.99781659],
       [0.99814668]]), 'F1-score': array([[0.        ],
       [0.99781659],
       [0.99814668]]), 'accuracy': array([[0.99781659],
       [0.99781659],
       [0.99814668]])}
20180410
0.9981466772570824
{'precision_recall_fscore_support': array([[0.99781659, 0.99781659, 0.99781659,        nan],
       [0.99814668, 0.99814668, 0.99814668,        nan],
       [0.70402756, 0.70402756, 0.70402756,        nan]]), 'recall': array([[0.        ],
       [0.        ],
       [0.99814668]]), 'F1-score': array([[0.        ],
       [0.        ],
       [0.99814668]]), 'accuracy': array([[0.99781659],
       [0.99814668],
       [0.99814668]])}
20180411
0.9976640971735575
{'precision_recall_fscore_support': array([[0.997

In [187]:
pickle.load(open(results_loc, "rb"))

{'F1-score': array([[0.]]),
 'accuracy': array([[0.99768372]]),
 'precision_recall_fscore_support': array([[0.99768372, 0.99768372, 0.99768372,        nan]]),
 'recall': array([[0.]])}

In [186]:
fitted_models_results

{'F1-score': array([], shape=(0, 1), dtype=float64),
 'accuracy': array([], shape=(0, 1), dtype=float64),
 'precision_recall_fscore_support': array([], shape=(0, 4), dtype=float64),
 'recall': array([], shape=(0, 1), dtype=float64)}

In [59]:
fwd_dates_list_forward = [i for i in keys if i > model_date]
fwd_dates_list_backward = [i for i in keys if i < model_date + str(15)]

In [69]:
from datetime import datetime
from datetime import timedelta

In [62]:
datetime.strptime(model_date, '%Y%m%d')

datetime.datetime(2018, 4, 20, 0, 0)

In [63]:
from dateutil.parser import parse

In [79]:
parse(model_date) + datetime.timedelta(days=10)

AttributeError: type object 'datetime.datetime' has no attribute 'timedelta'

In [70]:
date_1 = datetime.strptime(model_date, '%Y%m%d')

end_date = date_1 + timedelta(days=10)

In [94]:
str(date_1)

'2018-04-20 00:00:00'

### manual check ###

In [89]:
os.listdir(symbol_labels_path)

['20170928.csv',
 '20171027.csv',
 '20170803.csv',
 '20170711.csv',
 '20170824.csv',
 '20170808.csv',
 '20171004.csv',
 '20170705.csv',
 '20171024.csv',
 '20180220.csv',
 '20170904.csv',
 '20170915.csv',
 '20170913.csv',
 '20171020.csv',
 '20170802.csv',
 '20171009.csv',
 '20170830.csv',
 '20171003.csv',
 '20170810.csv',
 '20170727.csv',
 '20170712.csv',
 '20170831.csv',
 '20170829.csv',
 '20180411.csv',
 '20170801.csv',
 '20170704.csv',
 '20170901.csv',
 '20171018.csv',
 '20170821.csv',
 '20180201.csv',
 '20170908.csv',
 '20170914.csv',
 '20180221.csv',
 '20170725.csv',
 '20170728.csv',
 '20171026.csv',
 '20180205.csv',
 '20170922.csv',
 '20180419.csv',
 '20180405.csv',
 '20170703.csv',
 '20170825.csv',
 '20180212.csv',
 '20170719.csv',
 '20171010.csv',
 '20170816.csv',
 '20180417.csv',
 '20180219.csv',
 '20170815.csv',
 '20180413.csv',
 '20170907.csv',
 '20170706.csv',
 '20170710.csv',
 '20171031.csv',
 '20171016.csv',
 '20180216.csv',
 '20180416.csv',
 '20170811.csv',
 '20170921.csv