In [1]:
# Import libraries
import numpy as np
import pandas as pd
from datetime import datetime
import copy

import networkx as nx

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

# Set paths
# All locations are relative to the notebook's working directory.
fig_path = './figures/'
data_path = './data/'      # S&P 500 price/constituent CSV files
yf_path = './data/yf_data/'  # one CSV per ticker (read as yf_path + <ticker> + ".csv")

------------------------------------
**Get the companies**
------------------------------------

Choose 4 sectors (`Communication Services, Consumer Discretionary, Financials, Information Technology`) and select the 3 companies with the largest market capitalizations within each sector. The chosen assets are shown below.

In [2]:
# Read data
# Strict "%Y-%m-%d" parser shared by the CSV loads in this notebook
# (it is reused when the per-stock files are read).
# NOTE(review): `date_parser` is deprecated in pandas >= 2.0 — confirm the pinned pandas version before upgrading.
mydateparser = lambda x: datetime.strptime(x, "%Y-%m-%d")
snp = pd.read_csv(data_path+"snp_allstocks_2015_2019.csv", index_col='Date', parse_dates=True, date_parser=mydateparser)
info = pd.read_csv(data_path+'snp_info.csv', index_col=0)


# Sources of the market-cap data:
# https://www.slickcharts.com/sp500
# https://datahub.io/core/s-and-p-500-companies-financials
detailed_info = pd.read_csv(data_path+'constituents-financials.csv', index_col=0)
# Sector of every constituent, ordered from largest to smallest market cap.
stocks_sorted = detailed_info.sort_values('Market Cap', ascending=False)['Sector']

In [3]:
# Tickers skipped while walking the market-cap ranking.
# NOTE(review): presumably these have no entry in `info` / no price file — confirm.
EXCLUDED_TICKERS = ['PCLN', 'TWX', 'AET', 'MON', 'PX', 'ESRX']
# Only the first N rows of the market-cap ranking are considered.
N_LARGEST = 160

stocks_by_sector = {sector: [] for sector in info['GICS Sector'].unique()}

# Build the symbol -> sector lookup once instead of re-indexing `info` for every ticker.
sector_by_symbol = info.set_index('Symbol')['GICS Sector']

for stock in stocks_sorted.index[:N_LARGEST]:
    if stock in EXCLUDED_TICKERS:
        continue
    # The ranking file spells this ticker 'BRK.B'; it is looked up as 'BRK-B'.
    stock = 'BRK-B' if stock == 'BRK.B' else stock

    sector = sector_by_symbol.loc[stock]
    # Tickers arrive in descending market-cap order, so each list stays sorted.
    stocks_by_sector[sector].append(stock)
# stocks

In [4]:
# Keep the three largest companies (by market cap) of a hand-picked subset of
# GICS sectors. Sectors are listed alphabetically so the dict keeps that order.
selected_sectors = [
    'Communication Services',
    'Consumer Discretionary',
    'Financials',
    'Information Technology',
]

stocks = {name: stocks_by_sector[name][:3] for name in selected_sectors}
stocks

{'Communication Services': ['GOOGL', 'GOOG', 'FB'],
 'Consumer Discretionary': ['AMZN', 'HD', 'MCD'],
 'Financials': ['JPM', 'BAC', 'WFC'],
 'Information Technology': ['AAPL', 'MSFT', 'V']}

------------------------------------
**Get the data and the indicators**
------------------------------------
We collect the stock data from yfinance for the period `'2015-01-02'` to `'2019-01-01'` (4 years) and calculate several technical indicators that are helpful for the analysis:
- Relative Strength Index (RSI)
- Stochastic Oscillator
- Williams %R
- Moving Average Convergence Divergence (MACD)
- Price Rate Of Change
- On Balance Volume

After calculating these indicators for each day, we add them to the dataframe of the stock as new columns. We select the columns `['Close', 'RSI', 'k_percent', 'r_percent', 'MACD', 'MACD_EMA9', 'Price Rate Of Change', 'On Balance Volume']` at the end to obtain a new dataframe representing the stock.

We do this for each of the stocks we have selected and concatenate the individual dataframes to obtain a resulting dataframe of shape `(T, NxM)`, where `T` is the number of time steps, `N` the number of assets, and `M` the number of indicators for each asset.

We also standardize the names of individual assets (e.g. `asset_1`) for tensorization.

In [5]:
# Calculate n-day RSI
def get_RSI(data, n=14):
    """Relative Strength Index built from exponentially weighted gains and losses.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain a 'change_in_price' column.
    n : int
        Span of the exponentially weighted moving averages.

    Returns
    -------
    pd.Series
        RSI values aligned with ``data``'s index.
    """
    price_delta = data['change_in_price']

    # Gains keep the positive moves (negative ones become 0); losses keep the
    # magnitude of the negative moves (positive ones become 0).
    gains = price_delta.mask(price_delta < 0, 0)
    losses = price_delta.mask(price_delta > 0, 0).abs()

    # Smooth both legs with an EWMA of span n.
    avg_gain = gains.ewm(span=n).mean()
    avg_loss = losses.ewm(span=n).mean()

    # Relative strength, then the usual mapping onto the RSI scale.
    rs = avg_gain / avg_loss
    return 100.0 - (100.0 / (1.0 + rs))



# Calculate the n-day Stochastic Oscillator
def get_Stochastic_Oscillator(data, n=14):
    """Stochastic oscillator (%K) over a rolling window of ``n`` rows.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain 'Low', 'High' and 'Close' columns.
    n : int
        Rolling window length.

    Returns
    -------
    pd.Series
        ``100 * (Close - rolling low) / (rolling high - rolling low)``;
        NaN until the window is full.
    """
    lowest_low = data['Low'].rolling(window=n).min()
    highest_high = data['High'].rolling(window=n).max()

    # Position of the close inside the window's trading range.
    distance_from_low = data['Close'] - lowest_low
    trading_range = highest_high - lowest_low

    return 100 * (distance_from_low / trading_range)



# Calculate the Williams %R
def get_Williams(data, n=14):
    """Williams %R over a rolling window of ``n`` rows.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain 'Low', 'High' and 'Close' columns.
    n : int
        Rolling window length.

    Returns
    -------
    pd.Series
        ``-100 * (rolling high - Close) / (rolling high - rolling low)``;
        NaN until the window is full.
    """
    lowest_low = data['Low'].rolling(window=n).min()
    highest_high = data['High'].rolling(window=n).max()

    # Distance of the close from the top of the window's trading range.
    distance_from_high = highest_high - data['Close']
    trading_range = highest_high - lowest_low

    return (distance_from_high / trading_range) * -100



# Calculate the MACD
def get_MACD(data):
    """MACD line and its 9-span EMA.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain a 'Close' column.

    Returns
    -------
    tuple[pd.Series, pd.Series]
        ``(macd, ema_9_macd)`` where ``macd`` is the 12-span EMA of the close
        minus the 26-span EMA, and ``ema_9_macd`` is the 9-span EMA of ``macd``.
    """
    close = data['Close']
    fast_ema, slow_ema = (close.ewm(span=span).mean() for span in (12, 26))

    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=9).mean()

    return macd_line, signal_line
    

    
# Calculate On Balance Volume
def get_OBV(data):
    """On Balance Volume: a running total of volume signed by the price move.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain 'Volume' and 'change_in_price' columns.

    Returns
    -------
    pd.Series
        Running total indexed like ``data``. The volume is added on a positive
        change and subtracted on a negative one; any other change (zero or NaN)
        leaves the total untouched. The total starts from 0.
    """
    running_total = 0
    history = []

    for delta, traded in zip(data['change_in_price'], data['Volume']):
        if delta > 0:
            running_total = running_total + traded
        elif delta < 0:
            running_total = running_total - traded
        # Neither comparison holds for 0 or NaN, so the total carries over.
        history.append(running_total)

    return pd.Series(history, index=data.index)

Note that `labels` are set according to the price change $n$ days-out (set to be 9 below).

In [6]:
# Build one feature frame and one label series per stock, then concatenate them
# column-wise into `samples` (3-level column index) and `labels` (one column per ticker).
samples = []
labels = []
for sector in stocks:
    for i, stock in enumerate(stocks[sector]):
        
        # Standardised per-sector asset name ('asset_1', 'asset_2', ...) so that
        # every sector shares the same 'Asset' labels.
        stock_name = 'asset_' + str(i+1)
        
        # get the original data
        data = pd.read_csv(yf_path+stock+".csv", index_col='Date', parse_dates=True, date_parser=mydateparser)

        # calculate change in price
        data['change_in_price'] = data['Close'].diff()

        # calculate indicators
        data['RSI'] = get_RSI(data)
        data['k_percent'] = get_Stochastic_Oscillator(data)
        data['r_percent'] = get_Williams(data)

        # Calculate the MACD
        macd, ema_9_macd = get_MACD(data)
        data['MACD'] = macd
        data['MACD_EMA9'] = ema_9_macd

        # Calculate the 9-day Price Rate of Change
        data['Price Rate Of Change'] = data['Close'].pct_change(periods=9)

        # Calculate On Balance Volume
        data['On Balance Volume'] = get_OBV(data)

        # Create the prediction column (To keep this as a binary classifier we'll consider flat days as up days)
        # sign(sign(future - now) + 1) maps a lower future close to 0 and an equal/higher one to 1;
        # the last `days_out` rows have no future close, stay NaN and are dropped below.
        days_out = 9
        data['Prediction'] = np.sign(np.sign(data['Close'].shift(-days_out) - data['Close']) + 1.)

        # Drop rows with NaN.
        data = data.dropna()

        X_i = data[['Close', 'RSI', 'k_percent', 'r_percent', 'MACD', 'MACD_EMA9', 'Price Rate Of Change', 'On Balance Volume']].copy()
        # Three parallel lists become a (sector, asset, metric) column MultiIndex.
        X_i.columns = [[sector]*len(X_i.columns), [stock_name]*len(X_i.columns), X_i.columns]
        
        y_i = data['Prediction'].copy()
        y_i.name = stock
        
        samples.append(X_i)
        labels.append(y_i)

        
# Align all stocks on the date index.
samples = pd.concat(samples, axis=1)
samples.columns.names = ['Sector', 'Asset', 'Metrics']

labels = pd.concat(labels, axis=1)

In [7]:
samples.head()

Sector,Communication Services,Communication Services,Communication Services,Communication Services,Communication Services,Communication Services,Communication Services,Communication Services,Communication Services,Communication Services,...,Information Technology,Information Technology,Information Technology,Information Technology,Information Technology,Information Technology,Information Technology,Information Technology,Information Technology,Information Technology
Asset,asset_1,asset_1,asset_1,asset_1,asset_1,asset_1,asset_1,asset_1,asset_2,asset_2,...,asset_2,asset_2,asset_3,asset_3,asset_3,asset_3,asset_3,asset_3,asset_3,asset_3
Metrics,Close,RSI,k_percent,r_percent,MACD,MACD_EMA9,Price Rate Of Change,On Balance Volume,Close,RSI,...,Price Rate Of Change,On Balance Volume,Close,RSI,k_percent,r_percent,MACD,MACD_EMA9,Price Rate Of Change,On Balance Volume
Date,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2015-01-22,537.299988,78.297375,96.786901,-3.213099,1.956715,-0.117862,0.059951,-33200,532.926819,79.627212,...,-0.009666,-94025500,64.400002,44.509507,40.00004,-59.99996,-0.143929,-0.121585,-0.025903,13929600
2015-01-23,541.950012,80.642355,93.651444,-6.348556,3.404653,0.612332,0.082341,2265100,538.471619,82.229428,...,-0.000212,-67813900,64.572502,47.916069,47.449404,-52.550596,-0.118848,-0.121018,-0.008598,20096400
2015-01-26,536.719971,70.724965,84.055032,-15.944968,4.143347,1.338989,0.079789,718500,533.744629,73.051049,...,0.008798,-110339400,64.1325,40.583452,35.150242,-64.849758,-0.125008,-0.121839,-0.013422,11544000
2015-01-27,521.190002,49.758853,55.559662,-44.440338,3.69422,1.820886,0.038641,-1238900,517.210022,50.362629,...,-0.07981,-279503400,62.747501,26.085235,13.987256,-86.012744,-0.213865,-0.140668,-0.037541,516400
2015-01-28,512.429993,41.711036,39.48624,-60.51376,2.752592,2.010646,0.012848,-3030000,508.603638,42.445246,...,-0.103786,-364010500,61.59,19.401479,0.307522,-99.692478,-0.352269,-0.183765,-0.035999,-10914400


In [8]:
labels.head()

Unnamed: 0_level_0,GOOGL,GOOG,FB,AMZN,HD,MCD,JPM,BAC,WFC,AAPL,MSFT,V
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-01-22,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0
2015-01-23,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
2015-01-26,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
2015-01-27,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
2015-01-28,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


------------------------------------
**GLTD**
------------------------------------

In [9]:
import scipy.linalg
import scipy.sparse.linalg

from hottbox.core import Tensor, TensorTKD
from hottbox.algorithms.decomposition import HOSVD, HOOI
from hottbox.utils.generation import residual_tensor

In [10]:
# Implemented for third order tensor decomposition
# Last mode should be the regularized mode
class GLTD:
    """Alternating Tucker decomposition of a third-order tensor.

    Each sweep refreshes the three factor matrices one mode at a time and then
    recomputes the core tensor; sweeps stop once the relative approximation
    error is small enough or stops changing.

    NOTE(review): the last mode is meant to be regularized, but no
    regularization term is applied yet (see the placeholder comment in
    `_update_fmat`).

    Parameters
    ----------
    max_iter : int
        Maximum number of sweeps; must be at least 1.
    epsilon : float
        Relative approximation error at which iterating stops.
    tol : float
        Change in relative error between two consecutive sweeps below which
        the algorithm is considered converged.
    verbose : bool
        If True, print the reason for early termination.
    """

    def __init__(self, max_iter=50, epsilon=1e-2, tol=1e-4, verbose=False):
        self.max_iter = max_iter
        self.epsilon = epsilon
        self.tol = tol
        self.verbose = verbose

    def decompose(self, tensor, rank):
        """Compute the Tucker representation of ``tensor``.

        Parameters
        ----------
        tensor : Tensor
            Third-order tensor to decompose.
        rank : tuple
            Desired multilinear rank, one entry per mode.

        Returns
        -------
        TensorTKD
            Tucker representation produced by the last sweep.

        Raises
        ------
        ValueError
            If ``max_iter`` is smaller than 1, if ``rank`` does not have one
            entry per mode, or if ``tensor`` is not of order 3.
        TypeError
            If ``tensor`` is not a ``Tensor`` or ``rank`` is not a tuple.
        """
        # Without at least one sweep there is nothing to return and no cost to report.
        if self.max_iter < 1:
            raise ValueError("Parameter `max_iter` should be at least 1!")
        if not isinstance(tensor, Tensor):
            raise TypeError("Parameter `tensor` should be an object of `Tensor` class!")
        if not isinstance(rank, tuple):
            raise TypeError("Parameter `rank` should be passed as a tuple!")
        if tensor.order != len(rank):
            raise ValueError("Parameter `rank` should be a tuple of same length as the order of a tensor:\n"
                             "{} != {} (tensor.order != len(rank))".format(tensor.order, len(rank)))
        # The factor updates below pair up "the other two modes", so only order 3 is supported.
        if tensor.order != 3:
            raise ValueError("Only third order tensors are supported:\n"
                             "{} != 3 (tensor.order != 3)".format(tensor.order))

        converged = False
        cost = []
        tensor_tkd = None
        fmat_gltd = self._init_fmat(tensor, rank)
        norm = tensor.frob_norm
        for _ in range(self.max_iter):

            # Update factor matrices, mode by mode; each update already sees
            # the factors refreshed earlier in the same sweep.
            for mode in range(len(fmat_gltd)):
                fmat_gltd[mode] = self._update_fmat(tensor, fmat_gltd, mode, rank[mode])

            # Update core
            core = tensor.copy()
            for mode, fmat in enumerate(fmat_gltd):
                core.mode_n_product(fmat.T, mode=mode)

            # Update cost
            tensor_tkd = TensorTKD(fmat=fmat_gltd, core_values=core.data)
            residual = residual_tensor(tensor, tensor_tkd)
            cost.append(abs(residual.frob_norm / norm))

            # Check termination conditions
            if cost[-1] <= self.epsilon:
                if self.verbose:
                    print('Relative error of approximation has reached the acceptable level: {}'.format(cost[-1]))
                break
            if len(cost) >= 2 and abs(cost[-2] - cost[-1]) <= self.tol:
                converged = True
                if self.verbose:
                    print('Converged in {} iteration(s)'.format(len(cost)))
                break

        if not converged and cost[-1] > self.epsilon:
            # A single sweep leaves no previous cost to compare against.
            variation = abs(cost[-2] - cost[-1]) if len(cost) >= 2 else float('nan')
            print('Maximum number of iterations ({}) has been reached. '
                  'Variation = {}'.format(self.max_iter, variation))

        return tensor_tkd

    def _update_fmat(self, tensor, fmat, mode, n_components):
        """Recompute the factor matrix of ``mode`` with the other two held fixed.

        Parameters
        ----------
        tensor : Tensor
            Tensor being decomposed.
        fmat : list[np.ndarray]
            Current factor matrices, one per mode.
        mode : int
            Mode whose factor matrix is refreshed.
        n_components : int
            Number of leading left singular vectors to keep.

        Returns
        -------
        np.ndarray
            New factor matrix for ``mode``.
        """
        first, second = [m for m in range(len(fmat)) if m != mode]

        # Project the tensor onto the column space of each of the other factors.
        projected_first = tensor.mode_n_product(np.dot(fmat[first], fmat[first].T),
                                                mode=first, inplace=False)
        projected_second = tensor.mode_n_product(np.dot(fmat[second], fmat[second].T),
                                                 mode=second, inplace=False)

        gram = self._slice_inner_products(projected_first.data, projected_second.data, mode)

        # add regularization here
        left_vectors, _, _ = scipy.linalg.svd(gram)
        return left_vectors[:, :n_components]

    @staticmethod
    def _slice_inner_products(a, b, mode):
        """Matrix of inner products between the ``mode``-slices of ``a`` and ``b``.

        Entry ``[i, j]`` equals ``trace(a_i.T @ b_j)`` where ``a_i`` / ``b_j``
        are the i-th / j-th slices taken along ``mode``. It is evaluated as a
        single matrix product of the two mode unfoldings instead of an explicit
        double loop over slices.
        """
        a = np.asarray(a)
        b = np.asarray(b)
        a_rows = np.moveaxis(a, mode, 0).reshape(a.shape[mode], -1)
        b_rows = np.moveaxis(b, mode, 0).reshape(b.shape[mode], -1)
        return np.dot(a_rows, b_rows.T)

    def _init_fmat(self, tensor, rank):
        """ Initialisation of factor matrices

        Parameters
        ----------
        tensor : Tensor
            Multidimensional data to be decomposed
        rank : tuple
            Desired multilinear rank for the given `tensor`

        Returns
        -------
        fmat : list[np.ndarray]
            List of factor matrices
        """
#         hosvd = HOSVD()
#         tensor_hosvd = hosvd.decompose(tensor, rank)
#         fmat = tensor_hosvd.fmat

        # init each fmat as the leading columns of the identity matrix
        return [np.identity(tensor.shape[mode])[:, :rank[mode]] for mode in range(3)]

------------------------------------
**Tensor Ensemble Learning**
------------------------------------

In [11]:
### Evaluates the confidence in the predicted downturns
def downturn_confidence(actual, predicted):
    """Summarise how often a predicted downturn (label 0) was correct.

    Parameters
    ----------
    actual, predicted : sequence
        True and predicted labels, indexable by position.

    Returns
    -------
    tuple or None
        ``(n, x, x / n)`` where ``n`` counts the positions predicted as 0 and
        ``x`` those of them where the prediction matches ``actual``;
        ``None`` when nothing was predicted as 0.
    """
    flagged = [pos for pos in range(len(actual)) if predicted[pos] == 0]
    if not flagged:
        return None

    n_flagged = len(flagged)
    n_correct = sum(1 for pos in flagged if predicted[pos] == actual[pos])
    return (n_flagged, n_correct, n_correct / n_flagged)

# Helper function to display scores of multiclass classification
def print_scores(scores):
    """Print fractional scores as a bracketed, comma-separated list of percentages.

    Each score is multiplied by 100 and shown with two decimals, e.g. ``[50.00%, 25.00%]``.
    """
    formatted = ", ".join("{:.2f}%".format(value * 100) for value in scores)
    print('[' + formatted + ']')
    
def print_1_percentage(y):
    """Print the share of labels equal to 1, one percentage per label column.

    Parameters
    ----------
    y : array-like
        Labels, either 1-D (single target) or 2-D with one column per target.

    The number of targets is taken from the shape of ``y`` itself rather than
    from a notebook-level variable, so the helper works on a fresh kernel
    regardless of which cells have already run.
    """
    y = np.asarray(y)
    # Column-wise count of ones divided by the number of samples.
    percentages = np.sum(y == 1., axis=0) / len(y)

    # A 1-D `y` yields a scalar; wrap it so print_scores always gets a sequence.
    print_scores(list(np.atleast_1d(percentages)))

In [12]:
from hottbox.core import Tensor, TensorTKD
from hottbox.pdtools import pd_to_tensor
from hottbox.algorithms.decomposition import HOSVD, HOOI
# from hottbox.utils.generation import residual_tensor
from hottbox.algorithms.classification import TelVI

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [13]:
# One third-order tensor per time step; the column levels are reordered so that
# 'Sector' becomes the third mode of the tensor.
X = [
    pd_to_tensor(samples.iloc[t].reorder_levels(['Metrics', 'Asset', 'Sector']))
    for t in range(len(samples))
]

# Label matrix: one row per time step, one column per stock.
y = np.array(labels)

print(X[0], '\n\n', y[0])

This tensor is of order 3 and consists of 96 elements.
Sizes and names of its modes are (8, 3, 4) and ['Metrics', 'Asset', 'Sector'] respectively. 

 [0. 0. 0. 1. 1. 1. 0. 0. 0. 1. 0. 1.]


In [14]:
class GRTEL:
    """One `TelVI` ensemble per target column.

    Parameters
    ----------
    base_clfs : list
        ``base_clfs[i]`` is handed to the i-th `TelVI` model as its ``base_clf``.
    n_classes : int
        Number of target columns, i.e. number of `TelVI` models created.
    probability : bool
        Forwarded to every `TelVI` model.
    verbose : bool
        Forwarded to every `TelVI` model.

    All methods branch on ``self.n_classes`` so an instance behaves the same
    regardless of any notebook-level variable of a similar name.
    """

    def __init__(self, base_clfs, n_classes=1, probability=False, verbose=False):
        self.probability = probability
        self.verbose = verbose
        self.n_classes = n_classes
        self.models = [TelVI(base_clf=base_clfs[i], probability=self.probability, verbose=self.verbose) for i in range(self.n_classes)]

    def fit(self, X, y):
        """Fit every model; with several targets, model i is trained on column i of ``y``."""
        if self.n_classes == 1:
            self.models[0].fit(X, y)
        elif self.n_classes > 1:
            for i in range(self.n_classes):
                # Progress marker, one per target.
                print(i, end=" - ")
                self.models[i].fit(X, y[:, i])
            print()

    def score(self, X, y):
        """Return a single score for one target, or a list of per-target scores."""
        if self.n_classes == 1:
            return self.models[0].score(X, y)
        elif self.n_classes > 1:
            return [self.models[i].score(X, y[:, i]) for i in range(self.n_classes)]

    def grid_search(self, X, y, search_params):
        """Run every model's own grid search, using column i of ``y`` for model i."""
        if self.n_classes == 1:
            self.models[0].grid_search(X, y, search_params)
        elif self.n_classes > 1:
            for i in range(self.n_classes):
                # Progress marker, one per target.
                print(i, end=" - ")
                self.models[i].grid_search(X, y[:, i], search_params)
            print()

    def predict(self, X):
        """Return a list holding each model's predictions for ``X``, in model order."""
        return [self.models[i].predict(X) for i in range(self.n_classes)]

In [15]:
# Represent each sample in Tucker form and store it in a list
# algo = HOOI()
algo = GLTD()
# Multilinear rank, one entry per tensor mode (Metrics, Asset, Sector).
rank = (4,2,2)
X_tk = [algo.decompose(sample, rank=rank) for sample in X]


# Split into train and test set
# NOTE(review): train_test_split shuffles by default, so neighbouring days (whose
# look-ahead label windows overlap) can land on both sides of the split — consider
# the chronological split kept below; confirm which evaluation is intended.
X_train, X_test, y_train, y_test = train_test_split(X_tk, y, test_size=0.25, random_state=42)

# Alternative: chronological split (first 75% train, last 25% test)
# test_size = 0.25
# k = int(len(X_tk) * (1. - test_size))

# X_train, X_test = X_tk[:k], X_tk[k:]
# y_train, y_test = y[:k], y[k:]

In [16]:
# Initialise classifier
R = np.sum(rank) # number of base classifiers required per class
# One target per label column; a 1-D label vector counts as a single target.
n_classes = 1 if y.ndim == 1 else y.shape[1]

# base_clfs[i] holds the R decision trees used by the model of target i.
# NOTE(review): the trees are created without a random_state — results may vary between runs; confirm.
base_clfs = []
for _ in range(n_classes):
    base_clfs.append([DecisionTreeClassifier() for _ in range(R)])

grtel = GRTEL(base_clfs=base_clfs,
              n_classes=n_classes,
              probability=True,
              verbose=False)


# Train classifier
grtel.fit(X_train, y_train)


# Scores
# grtel.score returns a bare number for a single target; wrap it so print_scores gets a list.
score = grtel.score(X_train, y_train)
score = score if n_classes > 1 else [score]
print("\nClassification accuracy (Train):")
print_scores(score); print()

score = grtel.score(X_test, y_test)
score = score if n_classes > 1 else [score]
print("Classification accuracy (Test):")
print_scores(score); print()

# Share of label 1 per target in the test set, shown next to the accuracies above.
print("Percentage of 1s (Test):")
print_1_percentage(y_test); print()

0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 

Classification accuracy (Train):
[100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%, 100.00%]

Classification accuracy (Test):
[68.29%, 66.67%, 75.20%, 77.24%, 77.24%, 72.36%, 75.61%, 75.20%, 79.67%, 76.83%, 70.73%, 75.20%]

Percentage of 1s (Test):
[58.54%, 58.13%, 58.13%, 64.23%, 58.94%, 63.01%, 56.91%, 55.28%, 55.69%, 53.66%, 59.35%, 58.54%]



In [17]:
# Downturn confidence on the test set, one entry per fitted model:
# (number of predicted downturns, number of correct ones, their ratio).
# Iterating over the models avoids hard-coding the number of stocks.
results = [downturn_confidence(y_test[:, i], model.predict(X_test))
           for i, model in enumerate(grtel.models)]
results

[(84, 54, 0.6428571428571429),
 (101, 63, 0.6237623762376238),
 (90, 66, 0.7333333333333333),
 (87, 60, 0.6896551724137931),
 (101, 72, 0.7128712871287128),
 (80, 51, 0.6375),
 (100, 74, 0.74),
 (95, 72, 0.7578947368421053),
 (135, 97, 0.7185185185185186),
 (99, 79, 0.797979797979798),
 (96, 64, 0.6666666666666666),
 (78, 60, 0.7692307692307693)]

In [18]:
# Downturn confidence on the training set, one entry per fitted model:
# (number of predicted downturns, number of correct ones, their ratio).
# Iterating over the models avoids hard-coding the number of stocks.
results = [downturn_confidence(y_train[:, i], model.predict(X_train))
           for i, model in enumerate(grtel.models)]
results

[(317, 317, 1.0),
 (307, 307, 1.0),
 (303, 303, 1.0),
 (263, 263, 1.0),
 (308, 308, 1.0),
 (284, 284, 1.0),
 (315, 315, 1.0),
 (320, 320, 1.0),
 (359, 359, 1.0),
 (308, 308, 1.0),
 (287, 287, 1.0),
 (265, 265, 1.0)]

-------------
Grid Search:
-------------

In [19]:
# Hyperparameter grid shared by every base classifier.
# 'auto' is deliberately not listed: for DecisionTreeClassifier it was an alias of
# 'sqrt' (so it added no new configuration) and recent scikit-learn releases reject it.
max_features = ['sqrt', None, 'log2']
max_depth = list(range(10, 70, 10))
max_depth.append(None)
min_samples_split = [2, 5, 10, 20, 30]
min_samples_leaf = [1, 3, 5, 7, 12, 14]

search_grid = {'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf}

# One (identical) grid per base classifier of a model.
search_params = [search_grid for _ in range(R)]

print("\tPerforming grid search for each base classifer and for each class")
grtel.grid_search(X_train, y_train, search_params)

print("\tTrain base classifiers with optimal hyperparameters")
grtel.fit(X_train, y_train); print()

# grtel.score returns a bare number for a single target; wrap it so print_scores gets a list.
score = grtel.score(X_train, y_train)
score = score if n_classes > 1 else [score]
print("Classification accuracy (Train):")
print_scores(score); print()

score = grtel.score(X_test, y_test)
score = score if n_classes > 1 else [score]
print("Classification accuracy (Test):")
print_scores(score); print()

print("Percentage of 1s (Test):")
print_1_percentage(y_test); print()

	Performing grid search for each base classifer and for each class
0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 
	Train base classifiers with optimal hyperparameters
0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - 10 - 11 - 

Classification accuracy (Train):
[95.39%, 100.00%, 99.73%, 97.29%, 98.51%, 98.78%, 99.32%, 99.32%, 99.59%, 97.02%, 91.87%, 99.46%]

Classification accuracy (Test):
[65.04%, 71.14%, 71.54%, 76.42%, 76.02%, 76.42%, 78.05%, 78.05%, 78.86%, 76.83%, 67.07%, 70.73%]

Percentage of 1s (Test):
[58.54%, 58.13%, 58.13%, 64.23%, 58.94%, 63.01%, 56.91%, 55.28%, 55.69%, 53.66%, 59.35%, 58.54%]



In [20]:
# Downturn confidence on the test set after the grid search, one entry per fitted model:
# (number of predicted downturns, number of correct ones, their ratio).
# Iterating over the models avoids hard-coding the number of stocks.
results = [downturn_confidence(y_test[:, i], model.predict(X_test))
           for i, model in enumerate(grtel.models)]
results

[(96, 62, 0.6458333333333334),
 (105, 66, 0.6285714285714286),
 (84, 59, 0.7023809523809523),
 (69, 48, 0.6956521739130435),
 (82, 64, 0.7804878048780488),
 (77, 51, 0.6623376623376623),
 (83, 67, 0.8072289156626506),
 (103, 79, 0.7669902912621359),
 (122, 85, 0.6967213114754098),
 (89, 67, 0.7528089887640449),
 (74, 45, 0.6081081081081081),
 (39, 31, 0.7948717948717948)]

In [21]:
# Downturn confidence on the training set after the grid search, one entry per fitted model:
# (number of predicted downturns, number of correct ones, their ratio).
# Iterating over the models avoids hard-coding the number of stocks.
results = [downturn_confidence(y_train[:, i], model.predict(X_train))
           for i, model in enumerate(grtel.models)]
results

[(307, 275, 0.8957654723127035),
 (309, 306, 0.9902912621359223),
 (295, 294, 0.9966101694915255),
 (231, 222, 0.961038961038961),
 (292, 286, 0.9794520547945206),
 (258, 253, 0.9806201550387597),
 (300, 295, 0.9833333333333333),
 (313, 309, 0.987220447284345),
 (363, 348, 0.9586776859504132),
 (284, 272, 0.9577464788732394),
 (258, 227, 0.8798449612403101),
 (230, 229, 0.9956521739130435)]