# CNN Netflix

In [70]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense, Normalization
%matplotlib inline
import seaborn as sns
from keras.models import Sequential 
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, InputLayer, SimpleRNN
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping

from sklearn import (
    linear_model, metrics, neural_network, pipeline, model_selection
)
from sklearn.impute import SimpleImputer

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import TimeSeriesSplit

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
# %pip install -q -U keras-tuner
import keras_tuner as kt

In [2]:
Netflix = pd.read_csv("/Users/jackbrennan/Documents/GitHub/Stock-Predictions/Alex/Netflix_model_ready.csv")
Netflix.date = pd.to_datetime(Netflix.date)
Netflix = Netflix.set_index("date")
Netflix = Netflix.drop(["Nas_total", 'Stock_total', 'Dow_total'],axis=1) # to remove duplicated columns
pd.DataFrame(Netflix.isna().sum()).T

Unnamed: 0,Netflix. Inc,Netflix_x,Netflix Stock,Streaming media,Reed Hastings_x,Open,High,Low,Close,Volume,...,Dow_MAvg_s_Move,Dow_EMA_Move,Dow_Disparity_Move,Dow_Disparity_s_Move,Dow_RSI_Move,target_1,target_2,target_3,target_4,target_5
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
Netflix = Netflix.dropna()
Netflix = Netflix[~(Netflix.isin([np.inf, -np.inf]).any(axis=1))] 

In [4]:
Netflix = Netflix.drop(['target_1', 'target_2', 'target_4', 'target_5'], axis=1)
target_3 = Netflix["target_3"]

#splitting into training sets 
column_indices = {name: i for i, name in enumerate(Netflix.columns)}

n = len(Netflix)
X_train = Netflix[0:int(n*0.7)]
X_val = Netflix[int(n*0.7):int(n*0.9)]
X_test = Netflix[int(n*0.9):]

y_train = target_3[0:int(n*0.7)]
y_val = target_3[int(n*0.7):int(n*0.9)]
y_test = target_3[int(n*0.9):]

In [5]:
Mscaler = MinMaxScaler() # keeps binarys at zero and 1 :)

X_train = pd.DataFrame(Mscaler.fit_transform(X_train), columns = Netflix.columns)
X_val = pd.DataFrame(Mscaler.fit_transform(X_val), columns = Netflix.columns)
X_test = pd.DataFrame(Mscaler.fit_transform(X_test), columns = Netflix.columns)

In [6]:
def kbest_creator(k, df, df_val, df_test):
    """
    returns data frame of principle componets of # of k best features 
    """
    # apply SelectKBest class to extract top 40 best features
    bestfeatures = SelectKBest(score_func=f_regression, k=k)
    best_fit = bestfeatures.fit(df, y_train)
    best_scores = pd.DataFrame(best_fit.scores_)
    best_columns = pd.DataFrame(Netflix.columns)
    
    # concatenate the dataframes for better visualization
    features_score = pd.concat([best_columns, best_scores], axis=1)
    features_score.columns = ['Features', 'Score']  # naming the dataframe columns
    feats_kb = list(features_score.nlargest(k, 'Score').iloc[1:k]['Features'])

    pca = PCA().fit(df[feats_kb])
    pca_scores = np.cumsum(pca.explained_variance_ratio_) * 100
    res = next(x for x, val in enumerate(pca_scores) if val > 85)
    res = res + 1
    res
    
    pca_kb_1 = PCA(n_components = res).fit(df[feats_kb].to_numpy())
    df = pca_kb_1.transform(df[feats_kb].to_numpy())
    df_val = pca_kb_1.transform(df_val[feats_kb].to_numpy())
    df_test = pca_kb_1.transform(df_test[feats_kb].to_numpy())
    return df, df_val, df_test


In [7]:
X_train_kb_10, X_val_kb_10, X_test_kb_10 = kbest_creator(10, X_train, X_val, X_test) 
X_train_kb_25, X_val_kb_25, X_test_kb_25 = kbest_creator(25, X_train, X_val, X_test) 
X_train_kb_40, X_val_kb_40, X_test_kb_40 = kbest_creator(40, X_train, X_val, X_test) 
X_train_kb_55, X_val_kb_55, X_test_kb_55 = kbest_creator(55, X_train, X_val, X_test) 

  correlation_coefficient /= X_norms
  correlation_coefficient /= X_norms
  correlation_coefficient /= X_norms
  correlation_coefficient /= X_norms


# Model Testing

In [8]:
early_stopping_monitor = EarlyStopping(patience=4)

In [9]:
def df_to_X_y2(df, target, window_size=5):
  # converts to matrix of numpy arrays
  X = []
  y = []
  for i in range(len(df)-window_size): # length of data frame - window_size so it does't take empty values at the end, 
    # does force you to loose the last 5 values, could fix with padding
    row = [r for r in df[i:i+window_size]] # grabs row i and all rows above within the window size length
    X.append(row) # creates 3 dimentional array, (# obseravtions, # rows in window, # features)
    label = target[i+window_size] # pulls the target variable after the window, target varible needs to be column zero in this 
    y.append(label) # returns (N,) martix of targets i+window_length time periods away
  return np.array(X), np.array(y)

In [10]:
# converting to window format, in this case 5 periods
X_train_kb_40, train_5w = df_to_X_y2(X_train_kb_40,y_train)
X_val_kb_40, val_5w = df_to_X_y2(X_val_kb_40, y_val)
X_test_kb_40, test_5w = df_to_X_y2(X_test_kb_40,y_test) 

X_train_kb_10, _ = df_to_X_y2(X_train_kb_10,y_train)
X_val_kb_10, _ = df_to_X_y2(X_val_kb_10, y_val)
X_test_kb_10, _ = df_to_X_y2(X_test_kb_10,y_test) 

X_train_kb_25, _ = df_to_X_y2(X_train_kb_25,y_train)
X_val_kb_25, _ = df_to_X_y2(X_val_kb_25, y_val)
X_test_kb_25, _ = df_to_X_y2(X_test_kb_25,y_test) 

X_train_kb_55, _ = df_to_X_y2(X_train_kb_55,y_train)
X_val_kb_55, _ = df_to_X_y2(X_val_kb_55, y_val)
X_test_kb_55, _ = df_to_X_y2(X_test_kb_55,y_test) 

### Model Format 1

## Hyper Parameter tuned Model 1

In [78]:
# Model 1_1 - 1_4, using differnt k best pca variables

def model_builder_1_1(hp):
    n_steps = X_train_kb_10.shape[1]
    n_features = X_train_kb_10.shape[2]

    model1_1 = Sequential()

    hp_filters = hp.Int('units', min_value=4, max_value=128, step=4)
    hp_units = hp.Int('units', min_value=5, max_value=100, step=10)
    model1_1.add(Conv1D(filters=hp_filters, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

    model1_1.add(Flatten())
    model1_1.add(Dense(units = hp_units, activation='relu')) 
    model1_1.add(Dense(1, activation='sigmoid'))

    model1_1.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

    return model1_1

tuner = kt.Hyperband(model_builder_1_1,
                     objective='val_binary_accuracy',
                     max_epochs=30,
                     factor=3,
                     # directory='my_dir',    don't know what to do with this for now
                     # project_name='intro_to_kt' # sasme with this
                     )

tuner.search(X_train_kb_10, train_5w, epochs=50, validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

#model1_1.fit(X_train_kb_10, train_5w,epochs=30,  validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

INFO:tensorflow:Reloading Oracle from existing project ./untitled_project/oracle.json
INFO:tensorflow:Reloading Tuner from ./untitled_project/tuner0.json
INFO:tensorflow:Oracle triggered exit


In [79]:
# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
best_hps

<keras_tuner.engine.hyperparameters.HyperParameters at 0x15b2622e0>

In [75]:
model_1_1 = tuner.hypermodel.build(best_hps)
history = model_1_1.fit(X_train_kb_10, train_5w, epochs=30, validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


In [None]:
y_hat1_1 = model_1_1.predict(X_test_kb_10)
y_hat1_1 = y_hat1_1 > .5
metrics.accuracy_score(test_5w,y_hat1_1)

## Normal Models

In [82]:
n_steps = X_train_kb_10.shape[1]
n_features = X_train_kb_10.shape[2]

model1_1 = Sequential()


model1_1.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model1_1.add(Flatten())
model1_1.add(Dense(units = 15, activation='relu')) 
model1_1.add(Dense(1, activation='sigmoid'))

model1_1.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model1_1.fit(X_train_kb_10, train_5w,epochs=30,  validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<keras.callbacks.History at 0x15d8cc7f0>

In [76]:
y_hat1_1 = model1_1.predict(X_test_kb_10)
y_hat1_1 = y_hat1_1 > .5
metrics.accuracy_score(test_5w,y_hat1_1)

0.4117647058823529

In [13]:
n_steps = X_train_kb_25.shape[1]
n_features = X_train_kb_25.shape[2]

model1_2 = Sequential()
model1_2.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model1_2.add(Flatten())
model1_2.add(Dense(25, activation='relu')) 
model1_2.add(Dense(1, activation='sigmoid'))

model1_2.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model1_2.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x15923d2b0>

In [14]:
y_hat1_2 = model1_2.predict(X_test_kb_25)
y_hat1_2 = y_hat1_2 > .5
metrics.accuracy_score(test_5w,y_hat1_2)

0.5

In [15]:
n_steps = X_train_kb_40.shape[1]
n_features = X_train_kb_40.shape[2]

model1_3 = Sequential()
model1_3.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model1_3.add(Flatten())
model1_3.add(Dense(25, activation='relu')) 
model1_3.add(Dense(1, activation='sigmoid'))

model1_3.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model1_3.fit(X_train_kb_40, train_5w,epochs=30,  validation_data=(X_val_kb_40, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x1593f2f40>

In [16]:
y_hat1_3 = model1_3.predict(X_test_kb_40)
y_hat1_3 = y_hat1_3 > .5
metrics.accuracy_score(test_5w,y_hat1_3)

0.45588235294117646

In [17]:
n_steps = X_train_kb_55.shape[1]
n_features = X_train_kb_55.shape[2]

model1_4 = Sequential()
model1_4.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model1_4.add(Flatten())
model1_4.add(Dense(25, activation='relu')) 
model1_4.add(Dense(1, activation='sigmoid'))

model1_4.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model1_4.fit(X_train_kb_55, train_5w,epochs=30,  validation_data=(X_val_kb_55, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<keras.callbacks.History at 0x159547cd0>

In [18]:
y_hat1_4 = model1_4.predict(X_test_kb_55)
y_hat1_4 = y_hat1_4 > .5
metrics.accuracy_score(test_5w,y_hat1_4)

0.4264705882352941

### Model 2, Less Filters

In [19]:
# Model 2_1 - 2_4, using differnt k best pca variables. reduced filters
n_steps = X_train_kb_10.shape[1]
n_features = X_train_kb_10.shape[2]

model2_1 = Sequential()
model2_1.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_1.add(Flatten())
model2_1.add(Dense(25, activation='relu')) 
model2_1.add(Dense(1, activation='sigmoid'))

model2_1.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_1.fit(X_train_kb_10, train_5w,epochs=30,  validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


<keras.callbacks.History at 0x159667cd0>

In [20]:
y_hat2_1 = model2_1.predict(X_test_kb_10)
y_hat2_1 = y_hat2_1 > .5
metrics.accuracy_score(test_5w,y_hat2_1)



0.39705882352941174

In [21]:
n_steps = X_train_kb_25.shape[1]
n_features = X_train_kb_25.shape[2]

model2_2 = Sequential()
model2_2.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_2.add(Flatten())
model2_2.add(Dense(25, activation='relu')) 
model2_2.add(Dense(1, activation='sigmoid'))

model2_2.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_2.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x1597dac70>

In [22]:
y_hat2_2 = model2_2.predict(X_test_kb_25)
y_hat2_2 = y_hat2_2 > .5
metrics.accuracy_score(test_5w,y_hat2_2)



0.4852941176470588

In [23]:
n_steps = X_train_kb_40.shape[1]
n_features = X_train_kb_40.shape[2]

model2_3 = Sequential()
model2_3.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_3.add(Flatten())
model2_3.add(Dense(25, activation='relu')) 
model2_3.add(Dense(1, activation='sigmoid'))

model2_3.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_3.fit(X_train_kb_40, train_5w,epochs=30,  validation_data=(X_val_kb_40, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x1598ca0d0>

In [24]:
y_hat2_3 = model2_3.predict(X_test_kb_40)
y_hat2_3 = y_hat2_3 > .5
metrics.accuracy_score(test_5w,y_hat2_3)

0.45588235294117646

In [25]:
n_steps = X_train_kb_55.shape[1]
n_features = X_train_kb_55.shape[2]

model2_4 = Sequential()
model2_4.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_4.add(Flatten())
model2_4.add(Dense(25, activation='relu')) 
model2_4.add(Dense(1, activation='sigmoid'))

model2_4.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_4.fit(X_train_kb_55, train_5w,epochs=30,  validation_data=(X_val_kb_55, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x159ad2f70>

In [26]:
y_hat2_4 = model2_4.predict(X_test_kb_55)
y_hat2_4 = y_hat2_4 > .5
metrics.accuracy_score(test_5w,y_hat2_4)

0.4411764705882353

### Model 3, Adding layers

In [27]:
# Model 3_1 - 3_4, adding layers
n_steps = X_train_kb_10.shape[1]
n_features = X_train_kb_10.shape[2]

model3_1 = Sequential()
model3_1.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_1.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_1.add(MaxPooling1D(pool_size=2))
model3_1.add(Flatten())
model3_1.add(Dense(8, activation='relu')) 
model3_1.add(Dense(1, activation='sigmoid'))

model3_1.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_1.fit(X_train_kb_10, train_5w,epochs=30,  validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x159c3dfa0>

In [28]:
y_hat3_1 = model3_1.predict(X_test_kb_10)
y_hat3_1 = y_hat3_1 > .5
metrics.accuracy_score(test_5w,y_hat3_1)

0.47058823529411764

In [29]:
n_steps = X_train_kb_25.shape[1]
n_features = X_train_kb_25.shape[2]

model3_2 = Sequential()
model3_2.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_2.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_2.add(MaxPooling1D(pool_size=2))
model3_2.add(Flatten())
model3_2.add(Dense(25, activation='relu')) 
model3_2.add(Dense(1, activation='sigmoid'))

model3_2.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_2.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<keras.callbacks.History at 0x159dfee50>

In [30]:
y_hat3_2 = model3_2.predict(X_test_kb_25)
y_hat3_2 = y_hat3_2 > .5
metrics.accuracy_score(test_5w,y_hat3_2)

0.4852941176470588

In [31]:
n_steps = X_train_kb_40.shape[1]
n_features = X_train_kb_40.shape[2]

model3_3 = Sequential()
model3_3.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_3.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_3.add(MaxPooling1D(pool_size=2))
model3_3.add(Flatten())
model3_3.add(Dense(25, activation='relu')) 
model3_3.add(Dense(1, activation='sigmoid'))

model3_3.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_3.fit(X_train_kb_40, train_5w,epochs=30,  validation_data=(X_val_kb_40, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x159dd7820>

In [32]:
y_hat3_3 = model3_3.predict(X_test_kb_40)
y_hat3_3 = y_hat3_3 > .5
metrics.accuracy_score(test_5w,y_hat3_3)

0.35294117647058826

In [33]:
n_steps = X_train_kb_55.shape[1]
n_features = X_train_kb_55.shape[2]

model3_4 = Sequential()
model3_4.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_4.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_4.add(MaxPooling1D(pool_size=2))
model3_4.add(Flatten())
model3_4.add(Dense(25, activation='relu')) 
model3_4.add(Dense(1, activation='sigmoid'))

model3_4.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_4.fit(X_train_kb_55, train_5w,epochs=30,  validation_data=(X_val_kb_55, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<keras.callbacks.History at 0x159dcf130>

In [34]:
y_hat3_4 = model3_4.predict(X_test_kb_55)
y_hat3_4 = y_hat3_4 > .5
metrics.accuracy_score(test_5w,y_hat3_4)

0.4117647058823529

### Model 4, adding conv layer after pooling

In [35]:
# Model 4_1 - 4_4, adding conv layer after pooling
n_steps = X_train_kb_10.shape[1]
n_features = X_train_kb_10.shape[2]

model4_1 = Sequential()
model4_1.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model4_1.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model4_1.add(MaxPooling1D(pool_size=2))
model4_1.add(Conv1D(filters=16, kernel_size=1, activation='relu'))
model4_1.add(Flatten())
model4_1.add(Dense(8, activation='relu')) 
model4_1.add(Dense(1, activation='sigmoid'))

model4_1.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model4_1.fit(X_train_kb_10, train_5w,epochs=30,  validation_data=(X_val_kb_10, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<keras.callbacks.History at 0x15a326fa0>

In [36]:
y_hat4_1 = model4_1.predict(X_test_kb_10)
y_hat4_1 = y_hat4_1 > .5
metrics.accuracy_score(test_5w,y_hat4_1)

0.36764705882352944

In [37]:
n_steps = X_train_kb_25.shape[1]
n_features = X_train_kb_25.shape[2]

model4_2 = Sequential()
model4_2.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model4_2.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model4_2.add(MaxPooling1D(pool_size=2))
model4_2.add(Conv1D(filters=16, kernel_size=1, activation='relu'))
model4_2.add(Flatten())
model4_2.add(Dense(25, activation='relu')) 
model4_2.add(Dense(1, activation='sigmoid'))

model4_2.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model4_2.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


<keras.callbacks.History at 0x1597e7250>

In [38]:
y_hat4_2 = model4_2.predict(X_test_kb_25)
y_hat4_2 = y_hat4_2 > .5
metrics.accuracy_score(test_5w,y_hat4_2)

0.4852941176470588

In [39]:
n_steps = X_train_kb_40.shape[1]
n_features = X_train_kb_40.shape[2]

model4_3 = Sequential()
model4_3.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model4_3.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model4_3.add(MaxPooling1D(pool_size=2))
model4_3.add(Conv1D(filters=16, kernel_size=1, activation='relu'))
model4_3.add(Flatten())
model4_3.add(Dense(25, activation='relu')) 
model4_3.add(Dense(1, activation='sigmoid'))

model4_3.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model4_3.fit(X_train_kb_40, train_5w,epochs=30,  validation_data=(X_val_kb_40, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


<keras.callbacks.History at 0x15a574df0>

In [40]:
y_hat4_3 = model4_3.predict(X_test_kb_40)
y_hat4_3 = y_hat4_3 > .5
metrics.accuracy_score(test_5w,y_hat4_3)

0.5147058823529411

In [41]:
n_steps = X_train_kb_55.shape[1]
n_features = X_train_kb_55.shape[2]

model4_4 = Sequential()
model4_4.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model4_4.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model4_4.add(MaxPooling1D(pool_size=2))
model4_4.add(Conv1D(filters=16, kernel_size=1, activation='relu'))
model4_4.add(Flatten())
model4_4.add(Dense(25, activation='relu')) 
model4_4.add(Dense(1, activation='sigmoid'))

model4_4.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model4_4.fit(X_train_kb_55, train_5w,epochs=30,  validation_data=(X_val_kb_55, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x15a74dbe0>

In [42]:
y_hat4_4 = model4_4.predict(X_test_kb_55)
y_hat4_4 = y_hat4_4 > .5
metrics.accuracy_score(test_5w,y_hat4_4)

0.4852941176470588

# Trying the Correlation Method

In [43]:
corr_matrix = X_train.corr()
corr_matrix

feats_corr_10 = list(pd.DataFrame(corr_matrix['target_3'].sort_values(ascending=False).iloc[1:11]).reset_index()['index'])
feats_corr_25 = list(pd.DataFrame(corr_matrix['target_3'].sort_values(ascending=False).iloc[1:26]).reset_index()['index'])
feats_corr_40 = list(pd.DataFrame(corr_matrix['target_3'].sort_values(ascending=False).iloc[1:41]).reset_index()['index'])
feats_corr_55 = list(pd.DataFrame(corr_matrix['target_3'].sort_values(ascending=False).iloc[1:56]).reset_index()['index'])

X_train_cr_10 = X_train[feats_corr_10]
X_test_cr_10 = X_test[feats_corr_10]
X_val_cr_10 = X_val[feats_corr_10]
X_train_cr_25 = X_train[feats_corr_25]
X_test_cr_25 = X_test[feats_corr_25]
X_val_cr_25 = X_val[feats_corr_25]
X_train_cr_40 = X_train[feats_corr_40]
X_test_cr_40 = X_test[feats_corr_40]
X_val_cr_40 = X_val[feats_corr_40]
X_train_cr_55 = X_train[feats_corr_55]
X_test_cr_55 = X_test[feats_corr_55]
X_val_cr_55 = X_val[feats_corr_55]

def pca_finder(df, df_val, df_test):
    
    pca = PCA().fit(df)
    pca_scores = np.cumsum(pca.explained_variance_ratio_) * 100
    res = next(x for x, val in enumerate(pca_scores) if val > 85)
    res = res + 1
    res
    
    pca_cr_1 = PCA(n_components = res).fit(df.to_numpy())
    df = pca_cr_1.transform(df.to_numpy())
    df_val = pca_cr_1.transform(df_val.to_numpy())
    df_test = pca_cr_1.transform(df_test.to_numpy())
    return df, df_val, df_test

X_train_cr_10, X_val_cr_10, X_test_cr_10 = pca_finder(X_train_cr_10, X_val_cr_10, X_test_cr_10)
X_train_cr_25, X_val_cr_25, X_test_cr_25 = pca_finder(X_train_cr_25, X_val_cr_25, X_test_cr_25)
X_train_cr_40, X_val_cr_40, X_test_cr_40 = pca_finder(X_train_cr_40, X_val_cr_40, X_test_cr_40)
X_train_cr_55, X_val_cr_55, X_test_cr_55 = pca_finder(X_train_cr_55, X_val_cr_55, X_test_cr_55)

In [44]:
X_train_cr_40, train_5w = df_to_X_y2(X_train_cr_40,y_train)
X_val_cr_40, val_5w = df_to_X_y2(X_val_cr_40, y_val)
X_test_cr_40, test_5w = df_to_X_y2(X_test_cr_40,y_test) 

X_train_cr_10, _ = df_to_X_y2(X_train_cr_10,y_train)
X_val_cr_10, _ = df_to_X_y2(X_val_cr_10, y_val)
X_test_cr_10, _ = df_to_X_y2(X_test_cr_10,y_test) 

X_train_cr_25, _ = df_to_X_y2(X_train_cr_25,y_train)
X_val_cr_25, _ = df_to_X_y2(X_val_cr_25, y_val)
X_test_cr_25, _ = df_to_X_y2(X_test_cr_25,y_test) 

X_train_cr_55, _ = df_to_X_y2(X_train_cr_55,y_train)
X_val_cr_55, _ = df_to_X_y2(X_val_cr_55, y_val)
X_test_cr_55, _ = df_to_X_y2(X_test_cr_55,y_test) 

In [45]:
# Model 2_1 - 2_4, using differnt k best pca variables. reduced filters
n_steps = X_train_cr_10.shape[1]
n_features = X_train_cr_10.shape[2]

model2_1_cr = Sequential()
model2_1_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_1_cr.add(Flatten())
model2_1_cr.add(Dense(25, activation='relu')) 
model2_1_cr.add(Dense(1, activation='sigmoid'))

model2_1_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_1_cr.fit(X_train_cr_10, train_5w,epochs=30,  validation_data=(X_val_cr_10, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


<keras.callbacks.History at 0x159c45b20>

In [46]:
y_hat2_1_cr = model2_1_cr.predict(X_test_cr_10)
y_hat2_1_cr = y_hat2_1_cr > .5
metrics.accuracy_score(test_5w,y_hat2_1_cr)

0.5147058823529411

In [47]:
n_steps = X_train_cr_25.shape[1]
n_features = X_train_cr_25.shape[2]

model2_2_cr = Sequential()
model2_2_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_2_cr.add(Flatten())
model2_2_cr.add(Dense(25, activation='relu')) 
model2_2_cr.add(Dense(1, activation='sigmoid'))

model2_2_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_2_cr.fit(X_train_cr_25, train_5w,epochs=30,  validation_data=(X_val_cr_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30


<keras.callbacks.History at 0x15aa84ca0>

In [48]:
y_hat2_2_cr = model2_2_cr.predict(X_test_cr_25)
y_hat2_2_cr = y_hat2_2_cr > .5
metrics.accuracy_score(test_5w,y_hat2_2_cr)

0.5294117647058824

In [49]:
n_steps = X_train_cr_40.shape[1]
n_features = X_train_cr_40.shape[2]

model2_3_cr = Sequential()
model2_3_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_3_cr.add(Flatten())
model2_3_cr.add(Dense(25, activation='relu')) 
model2_3_cr.add(Dense(1, activation='sigmoid'))

model2_3_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_3_cr.fit(X_train_cr_40, train_5w,epochs=30,  validation_data=(X_val_cr_40, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30


<keras.callbacks.History at 0x15abf25b0>

In [50]:
y_hat2_3_cr = model2_3_cr.predict(X_test_cr_40)
y_hat2_3_cr = y_hat2_3_cr > .5
metrics.accuracy_score(test_5w,y_hat2_3_cr)

0.4264705882352941

In [51]:
n_steps = X_train_cr_55.shape[1]
n_features = X_train_cr_55.shape[2]

model2_4_cr = Sequential()
model2_4_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 

model2_4_cr.add(Flatten())
model2_4_cr.add(Dense(25, activation='relu')) 
model2_4_cr.add(Dense(1, activation='sigmoid'))

model2_4_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model2_4_cr.fit(X_train_cr_55, train_5w,epochs=30,  validation_data=(X_val_cr_55, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


<keras.callbacks.History at 0x15ad8ce50>

In [52]:
y_hat2_4_cr = model2_4_cr.predict(X_test_cr_55)
y_hat2_4_cr = y_hat2_4_cr > .5
metrics.accuracy_score(test_5w,y_hat2_4_cr)

0.45588235294117646

In [53]:
# Model 3_1 - 3_4, adding layers
n_steps = X_train_cr_10.shape[1]
n_features = X_train_cr_10.shape[2]

model3_1_cr = Sequential()
model3_1_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_1_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_1_cr.add(MaxPooling1D(pool_size=2))
model3_1_cr.add(Flatten())
model3_1_cr.add(Dense(8, activation='relu')) 
model3_1_cr.add(Dense(1, activation='sigmoid'))

model3_1_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_1_cr.fit(X_train_cr_10, train_5w,epochs=30,  validation_data=(X_val_cr_10, val_5w), callbacks = [early_stopping_monitor])
y_hat3_1_cr = model3_1_cr.predict(X_test_cr_10)
y_hat3_1_cr = y_hat3_1_cr > .5
metrics.accuracy_score(test_5w,y_hat3_1_cr)


Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30


0.5294117647058824

In [54]:
n_steps = X_train_cr_25.shape[1]
n_features = X_train_cr_25.shape[2]

model3_2_cr = Sequential()
model3_2_cr.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_2_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_2_cr.add(MaxPooling1D(pool_size=2))
model3_2_cr.add(Flatten())
model3_2_cr.add(Dense(25, activation='relu')) 
model3_2_cr.add(Dense(1, activation='sigmoid'))

model3_2_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_2_cr.fit(X_train_cr_25, train_5w,epochs=30,  validation_data=(X_val_cr_25, val_5w), callbacks = [early_stopping_monitor])
y_hat3_2_cr = model3_2_cr.predict(X_test_cr_25)
y_hat3_2_cr = y_hat3_2_cr > .5
metrics.accuracy_score(test_5w,y_hat3_2_cr)


Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30


0.6176470588235294

In [55]:

n_steps = X_train_cr_40.shape[1]
n_features = X_train_cr_40.shape[2]

model3_3_cr = Sequential()
model3_3_cr.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_3_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_3_cr.add(MaxPooling1D(pool_size=2))
model3_3_cr.add(Flatten())
model3_3_cr.add(Dense(25, activation='relu')) 
model3_3_cr.add(Dense(1, activation='sigmoid'))

model3_3_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_3_cr.fit(X_train_cr_40, train_5w,epochs=30,  validation_data=(X_val_cr_40, val_5w), callbacks = [early_stopping_monitor])
y_hat3_3_cr = model3_3_cr.predict(X_test_cr_40)
y_hat3_3_cr = y_hat3_3_cr > .5
metrics.accuracy_score(test_5w,y_hat3_3_cr)


Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


0.4852941176470588

In [56]:
n_steps = X_train_cr_55.shape[1]
n_features = X_train_cr_55.shape[2]

model3_4_cr = Sequential()
model3_4_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model3_4_cr.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model3_4_cr.add(MaxPooling1D(pool_size=2))
model3_4_cr.add(Flatten())
model3_4_cr.add(Dense(25, activation='relu')) 
model3_4_cr.add(Dense(1, activation='sigmoid'))

model3_4_cr.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model3_4_cr.fit(X_train_cr_55, train_5w,epochs=30,  validation_data=(X_val_cr_55, val_5w), callbacks = [early_stopping_monitor])
y_hat3_4_cr = model3_4_cr.predict(X_test_cr_55)
y_hat3_4_cr = y_hat3_4_cr > .5
metrics.accuracy_score(test_5w,y_hat3_4_cr)

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


0.38235294117647056

# Changes to the Target Window

In [57]:
X_train_kb_10, X_val_kb_10, X_test_kb_10 = kbest_creator(10, X_train.drop('target_3', axis=1), X_val.drop('target_3', axis=1), X_test.drop('target_3', axis=1)) 
X_train_kb_25, X_val_kb_25, X_test_kb_25 = kbest_creator(25, X_train.drop('target_3', axis=1), X_val.drop('target_3', axis=1), X_test.drop('target_3', axis=1)) 
X_train_kb_40, X_val_kb_40, X_test_kb_40 = kbest_creator(40, X_train.drop('target_3', axis=1), X_val.drop('target_3', axis=1), X_test.drop('target_3', axis=1)) 
X_train_kb_55, X_val_kb_55, X_test_kb_55 = kbest_creator(55, X_train.drop('target_3', axis=1), X_val.drop('target_3', axis=1), X_test.drop('target_3', axis=1)) 

  correlation_coefficient /= X_norms
  correlation_coefficient /= X_norms
  correlation_coefficient /= X_norms
  correlation_coefficient /= X_norms


In [58]:
def df_to_X_y_mod(df, target, window_size=5):
  # converts to matrix of numpy arrays
  X = []
  y = []
  for i in range(len(df)-window_size): # length of data frame - window_size so it does't take empty values at the end, 
    # does force you to loose the last 5 values, could fix with padding
    row = [r for r in df[i:i+window_size]] # grabs row i and all rows above within the window size length
    X.append(row) # creates 3 dimentional array, (# obseravtions, # rows in window, # features)
    label = target[i+window_size-1] # pulls the target variable after the window, target varible needs to be column zero in this 
    y.append(label) # returns (N,) martix of targets i+window_length time periods away
  return np.array(X), np.array(y)
  


In [59]:
# converting to window format, in this case 5 periods
X_train_kb_40, train_5w = df_to_X_y2(X_train_kb_40,y_train)
X_val_kb_40, val_5w = df_to_X_y2(X_val_kb_40, y_val)
X_test_kb_40, test_5w = df_to_X_y2(X_test_kb_40,y_test) 

X_train_kb_10, _ = df_to_X_y2(X_train_kb_10,y_train)
X_val_kb_10, _ = df_to_X_y2(X_val_kb_10, y_val)
X_test_kb_10, _ = df_to_X_y2(X_test_kb_10,y_test) 

X_train_kb_25, _ = df_to_X_y2(X_train_kb_25,y_train)
X_val_kb_25, _ = df_to_X_y2(X_val_kb_25, y_val)
X_test_kb_25, _ = df_to_X_y2(X_test_kb_25,y_test) 

X_train_kb_55, _ = df_to_X_y2(X_train_kb_55,y_train)
X_val_kb_55, _ = df_to_X_y2(X_val_kb_55, y_val)
X_test_kb_55, _ = df_to_X_y2(X_test_kb_55,y_test) 

In [60]:
n_steps = X_train_kb_25.shape[1]
n_features = X_train_kb_25.shape[2]

model_mod4 = Sequential()
model_mod4.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model_mod4.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model_mod4.add(MaxPooling1D(pool_size=2))
model_mod4.add(Conv1D(filters=16, kernel_size=1, activation='relu'))
model_mod4.add(Flatten())
model_mod4.add(Dense(25, activation='relu')) 
model_mod4.add(Dense(1, activation='sigmoid'))

model_mod4.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model_mod4.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


<keras.callbacks.History at 0x15b40ac70>

In [61]:
y_hat_mod4_kb = model_mod4.predict(X_test_kb_25)
y_hat_mod4_kb = y_hat_mod4_kb > .5
metrics.accuracy_score(test_5w,y_hat_mod4_kb)

0.4264705882352941

In [62]:
model_mod3 = Sequential()
model_mod3.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model_mod3.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model_mod3.add(MaxPooling1D(pool_size=2))
model_mod3.add(Flatten())
model_mod3.add(Dense(25, activation='relu')) 
model_mod3.add(Dense(1, activation='sigmoid'))

model_mod3.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model_mod3.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w), callbacks = [early_stopping_monitor])

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30


<keras.callbacks.History at 0x15b5efeb0>

In [63]:
y_hat_mod3_kb = model_mod3.predict(X_test_kb_25)
y_hat_mod3_kb = y_hat_mod3_kb > .5
metrics.accuracy_score(test_5w,y_hat_mod3_kb)

0.5

In [64]:
model_mod2 = Sequential()
model_mod2.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model_mod2.add(Conv1D(filters=8, kernel_size=2, activation='relu')) 
model_mod2.add(Flatten())
model_mod2.add(Dense(25, activation='relu')) 
model_mod2.add(Dense(1, activation='sigmoid'))

model_mod2.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model_mod2.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w))

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x15b7acfa0>

In [65]:
y_hat_mod2_kb = model_mod2.predict(X_test_kb_25)
y_hat_mod2_kb = y_hat_mod2_kb > .5
metrics.accuracy_score(test_5w,y_hat_mod2_kb)

0.4852941176470588

In [66]:
model_mod1 = Sequential()
model_mod1.add(Conv1D(filters=8, kernel_size=2, activation='relu', input_shape=(n_steps,n_features))) 
model_mod1.add(Flatten())
model_mod1.add(Dense(25, activation='relu')) 
model_mod1.add(Dense(1, activation='sigmoid'))

model_mod1.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                   metrics=["BinaryAccuracy"])

model_mod1.fit(X_train_kb_25, train_5w,epochs=30,  validation_data=(X_val_kb_25, val_5w))

Epoch 1/30


  return dispatch_target(*args, **kwargs)


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x15b942d30>

In [67]:
y_hat_mod1_kb = model_mod1.predict(X_test_kb_25)
y_hat_mod1_kb = y_hat_mod1_kb > .5
metrics.accuracy_score(test_5w,y_hat_mod1_kb)

0.5441176470588235