In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import seaborn as sns
import datetime
import pickle

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, OneHotEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers

# Ensure that all columns are shown and that column content is not truncated
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width',1000)
pd.set_option('display.max_rows', 500) # ensure that all rows are shown

def identify_feature_types(df):
    """Split the columns of ``df`` into numerical and categorical feature lists.

    Numerical features are the ``int64``/``float64`` columns, categorical
    features are the ``object`` columns. The columns ``target`` and ``cik``
    are left out of the numerical list, and ``TICKER`` and ``quarter`` are
    left out of the categorical list, so none of them is scaled or encoded.

    Returns:
        tuple[list, list]: ``(numerical_features, categorical_features)``.
    """
    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
    object_cols = df.select_dtypes(include=['object']).columns.tolist()

    # Columns that must not go through the numerical transformer.
    for skipped in ('target', 'cik'):
        if skipped in numeric_cols:
            numeric_cols.remove(skipped)

    # Columns that must not go through the categorical transformer.
    for skipped in ('TICKER', 'quarter'):
        if skipped in object_cols:
            object_cols.remove(skipped)

    return numeric_cols, object_cols

def create_preprocessing_pipeline(numerical_features, categorical_features):
    """Build the (unfitted) ColumnTransformer used to preprocess the features.

    Args:
        numerical_features: Column names routed through the numerical branch.
        categorical_features: Column names routed through the categorical branch.

    Returns:
        ColumnTransformer: Transformer with a ``'num'`` and a ``'cat'`` branch;
        columns listed in neither branch are passed through unchanged.
    """
    # Numerical branch: fill NaNs with the median, then scale with RobustScaler.
    numeric_steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', RobustScaler()),
    ]

    # Categorical branch: fill missing values with the most frequent category,
    # then one-hot encode (categories unseen during fit are ignored, dense output).
    categorical_steps = [
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
    ]

    branches = [
        ('num', Pipeline(steps=numeric_steps), numerical_features),
        ('cat', Pipeline(steps=categorical_steps), categorical_features),
    ]

    # remainder='passthrough' keeps every column that is in neither branch.
    return ColumnTransformer(transformers=branches, remainder='passthrough')

def preprocess_training_data(X_train, preprocessor=None):
    """Fit a preprocessor on the training data and return the transformed data.

    Args:
        X_train: Training features.
        preprocessor: Optional preprocessor to fit. When ``None``, one is built
            from the feature types detected in ``X_train``.

    Returns:
        tuple: ``(X_train_processed, preprocessor)`` with the fitted preprocessor.
    """
    pipeline = preprocessor
    if pipeline is None:
        # No pipeline supplied: derive the column lists and build a fresh one.
        num_cols, cat_cols = identify_feature_types(X_train)
        pipeline = create_preprocessing_pipeline(num_cols, cat_cols)

    # Fitting and transforming happen in one step on the training data.
    return pipeline.fit_transform(X_train), pipeline

def preprocess_new_data(X_new, preprocessor):
    """Transform new/unseen/test data with an already fitted preprocessor.

    Args:
        X_new: Data to transform.
        preprocessor: Preprocessor that was fitted on the training data.

    Returns:
        The transformed data, as returned by ``preprocessor.transform``.

    Raises:
        ValueError: If ``preprocessor`` is ``None``.
    """
    if preprocessor is not None:
        # Only transform here; the preprocessor is never re-fitted on new data.
        return preprocessor.transform(X_new)

    raise ValueError("The preprocessor must be fitted on training data first before transforming new data.")

In [42]:
# Load the dataset for preprocessing; the first CSV column is used as the DataFrame index.
df = pd.read_csv('../raw_data/data_for_preprocessing.csv', index_col=0)

In [43]:
# Quick sanity check: (number of rows, number of columns) of the loaded frame.
df.shape

(170124, 59)

# Initial Run

In [57]:
def run_RNN(df, quarters_input=4, threshold=0.5, small_cap=True, horizon='quarter'):
    """Train and pickle one LSTM classifier for a single parameter combination.

    The label of a row is 1 when the growth column selected by ``horizon``
    exceeds ``threshold`` (and, when ``small_cap`` is set, the row's
    ``small_cap`` flag equals 1), otherwise 0. Tickers are split 70/30 with a
    fixed seed; only the rows of the training tickers are used to build the
    sequences, and ``validation_split=0.2`` is used for validation during fit.

    Side effect: the trained model is pickled into ``../models/``.

    Args:
        df: Source DataFrame. It is copied, so the caller's frame is not modified.
        quarters_input: Number of consecutive rows in each input sequence.
        threshold: Growth value above which a row is labelled positive.
        small_cap: If true, only rows with ``small_cap == 1`` can be positive.
        horizon: One of ``'quarter'``, ``'year'`` or ``'2year'``.

    Returns:
        dict: ``{file_name: {...}}`` with the keys ``'target_pct'`` (share of
        positive training labels), ``'accuracy'``, ``'precision'``, ``'recall'``
        (means of the per-epoch validation metrics) and ``'params'``.

    Raises:
        ValueError: If ``horizon`` is not supported, or if no company has enough
            rows to build a single training sequence.
    """
    # horizon -> (growth column used for the label,
    #             extra rows skipped between the end of the window and the labelled row)
    horizon_settings = {
        'quarter': ('mc_qtr_growth_pct', 0),
        'year': ('mc_yr_growth_pct', 3),
        '2year': ('mc_2yr_growth_pct', 7),
    }
    if horizon not in horizon_settings:
        raise ValueError(
            f"Unsupported horizon {horizon!r}; expected one of {list(horizon_settings)}"
        )
    col, adjustment = horizon_settings[horizon]

    final_activation = 'sigmoid'
    metrics = ['accuracy', 'precision', 'recall']

    # Work on a copy so the columns added below never leak into the caller's frame.
    df = df.copy()

    # 'qtr' is the second '-'-separated part of the 'quarter' string.
    df['qtr'] = df.quarter.apply(lambda x: x.split('-')[1])

    # Vectorised label; comparisons against NaN are False, so NaN growth gives target 0.
    is_positive = df[col] > threshold
    if small_cap:
        is_positive = is_positive & (df['small_cap'] == 1)
    df['target'] = is_positive.astype('int64')

    # Split by ticker so all rows of one company end up on the same side of the split.
    # The 30 % test tickers are held out and not touched in this function.
    unique_groups = df['TICKER'].unique()
    train_groups, _ = train_test_split(unique_groups, test_size=0.3, random_state=42)
    data_train = df[df['TICKER'].isin(train_groups)]

    # Remove the raw growth columns (they would leak the label) and unused columns.
    cols_drop = ['CIK',
                 'mc_qtr_growth',
                 'mc_qtr_growth_pct',
                 'mc_yr_growth',
                 'mc_yr_growth_pct',
                 'mc_2yr_growth',
                 'mc_2yr_growth_pct',
                 'date',
                 'year']
    X_train = data_train.drop(columns=cols_drop).reset_index(drop=True)

    # Fit the preprocessing pipeline on the training rows only.
    num, cat = identify_feature_types(X_train)
    preprocessor = create_preprocessing_pipeline(num, cat)
    X_train_pp, preprocessor = preprocess_training_data(X_train, preprocessor=preprocessor)
    X_train_pp = pd.DataFrame(X_train_pp, columns=preprocessor.get_feature_names_out())

    # The last 4 columns of the transformed frame are excluded from the model input
    # and the very last one is read as the label.
    # NOTE(review): presumably these are the passthrough columns TICKER, quarter, cik
    # and target (target last because it is the last column added above) - confirm
    # that no other column falls into the ColumnTransformer remainder.
    n_passthrough = 4

    # Build sliding windows per company.
    # NOTE(review): rows are windowed in their existing order; this assumes each
    # company's rows are already sorted chronologically - confirm upstream.
    X_train_sequences = []
    y_train_sequences = []
    for _, company in X_train_pp.groupby(['remainder__cik', 'remainder__TICKER']):
        # Convert once per company instead of slicing the DataFrame for every window.
        features = company.iloc[:, :-n_passthrough].to_numpy(dtype='float32')
        labels = company.iloc[:, -1].to_numpy(dtype='float32')
        n_windows = len(company) - quarters_input - adjustment
        for start in range(max(n_windows, 0)):
            stop = start + quarters_input
            X_train_sequences.append(features[start:stop])
            y_train_sequences.append(labels[stop + adjustment])

    if not X_train_sequences:
        raise ValueError(
            "No training sequences could be built: no company has more than "
            f"{quarters_input + adjustment} rows."
        )
    X_train_sequences = np.array(X_train_sequences, dtype='float32')
    y_train_sequences = np.array(y_train_sequences, dtype='float32')

    adam = Adam(learning_rate=0.002, beta_1=0.95)

    # Class weights: inverse class frequency, tilted 40/60 towards the positive class.
    n_samples = len(y_train_sequences)
    n_positive = y_train_sequences.sum()
    n_negative = n_samples - n_positive
    weight_for_0 = (0.4 / n_negative) * (n_samples / 2)
    weight_for_1 = (0.6 / n_positive) * (n_samples / 2)

    es = EarlyStopping(patience=7, restore_best_weights=True)
    plateau = ReduceLROnPlateau()
    class_weight = {0: weight_for_0, 1: weight_for_1}

    # 1- RNN Architecture
    # The feature dimension is taken from the data instead of being hard-coded, and
    # an explicit Input layer replaces the `input_shape` argument on the first LSTM.
    n_features = X_train_sequences.shape[2]
    model = Sequential()
    model.add(layers.Input(shape=(quarters_input, n_features)))
    model.add(layers.LSTM(units=240, activation='tanh', return_sequences=True))
    model.add(layers.Dropout(0.2))
    model.add(layers.LSTM(units=160, activation='tanh'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(140, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(100, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(60, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(30, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(1, activation=final_activation))

    # 2- Compilation
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=metrics)

    # 3- Fit
    history = model.fit(X_train_sequences, y_train_sequences, validation_split=0.2, epochs=100, batch_size=32,
                        callbacks=[es, plateau], verbose=3, class_weight=class_weight)

    # Save Model
    # NOTE(review): Keras recommends model.save(...) over pickle; pickle is kept so
    # that existing consumers of the .pkl files keep working.
    file_name = f"{datetime.datetime.now()}_RNN_{quarters_input}_qtr_{threshold}_ths_sc_{small_cap}.pkl"
    model_dir = '../models/'

    with open(model_dir+file_name, "wb") as file:
        pickle.dump(model, file)

    # Summary: share of positive labels and validation metrics averaged over all epochs.
    tar_pct = round(float(n_positive) / n_samples, 4)
    val_pre = round(np.mean(history.history.get('val_precision')),4)
    val_acc = round(np.mean(history.history.get('val_accuracy')),4)
    val_rec = round(np.mean(history.history.get('val_recall')),4)

    entry = {file_name: {'target_pct': tar_pct, 'accuracy': val_acc, 'precision': val_pre, 'recall': val_rec, 'params': [quarters_input, threshold, small_cap, horizon]}}
    print(entry)
    return entry

In [58]:
# Hyper-parameter grid for the initial run (3 x 2 x 2 x 3 = 36 combinations).
quarters_input = list(range(4, 13, 4))  # 3 values: 4, 8, 12
threshold = [0.3, 0.5]  # 2 values
small_cap = [True, False]  # 2 values
# The names must match the horizons handled inside run_RNN: 'quarter', 'year', '2year'.
horizon = ['quarter', 'year', '2year']  # 3 values


In [59]:
# Accumulator for the summary entries returned by run_RNN (reset again by the grid-search cell).
results = []

In [None]:
# Grid search: train one model per parameter combination and collect its summary entry.
# A plain loop is used so that already finished entries survive an interrupted run.
results = []
for n_quarters in quarters_input:
    for growth_threshold in threshold:
        for only_small_cap in small_cap:
            for target_horizon in horizon:
                summary = run_RNN(df, quarters_input=n_quarters, threshold=growth_threshold,
                                  small_cap=only_small_cap, horizon=target_horizon)
                results.append(summary)

Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
{'2024-09-12 10:05:25.049709_RNN_4_qtr_0.3_ths_sc_True.pkl': {'target_pct': 0.0878, 'accuracy': 0.8853, 'precision': 0.2351, 'recall': 0.0142, 'params': [4, 0.3, True, 'quarter']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
{'2024-09-12 10:08:32.472614_RNN_4_qtr_0.3_ths_sc_True.pkl': {'target_pct': 0.178, 'accuracy': 0.8042, 'precision': 0.5961, 'recall': 0.0984, 'params': [4, 0.3, True, 'year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
{'2024-09-12 10:10:52.552133_RNN_4_qtr_0.3_ths_sc_True.pkl': {'target_pct': 0.2182, 'accuracy': 0.7666, 'precision': 0.457, 'recall': 0.1446, 'params': [4, 0.3, True, '2year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
{'2024-09-12 10:14:49.917173_RNN_4_qtr_0.3_ths_sc_False.pkl': {'target_pct': 0.1071, 'accuracy': 0.8597, 'precision': 0.3634, 'recall': 0.0141, 'params': [4, 0.3, False, 'quarter']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
{'2024-09-12 10:17:54.103194_RNN_4_qtr_0.3_ths_sc_False.pkl': {'target_pct': 0.2712, 'accuracy': 0.7184, 'precision': 0.5806, 'recall': 0.0943, 'params': [4, 0.3, False, 'year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
{'2024-09-12 10:20:13.755390_RNN_4_qtr_0.3_ths_sc_False.pkl': {'target_pct': 0.3781, 'accuracy': 0.6398, 'precision': 0.486, 'recall': 0.1873, 'params': [4, 0.3, False, '2year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
{'2024-09-12 10:28:24.470842_RNN_4_qtr_0.5_ths_sc_True.pkl': {'target_pct': 0.0485, 'accuracy': 0.9286, 'precision': 0.0787, 'recall': 0.0016, 'params': [4, 0.5, True, 'quarter']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
{'2024-09-12 10:32:36.120106_RNN_4_qtr_0.5_ths_sc_True.pkl': {'target_pct': 0.126, 'accuracy': 0.846, 'precision': 0.5443, 'recall': 0.0665, 'params': [4, 0.5, True, 'year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
{'2024-09-12 10:35:48.066088_RNN_4_qtr_0.5_ths_sc_True.pkl': {'target_pct': 0.1672, 'accuracy': 0.8028, 'precision': 0.3536, 'recall': 0.0958, 'params': [4, 0.5, True, '2year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
{'2024-09-12 10:41:54.643994_RNN_4_qtr_0.5_ths_sc_False.pkl': {'target_pct': 0.0545, 'accuracy': 0.9194, 'precision': 0.0458, 'recall': 0.0012, 'params': [4, 0.5, False, 'quarter']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
{'2024-09-12 10:46:50.327032_RNN_4_qtr_0.5_ths_sc_False.pkl': {'target_pct': 0.1754, 'accuracy': 0.7899, 'precision': 0.5065, 'recall': 0.0806, 'params': [4, 0.5, False, 'year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
{'2024-09-12 10:50:32.861442_RNN_4_qtr_0.5_ths_sc_False.pkl': {'target_pct': 0.2733, 'accuracy': 0.7058, 'precision': 0.351, 'recall': 0.1131, 'params': [4, 0.5, False, '2year']}}


  super().__init__(**kwargs)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
{'2024-09-12 11:03:24.811925_RNN_8_qtr_0.3_ths_sc_True.pkl': {'target_pct': 0.0823, 'accuracy': 0.8875, 'precision': 0.0, 'recall': 0.0, 'params': [8, 0.3, True, 'quarter']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
{'2024-09-12 11:09:21.189881_RNN_8_qtr_0.3_ths_sc_True.pkl': {'target_pct': 0.1631, 'accuracy': 0.8113, 'precision': 0.5797, 'recall': 0.1133, 'params': [8, 0.3, True, 'year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
{'2024-09-12 11:13:45.704289_RNN_8_qtr_0.3_ths_sc_True.pkl': {'target_pct': 0.2053, 'accuracy': 0.7759, 'precision': 0.3362, 'recall': 0.096, 'params': [8, 0.3, True, '2year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
{'2024-09-12 11:21:49.590558_RNN_8_qtr_0.3_ths_sc_False.pkl': {'target_pct': 0.1016, 'accuracy': 0.8642, 'precision': 0.1601, 'recall': 0.0087, 'params': [8, 0.3, False, 'quarter']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
{'2024-09-12 11:30:21.126793_RNN_8_qtr_0.3_ths_sc_False.pkl': {'target_pct': 0.2584, 'accuracy': 0.7278, 'precision': 0.5454, 'recall': 0.1186, 'params': [8, 0.3, False, 'year']}}
Epoch 1/100


  super().__init__(**kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
{'2024-09-12 11:35:36.169601_RNN_8_qtr_0.3_ths_sc_False.pkl': {'target_pct': 0.3662, 'accuracy': 0.643, 'precision': 0.4823, 'recall': 0.1361, 'params': [8, 0.3, False, '2year']}}


  super().__init__(**kwargs)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


In [None]:
# Number of grid-search entries collected so far.
len(results)

In [None]:
# Flatten the list of single-entry result dicts into one table row per trained model:
# the dict key (model file name) becomes the index, the inner values become columns.
idx = []
content = {'target_pct': [], 'accuracy': [], 'precision': [], 'recall': [], 'params': []}

for entry in results:
    model_file = list(entry.keys())[0]
    stats = list(entry.values())[0]
    for field, column in content.items():
        column.append(stats.get(field))
    idx.append(model_file)

content_df = pd.DataFrame(content, index=idx)

In [None]:
# Persist the grid-search summary; the file is ';'-separated.
content_df.to_csv('../raw_data/Datasets/RNN_GS_v2.csv', sep=';')

In [None]:
# Column dtypes and non-null counts of the summary table.
content_df.info()

# Final Run


In [None]:
def run_RNN_v2(df, quarters_input=4, threshold=0.5, small_cap=True, horizon='quarter'):
    """Train and pickle one LSTM classifier on the full dataset (no ticker hold-out).

    The label of a row is 1 when the growth column selected by ``horizon``
    exceeds ``threshold`` (and, when ``small_cap`` is set, the row's
    ``small_cap`` flag equals 1), otherwise 0. All rows of ``df`` are used to
    build the sequences; ``validation_split=0.2`` is used for validation during fit.

    Side effect: the trained model is pickled into ``../models/`` under a name
    derived from the parameters, so a rerun with the same parameters overwrites it.

    Args:
        df: Source DataFrame. It is copied, so the caller's frame is not modified.
        quarters_input: Number of consecutive rows in each input sequence.
        threshold: Growth value above which a row is labelled positive.
        small_cap: If true, only rows with ``small_cap == 1`` can be positive.
        horizon: One of ``'quarter'``, ``'year'`` or ``'2year'``.

    Returns:
        dict: ``{file_name: {...}}`` with the keys ``'target_pct'`` (share of
        positive training labels), ``'accuracy'``, ``'precision'``, ``'recall'``
        (means of the per-epoch validation metrics) and ``'params'``.

    Raises:
        ValueError: If ``horizon`` is not supported, or if no company has enough
            rows to build a single training sequence.
    """
    # horizon -> (growth column used for the label,
    #             extra rows skipped between the end of the window and the labelled row)
    horizon_settings = {
        'quarter': ('mc_qtr_growth_pct', 0),
        'year': ('mc_yr_growth_pct', 3),
        '2year': ('mc_2yr_growth_pct', 7),
    }
    if horizon not in horizon_settings:
        raise ValueError(
            f"Unsupported horizon {horizon!r}; expected one of {list(horizon_settings)}"
        )
    col, adjustment = horizon_settings[horizon]

    final_activation = 'sigmoid'
    metrics = ['accuracy', 'precision', 'recall']

    # Work on a copy so the columns added below never leak into the caller's frame.
    df = df.copy()

    # 'qtr' is the second '-'-separated part of the 'quarter' string.
    df['qtr'] = df.quarter.apply(lambda x: x.split('-')[1])

    # Vectorised label; comparisons against NaN are False, so NaN growth gives target 0.
    is_positive = df[col] > threshold
    if small_cap:
        is_positive = is_positive & (df['small_cap'] == 1)
    df['target'] = is_positive.astype('int64')

    # Remove the raw growth columns (they would leak the label) and unused columns.
    cols_drop = ['CIK',
                 'mc_qtr_growth',
                 'mc_qtr_growth_pct',
                 'mc_yr_growth',
                 'mc_yr_growth_pct',
                 'mc_2yr_growth',
                 'mc_2yr_growth_pct',
                 'date',
                 'year']

    # Final run: keep every row (the previous code indexed with `data_train`,
    # which does not exist in this scope).
    df = df.drop(columns=cols_drop).reset_index(drop=True)

    # Fit the preprocessing pipeline on the whole dataset.
    num, cat = identify_feature_types(df)
    preprocessor = create_preprocessing_pipeline(num, cat)

    df_pp, preprocessor = preprocess_training_data(df, preprocessor=preprocessor)
    df_pp = pd.DataFrame(df_pp, columns=preprocessor.get_feature_names_out())

    # The last 4 columns of the transformed frame are excluded from the model input
    # and the very last one is read as the label.
    # NOTE(review): presumably these are the passthrough columns TICKER, quarter, cik
    # and target (target last because it is the last column added above) - confirm
    # that no other column falls into the ColumnTransformer remainder.
    n_passthrough = 4

    # Build sliding windows per company.
    # NOTE(review): rows are windowed in their existing order; this assumes each
    # company's rows are already sorted chronologically - confirm upstream.
    X_train_sequences = []
    y_train_sequences = []
    for _, company in df_pp.groupby(['remainder__cik', 'remainder__TICKER']):
        # Convert once per company instead of slicing the DataFrame for every window.
        features = company.iloc[:, :-n_passthrough].to_numpy(dtype='float32')
        labels = company.iloc[:, -1].to_numpy(dtype='float32')
        n_windows = len(company) - quarters_input - adjustment
        for start in range(max(n_windows, 0)):
            stop = start + quarters_input
            X_train_sequences.append(features[start:stop])
            y_train_sequences.append(labels[stop + adjustment])

    if not X_train_sequences:
        raise ValueError(
            "No training sequences could be built: no company has more than "
            f"{quarters_input + adjustment} rows."
        )
    X_train_sequences = np.array(X_train_sequences, dtype='float32')
    y_train_sequences = np.array(y_train_sequences, dtype='float32')

    adam = Adam(learning_rate=0.002, beta_1=0.95)

    # Class weights: inverse class frequency, tilted 40/60 towards the positive class.
    n_samples = len(y_train_sequences)
    n_positive = y_train_sequences.sum()
    n_negative = n_samples - n_positive
    weight_for_0 = (0.4 / n_negative) * (n_samples / 2)
    weight_for_1 = (0.6 / n_positive) * (n_samples / 2)

    es = EarlyStopping(patience=7, restore_best_weights=True)
    plateau = ReduceLROnPlateau()
    class_weight = {0: weight_for_0, 1: weight_for_1}

    # 1- RNN Architecture
    # The feature dimension is taken from the data instead of being hard-coded, and
    # an explicit Input layer replaces the `input_shape` argument on the first LSTM.
    n_features = X_train_sequences.shape[2]
    model = Sequential()
    model.add(layers.Input(shape=(quarters_input, n_features)))
    model.add(layers.LSTM(units=240, activation='tanh', return_sequences=True))
    model.add(layers.Dropout(0.2))
    model.add(layers.LSTM(units=160, activation='tanh'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(140, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(100, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(60, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(30, activation='relu'))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(1, activation=final_activation))

    # 2- Compilation
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=metrics)

    # 3- Fit
    history = model.fit(X_train_sequences, y_train_sequences, validation_split=0.2, epochs=100, batch_size=32,
                        callbacks=[es, plateau], verbose=3, class_weight=class_weight)

    # Threshold as a whole percentage for the file name. Rounding first removes float
    # noise (0.29 * 100 == 28.999999999999996 used to be written as '28').
    thresh = str(int(round(threshold * 100, 6)))

    # Save Model
    # NOTE(review): Keras recommends model.save(...) over pickle; pickle is kept so
    # that existing consumers of the .pkl files keep working.
    file_name = f"rnn_sc{small_cap}_{horizon}_{quarters_input}_seq_{thresh}.pkl"
    model_dir = '../models/'

    with open(model_dir+file_name, "wb") as file:
        pickle.dump(model, file)

    # Summary: share of positive labels and validation metrics averaged over all epochs.
    tar_pct = round(float(n_positive) / n_samples, 4)
    val_pre = round(np.mean(history.history.get('val_precision')),4)
    val_acc = round(np.mean(history.history.get('val_accuracy')),4)
    val_rec = round(np.mean(history.history.get('val_recall')),4)

    entry = {file_name: {'target_pct': tar_pct, 'accuracy': val_acc, 'precision': val_pre, 'recall': val_rec, 'params': [quarters_input, threshold, small_cap, horizon]}}
    print(entry)
    return entry

In [None]:
# Hyper-parameter grid for the final run (3 x 2 x 2 x 3 = 36 combinations).
quarters_input = list(range(4, 13, 4))  # 3 values: 4, 8, 12
threshold = [0.3, 0.5]  # 2 values
small_cap = [True, False]  # 2 values
# The names must match the horizons handled inside run_RNN_v2: 'quarter', 'year', '2year'.
horizon = ['quarter', 'year', '2year']  # 3 values

In [None]:
# Final run: train one model per parameter combination on the full dataset.
# Entries go into `results2` (read by the summary cell below) and come from
# run_RNN_v2; the loop previously appended run_RNN results to `results`.
results2 = []
for q in quarters_input:
    for t in threshold:
        for s in small_cap:
            for h in horizon:
                results2.append(run_RNN_v2(df, quarters_input=q, threshold=t, small_cap=s, horizon=h))

In [None]:
# Flatten the final-run result dicts into one table row per trained model:
# the dict key (model file name) becomes the index, the inner values become columns.
idx = []
content = {'target_pct': [], 'accuracy': [], 'precision': [], 'recall': [], 'params': []}

for entry in results2:
    model_file = list(entry.keys())[0]
    stats = list(entry.values())[0]
    for field, column in content.items():
        column.append(stats.get(field))
    idx.append(model_file)

content_df = pd.DataFrame(content, index=idx)

In [None]:
# Persist the final-run summary; the file is ';'-separated.
content_df.to_csv('../raw_data/Datasets/RNN_GS_v3.csv', sep=';')