In [1]:
import pandas as pd
from helpers.helper_functions import load_data, get_scaled_data


## Load data

In [2]:
# Raw train / test frames as returned by the project helper.
train, test = load_data('data')

# Training features come from a CSV on disk (presumably the pre-processed
# version of `train` -- TODO confirm row order matches `train.target`).
X_train = pd.read_csv('data/X_prepped.csv')
y_train = train.target

# Test features are produced by the project helper in test mode.
X_test = get_scaled_data(test, is_test=True)

## ML: K-fold neural network training

In [None]:
from tensorflow import keras
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from helpers.train_helpers import BATCH_SIZE, EPOCHS

In [None]:

# Number of feature columns fed to the network.
INPUT_SHAPE = X_test.shape[1]

# The model is fitted on X_train but sized from X_test; fail early with a
# clear message if the two frames do not have the same number of columns.
assert X_train.shape[1] == INPUT_SHAPE, (
    f"X_train has {X_train.shape[1]} columns but X_test has {INPUT_SHAPE}"
)

# Small fully connected binary classifier: 100 -> 50 -> 1 (sigmoid output).
# The input is declared with an explicit Input layer rather than the legacy
# `input_dim=` keyword on the first Dense layer.
model_3 = keras.models.Sequential(
    [
        keras.Input(shape=(INPUT_SHAPE,)),
        keras.layers.Dense(100, activation="swish"),
        keras.layers.Dense(50, activation="swish"),
        keras.layers.Dense(1, activation="sigmoid"),
    ],
    name="Dense_model_3",
)

In [None]:
def test_predictor(model_in, X_train, y_train, X_test, n_folds = 5):
    early_stopping = keras.callbacks.EarlyStopping(
                    patience=20, monitor="val_loss", restore_best_weights=True, verbose = 1
                )
    learn_reducer = keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.7, patience = 5, verbose = 1)
    kf = KFold(n_folds)
    store = []

    model_in.summary()

    for fold, (train_idx, val_idx) in enumerate(
        kf.split(train)
    ):
        
        print(f"Fitting fold {fold} for {model_in.name}...")
        model = keras.models.clone_model(model_in)
        model.compile(
            optimizer="adam", loss="binary_crossentropy", metrics=[keras.metrics.AUC()]
        )

        X_train, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        model.fit(
            X_train,
            y_train,
            validation_data=(X_val, y_val),
            epochs=EPOCHS,
            verbose=1,
            batch_size=BATCH_SIZE,
            callbacks=[
                early_stopping, learn_reducer
            ],
        )
        auc = roc_auc_score(y_val, model.predict(X_val).squeeze())
        print(f"The val auc for fold {fold}, {model_in.name} is {auc}")
        store.append(model.predict(X_test).squeeze())

    result = sum(store) / n_folds 
    return result

In [None]:
# Fold-averaged test-set predictions from the dense model.
preds = test_predictor(
    model_in=model_3,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    n_folds=5,
)

In [None]:
# Start from the sample submission and fill `state` with the rounded
# fold-averaged predictions.
# NOTE(review): rounding keeps the values as floats -- confirm the submission
# format accepts that.
sub = pd.read_csv('data/sample_submission.csv').assign(state=preds.round())

In [3]:
import os

# Create the output directory (and any missing parents) if needed;
# exist_ok avoids the separate existence check and its race.
os.makedirs('data/submissions', exist_ok=True)

sub.to_csv('data/submissions/nn_sub.csv', index = False)