In [None]:
import pandas as pd
import pickle
import joblib

from nn_load_data import LoadNNData
from nn_utils import *
from nn_models import *

# Validation

In [None]:
# Load data
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this cell at artifacts produced by a trusted pipeline.
def _read_pickle(path):
    """Open `path` in binary mode and return the object unpickled from it."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Names of the features to keep (used to index the feature frames below).
selected_columns = _read_pickle('selected_columns_rf.pkl')

# Column labels for the stored feature matrices, followed by the
# train/test feature matrices and their targets.
X_columns = _read_pickle('X_columns.pkl')
train_X = _read_pickle('train_X.pkl')
train_y = _read_pickle('train_y.pkl')
test_X = _read_pickle('test_X.pkl')
test_y = _read_pickle('test_y.pkl')


In [None]:
# Label each feature matrix with X_columns, keep only the columns listed in
# selected_columns, and hand back the underlying array via `.values`.
# NOTE: train_X / test_X are overwritten here, so this cell is not idempotent;
# re-run the data-loading cell before executing it a second time.
train_X = pd.DataFrame(train_X, columns=X_columns)[selected_columns].values
test_X = pd.DataFrame(test_X, columns=X_columns)[selected_columns].values

In [None]:
# Load models
# Restore the three persisted estimators with joblib; the order of the file
# names below must match the order of the names being unpacked.
model_files = ('nb_model.joblib', 'lr_model.joblib', 'rf_model.joblib')
nb_clf, lr_clf, rf_clf = [joblib.load(model_file) for model_file in model_files]

In [None]:
# Load NN model
# Layer sizes passed to NeuralNetModule. NOTE(review): these presumably have
# to match the values used when 'NN_model.st' was saved, otherwise
# load_state_dict will reject the checkpoint — confirm against the training code.
batch_size = 2048
number_of_features = len(selected_columns)
input_size = number_of_features
h1_size = 32
h2_size = 16
output_size = 1
sigmoid = nn.Sigmoid()
device = 'cpu'

NN_model = NeuralNetModule(input_size, h1_size, h2_size, output_size)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from another device (e.g. CUDA) still loads on a CPU-only machine.
NN_model.load_state_dict(torch.load('NN_model.st', map_location=device))
# Cast the parameters to float64 and switch the module to evaluation mode.
NN_model = NN_model.double()
NN_model.eval()

# Wrap the selected features and targets for batched inference.
train_loader = LoadNNData(train_X, train_y, batch_size)
test_loader = LoadNNData(test_X, test_y, batch_size)

In [None]:
# Get train predictions
# For each persisted classifier keep both the hard labels from predict() and
# column 1 of predict_proba() (presumably the positive-class probability).
y_pred_nb_train = nb_clf.predict(train_X)
y_prob_nb_train = nb_clf.predict_proba(train_X)[:, 1]

y_pred_lr_train = lr_clf.predict(train_X)
y_prob_lr_train = lr_clf.predict_proba(train_X)[:, 1]

y_pred_rf_train = rf_clf.predict(train_X)
y_prob_rf_train = rf_clf.predict_proba(train_X)[:, 1]

# The NN helper returns the targets alongside the probabilities; hard labels
# are obtained by thresholding the probabilities at 0.5 (strictly greater).
(y_true_nn_train,
 y_prob_nn_train) = get_predictions(NN_model, device, train_loader, sigmoid)
y_pred_nn_train = np.where(y_prob_nn_train > 0.5, 1, 0)

In [None]:
# Get test predictions
# For each persisted classifier keep both the hard labels from predict() and
# column 1 of predict_proba() (presumably the positive-class probability).
y_pred_nb_test = nb_clf.predict(test_X)
y_prob_nb_test = nb_clf.predict_proba(test_X)[:, 1]

y_pred_lr_test = lr_clf.predict(test_X)
y_prob_lr_test = lr_clf.predict_proba(test_X)[:, 1]

y_pred_rf_test = rf_clf.predict(test_X)
y_prob_rf_test = rf_clf.predict_proba(test_X)[:, 1]

# The NN helper returns the targets alongside the probabilities; hard labels
# are obtained by thresholding the probabilities at 0.5 (strictly greater).
(y_true_nn_test,
 y_prob_nn_test) = get_predictions(NN_model, device, test_loader, sigmoid)
y_pred_nn_test = np.where(y_prob_nn_test > 0.5, 1, 0)

In [None]:
# Bundle each model's outputs as {split: {'y_pred': ..., 'y_prob': ...}}.
nb_predictions = {
    'train': {'y_pred': y_pred_nb_train, 'y_prob': y_prob_nb_train},
    'test': {'y_pred': y_pred_nb_test, 'y_prob': y_prob_nb_test},
}

lr_predictions = {
    'train': {'y_pred': y_pred_lr_train, 'y_prob': y_prob_lr_train},
    'test': {'y_pred': y_pred_lr_test, 'y_prob': y_prob_lr_test},
}

rf_predictions = {
    'train': {'y_pred': y_pred_rf_train, 'y_prob': y_prob_rf_train},
    'test': {'y_pred': y_pred_rf_test, 'y_prob': y_prob_rf_test},
}

# The NN bundle additionally stores the targets returned by get_predictions.
nn_predictions = {
    'train': {
        'y_pred': y_pred_nn_train,
        'y_prob': y_prob_nn_train,
        'y_true': y_true_nn_train,
    },
    'test': {
        'y_pred': y_pred_nn_test,
        'y_prob': y_prob_nn_test,
        'y_true': y_true_nn_test,
    },
}

In [None]:
# Save predictions
# Write each model's prediction bundle to its own pickle file, in the same
# order as before (nb, lr, rf, nn).
prediction_dumps = (
    ('nb_predictions.pkl', nb_predictions),
    ('lr_predictions.pkl', lr_predictions),
    ('rf_predictions.pkl', rf_predictions),
    ('nn_predictions.pkl', nn_predictions),
)
for out_path, payload in prediction_dumps:
    with open(out_path, 'wb') as out_file:
        pickle.dump(payload, out_file)