# Baseline Model: Logistic Regression

In [1]:
import os
import pdb

from sklearn.linear_model import LogisticRegression

from model_utils.evaluation_tools import *

from model_utils.utils import load_pickle, save_pickle
from model_utils.utils import load_trn_tst_dicts

## Load Data

In [2]:
moon_root = 'C:/Users/chetai/Documents/Projects/data/moonGen/'
data_root = moon_root + 'processed_data/'

trn_dict, tst_dict = load_trn_tst_dicts(data_root)

print(trn_dict.keys())

dict_keys(['uids', 'labels', 'array', 'data'])


In [3]:
print('Data shape:', trn_dict['data'].shape)
print('Label shape:', trn_dict['labels'].shape)
print('Label shape (array):', trn_dict['array'].shape)

Data shape: (12589, 198)
Label shape: (12589,)
Label shape (array): (12589, 11)


In [4]:
x_trn = trn_dict['data']
x_tst = tst_dict['data']

y_trn = trn_dict['labels']

y_trn_array = trn_dict['array']
y_tst_array = tst_dict['array']

## Train Model

In [5]:
lr_params = {
    'penalty': 'l2',
    'C': 10,  # Inverse of regularization weight
    'verbose': 1,
    'n_jobs': -1,
    'random_state': 7
}
model = LogisticRegression(**lr_params)

In [6]:
model.fit(x_trn, y_trn)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    5.3s finished


LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=-1, penalty='l2', random_state=7,
                   solver='lbfgs', tol=0.0001, verbose=1, warm_start=False)

In [7]:
y_pred_trn = model.predict_proba(x_trn)
y_pred_tst = model.predict_proba(x_tst)
print(y_pred_trn.shape)
print(y_pred_tst.shape)

(12589, 11)
(1000, 11)


## Evaluate

In [8]:
trn_out_root = moon_root + 'baseline_outputs/logistic_regression/train/'
tst_out_root = moon_root + 'baseline_outputs/logistic_regression/test/'

trn_settings = {
    'description': 'Logistic Regression Train',
    'corr_fig_save': trn_out_root + 'fig_correlation.png',
    'farpa_fig_save': trn_out_root + 'fig_farpa.png',
    'farpa_stats_save': trn_out_root + 'stats_farpa.pickle',
    'confusion_fig_save': trn_out_root + 'fig_confusion.png',
    'global_stats_save': trn_out_root + 'stats_global.pickle'
}

tst_settings = {
    'description': 'Logistic Regression Test',
    'corr_fig_save': tst_out_root + 'fig_correlation.png',
    'farpa_fig_save': tst_out_root + 'fig_farpa.png',
    'farpa_stats_save': tst_out_root + 'stats_farpa.pickle',
    'confusion_fig_save': tst_out_root + 'fig_confusion.png',
    'global_stats_save': tst_out_root + 'stats_global.pickle'
}

In [9]:
# Train evaluation
evaluate_predictions(y_trn_array, y_pred_trn, trn_settings)

({'Window 0': {'V4': {'thresholds': array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
           0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22,
           0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33,
           0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,
           0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54, 0.55,
           0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65, 0.66,
           0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77,
           0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88,
           0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]),
    'f_scores': [0.5319745222929937,
     0.5500826446280992,
     0.5630962115017716,
     0.5763091570988519,
     0.588100028743892,
     0.5976235880886019,
     0.6061059563005089,
     0.615255273616631,
     0.6228336053470117,
     0.6310717952768343,
     0.637004

<Figure size 1152x576 with 0 Axes>

<Figure size 1296x6336 with 0 Axes>

<Figure size 1728x720 with 0 Axes>

In [10]:
# Test evaluation
evaluate_predictions(y_tst_array, y_pred_tst, tst_settings)

({'Window 0': {'V4': {'thresholds': array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
           0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22,
           0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33,
           0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,
           0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54, 0.55,
           0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65, 0.66,
           0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77,
           0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88,
           0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]),
    'f_scores': [0.5250596658711217,
     0.5465116279069767,
     0.5643224699828474,
     0.5776997366110623,
     0.5863309352517986,
     0.5934065934065934,
     0.5975723622782447,
     0.6,
     0.6028708133971292,
     0.6070038910505837,
     0.6108374384236454,
 

<Figure size 1152x576 with 0 Axes>

<Figure size 1296x6336 with 0 Axes>

<Figure size 1728x720 with 0 Axes>