In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, fbeta_score, make_scorer
import matplotlib.pyplot as plt
import sys
from sklearn.model_selection import cross_validate, GridSearchCV
from sklearn.linear_model import LogisticRegression
import time

# Project imports

In [2]:
# Mount Google Drive at /content/drive (Colab-only API); the project path and
# the dataset path used in the following cells both live under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Root of the project checkout on the mounted Drive; it contains the ``src``
# package imported below.
PROJECT_ROOT = '/content/drive/MyDrive/TFG/implementations/machine_learning_tfg'

# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if PROJECT_ROOT not in sys.path:
  sys.path.append(PROJECT_ROOT)

from src.utils.model_metrics_generator import ModelMetricsGenerator
from src.utils.cross_validation_utils import CrossValidationMetricsResultPrinter
from src.utils.my_metrics import accuracy_precision_recall_specifity_f2_score

# Load data

In [4]:
# Read the prepared ICU dataset from the mounted Drive folder.
# (Local alternative left by the author: './../data/prepared/prepared_ICU_Prediction.xlsx')
prepared_data_path = '/content/drive/MyDrive/TFG/implementations/machine_learning_tfg/data/prepared/prepared_ICU_Prediction.xlsx'
input_data = pd.read_excel(prepared_data_path)

# 'ICU' is the label column; every remaining column is used as a feature.
ground_truth = input_data['ICU']
sample_data = input_data.drop(columns='ICU')

# Shuffled 80/20 hold-out split with a fixed seed.
train_data, test_data, train_truth, test_truth = train_test_split(
    sample_data,
    ground_truth,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Model implementation

## Grid search

In [5]:
def logit_hyperparam_grid_search():
  """ Perform Grid search for logit model

  """
  
  #hyperparameter values
  solver = ['liblinear', 'newton-cg','sag', 'lbfgs']
  penalty = ['l2', 'l1']
  C = [1.0, 0.75, 0.5, 0.25]
  class_weight = [{0:1, 1:1}, {0:1, 1:5}, {0:1, 1:10}]
  param_grid = dict(solver=solver,C=C, penalty=penalty, class_weight=class_weight)
  
  #configuring grid serach
  model = LogisticRegression(max_iter=4000)
  sskfold = StratifiedShuffleSplit(random_state=1)
  scoring = accuracy_precision_recall_specifity_f2_score()
  grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=sskfold, verbose=0, refit='recall', scoring=scoring)
  
  #perform
  grid_result = %time grid.fit(train_data, train_truth)
  return grid_result

In [6]:
# Run the hyperparameter search and report the winning configuration.
grid_result = logit_hyperparam_grid_search()
best_score = grid_result.best_score_
best_params = grid_result.best_params_

print(f'El mejor resultado :{best_score} se consigue con {best_params}')

# Evaluate the fitted search object on the held-out test split.
model_metrics_generator = ModelMetricsGenerator(grid_result, test_truth)
model_metrics_generator.predict_model(test_data)
model_metrics_generator.print_results()

CPU times: user 7.56 s, sys: 693 ms, total: 8.25 s
Wall time: 12min 38s
El mejor resultado :0.9685714285714286 se consigue con {'C': 0.25, 'class_weight': {0: 1, 1: 10}, 'penalty': 'l1', 'solver': 'liblinear'}

 Indicadores rendimiento:
Fit time: 0
Predict time: 0.002
Accuracy: 66.18
Precision: 52.66
Recall: 96.12
Specificity: 48.26
F2-score: 82.5


0
0.002
66.18
52.66
96.12
48.26
82.5


In [8]:
def regression_model():
  """Fit and evaluate a logistic regression with fixed hyperparameters.

  The hyperparameters (l1 penalty, liblinear solver, C=0.25, class weights
  {0: 1, 1: 10}) are hard-coded. The model is trained on the notebook-level
  ``train_data`` / ``train_truth`` and evaluated on ``test_data`` /
  ``test_truth`` through ``ModelMetricsGenerator``, which prints the results.

  Returns:
    None.
  """
  # n_jobs is deliberately not passed: scikit-learn ignores it for the
  # liblinear solver and emits a UserWarning when it is set.
  model = LogisticRegression(penalty='l1', solver='liblinear', class_weight={0: 1, 1: 10}, C=0.25)
  model_metrics_generator = ModelMetricsGenerator(model, test_truth)
  model_metrics_generator.fit_and_predict_model(train_data, train_truth, test_data)
  model_metrics_generator.print_results()

# Train and evaluate the fixed-hyperparameter model defined by regression_model.
regression_model()


 Indicadores rendimiento:
Fit time: 0.1528
Predict time: 0.0067
Accuracy: 66.18
Precision: 52.66
Recall: 96.12
Specificity: 48.26
F2-score: 82.5


0.1528
0.0067
66.18
52.66
96.12
48.26
82.5


  " = {}.".format(effective_n_jobs(self.n_jobs)))
