In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_validate
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, fbeta_score, plot_confusion_matrix
import matplotlib.pyplot as plt
import time
import sys

# Project files


In [3]:
# Mount Google Drive at /content/drive; later cells read project code and data
# from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Make the project's `src` package importable.
# The path is absolute (same /content/drive mount point used for the data file)
# so this cell does not depend on the kernel's current working directory.
PROJECT_ROOT = '/content/drive/MyDrive/TFG/implementations/machine_learning_tfg/'
if PROJECT_ROOT not in sys.path:  # keep the cell idempotent when it is re-run
  sys.path.append(PROJECT_ROOT)

# Project-local helpers used by the model cells below.
from src.utils.model_metrics_generator import ModelMetricsGenerator
from src.utils.cross_validation_utils import CrossValidationMetricsResultPrinter
from src.utils.my_metrics import accuracy_precision_recall_specifity_f2_score

# Load data

In [19]:
# Read the prepared dataset. The 'ICU' column is the prediction target; every
# remaining column is used as a feature.
input_data = pd.read_excel('/content/drive/MyDrive/TFG/implementations/machine_learning_tfg/data/prepared/prepared_ICU_Prediction.xlsx')
ground_truth = input_data['ICU']
sample_data = input_data.drop('ICU', axis=1)

# Fixed seed: without it the shuffled 80/20 hold-out split (and therefore every
# hold-out metric printed below) changes on each run of the notebook.
SPLIT_SEED = 42
train_data, test_data, train_truth, test_truth = train_test_split(
    sample_data, ground_truth, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

# Linear SVM

In [41]:
def linear_model(max_iter=1000):
  """Evaluate an unweighted LinearSVC on the hold-out split and print the results.

  Uses the notebook globals train_data, train_truth, test_data and test_truth.
  Nothing is returned; the report is printed by ModelMetricsGenerator.

  Args:
    max_iter: iteration limit forwarded to svm.LinearSVC.
  """
  classifier = svm.LinearSVC(max_iter=max_iter)
  report = ModelMetricsGenerator(test_truth)
  report.generate_metrics(classifier, train_data, train_truth, test_data)
  report.print_results()

In [42]:
# Baseline hold-out run using the function's default max_iter (1000).
linear_model()


Fit time: 0.4725 segundos.
Predict time: 0.0031 segundos.
Accuracy: 84.0%.
Precision: 78.48%.
Recall: 69.66%.
Specificity: 90.86%.
F2-score: 71.26%.




In [43]:
# Same hold-out evaluation with the iteration limit raised to 10000.
linear_model(10000)


Fit time: 1.3351 segundos.
Predict time: 0.0016 segundos.
Accuracy: 84.73%.
Precision: 81.33%.
Recall: 68.54%.
Specificity: 92.47%.
F2-score: 70.77%.


In [51]:
  # Hold-out evaluation of a class-weighted LinearSVC: class 1 gets weight 10.
  weights = {1:10}
  # Named `weighted_svc` rather than `weighted_linear_model`: the latter is the
  # name of a function defined later in this notebook, and rebinding it to an
  # estimator instance would break those calls if this cell were re-run.
  weighted_svc = svm.LinearSVC(max_iter=50000,class_weight=weights)
  metric_generator = ModelMetricsGenerator(test_truth)
  metric_generator.generate_metrics(weighted_svc, train_data, train_truth, test_data)
  metric_generator.print_results()


Fit time: 17.6958 segundos.
Predict time: 0.0039 segundos.
Accuracy: 69.09%.
Precision: 51.22%.
Recall: 94.38%.
Specificity: 56.99%.
F2-score: 80.77%.


In [56]:
def weighted_linear_model(max_iter=10000, cv=10, weights = {1:10}):
  """Cross-validate a class-weighted LinearSVC on the full dataset and print the metrics.

  Uses the notebook globals sample_data and ground_truth. Nothing is returned;
  the summary is printed by CrossValidationMetricsResultPrinter.

  Args:
    max_iter: iteration limit forwarded to svm.LinearSVC.
    cv: value forwarded to cross_validate's `cv` argument.
    weights: value forwarded to LinearSVC's `class_weight` argument. The default
      dict is a shared object; this function only passes it on.
  """
  scoring = accuracy_precision_recall_specifity_f2_score()
  classifier = svm.LinearSVC(class_weight=weights, max_iter=max_iter)
  cv_results = cross_validate(
      classifier, sample_data, ground_truth, scoring=scoring, cv=cv, n_jobs=-1)
  CrossValidationMetricsResultPrinter().print_metrics(cv_results)

In [57]:
# Cross-validation with all defaults: max_iter=10000, cv=10, weights={1: 10}.
weighted_linear_model()


Valores medios:
	Fit time 7.4601 segundos.
	Test time 0.0076 segundos
	Accuracy 65.6%.
	Precision 49.34%.
	Recall 89.44%.
	Specificity 53.73%.
	F2 score 76.79%.


In [58]:
# Lower the class-1 weight from the default 10 to 6; other arguments keep their defaults.
weighted_linear_model(weights={1:6})


Valores medios:
	Fit time 7.5749 segundos.
	Test time 0.0075 segundos
	Accuracy 72.02%.
	Precision 55.35%.
	Recall 87.03%.
	Specificity 64.53%.
	F2 score 77.96%.


In [60]:
# Class-1 weight 6 again, with the iteration limit raised from the default 10000 to 30000.
weighted_linear_model(max_iter=30000, weights={1:6})


Valores medios:
	Fit time 16.6454 segundos.
	Test time 0.0079 segundos
	Accuracy 72.02%.
	Precision 55.35%.
	Recall 87.03%.
	Specificity 64.53%.
	F2 score 77.96%.


In [61]:
# Default class weights ({1: 10}) with the iteration limit raised to 50000.
weighted_linear_model(max_iter=50000)


Valores medios:
	Fit time 27.3556 segundos.
	Test time 0.0076 segundos
	Accuracy 65.97%.
	Precision 49.7%.
	Recall 89.44%.
	Specificity 54.28%.
	F2 score 76.95%.
