In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_validate, StratifiedShuffleSplit
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, fbeta_score, plot_confusion_matrix
import matplotlib.pyplot as plt
import time
import sys

# Project files


In [5]:
# Put the project root on the module search path so the `src` package below can
# be imported.
# NOTE(review): this path is relative, so it presumably relies on the working
# directory being the folder that contains `drive` (the data-loading cell uses
# the absolute /content/drive/... form instead) — confirm.
sys.path.append('./drive/MyDrive/TFG/implementations/machine_learning_tfg/')
from src.utils.model_metrics_generator import ModelMetricsGenerator 
from src.utils.cross_validation_utils import CrossValidationMetricsResultPrinter
from src.utils.my_metrics import accuracy_precision_recall_specifity_f2_score

# Load data

In [6]:
# Load the prepared dataset and separate the 'ICU' target column from the features.
input_data = pd.read_excel('/content/drive/MyDrive/TFG/implementations/machine_learning_tfg/data/prepared/prepared_ICU_Prediction.xlsx')
ground_truth = input_data['ICU']
sample_data = input_data.drop(columns='ICU')

# Hold out 20% of the rows for the single-split experiments.
# - random_state pins the shuffle, so a Restart & Run All reproduces the same
#   partition (and therefore the same printed metrics); the value matches the
#   seed used by the cross-validation splitters in this notebook.
# - stratify keeps the label proportions equal in the train and test parts,
#   in line with the stratified splitter used for cross-validation.
train_data, test_data, train_truth, test_truth = train_test_split(
    sample_data,
    ground_truth,
    test_size=0.2,
    shuffle=True,
    random_state=1,
    stratify=ground_truth,
)

# Linear SVM

In [None]:
def linear_model(max_iter=1000):
  """Evaluate an unweighted LinearSVC on the notebook's train/test split.

  Builds the classifier, hands it to a `ModelMetricsGenerator` created from the
  notebook-level `test_data` / `test_truth` together with `train_data` /
  `train_truth`, and prints whatever results the generator collected.

  Args:
    max_iter: Iteration cap forwarded to `svm.LinearSVC`.

  Returns:
    None. The results are only printed.
  """
  classifier = svm.LinearSVC(max_iter=max_iter)
  evaluator = ModelMetricsGenerator(test_data, test_truth)
  evaluator.generate_metrics(classifier, train_data, train_truth)
  evaluator.print_results()

In [None]:
# Baseline: hold-out evaluation with the function's default max_iter (1000).
linear_model()


Fit time: 0.4864s.
Predict time: 0.0023s.
Accuracy: 83.64%.
Precision: 82.19%.
Recall: 65.22%.
Specificity: 92.9%.
F2-score: 68.03%.




In [None]:
# Same hold-out evaluation with the iteration cap raised to 10000.
linear_model(10000)


Fit time: 1.3351 segundos.
Predict time: 0.0016 segundos.
Accuracy: 84.73%.
Precision: 81.33%.
Recall: 68.54%.
Specificity: 92.47%.
F2-score: 70.77%.


In [9]:
def weighted_linear_model(max_iter=10000, n_splits=10, weights = {0:1, 1:1}):
  """Cross-validate a class-weighted LinearSVC and print the metrics report.

  The model is scored on the notebook-level `sample_data` / `ground_truth`
  (the full dataset, not the train/test split) using the project's
  multi-metric scoring definition.

  Args:
    max_iter: Iteration cap forwarded to `svm.LinearSVC`.
    n_splits: Number of stratified shuffle-split iterations.
    weights: Passed as-is to the classifier's `class_weight`; this function
      only forwards it and never modifies it.

  Returns:
    None. The report is only printed.
  """
  scoring = accuracy_precision_recall_specifity_f2_score()
  # The seed is fixed, so repeated calls with the same n_splits reuse the same splits.
  splitter = StratifiedShuffleSplit(random_state=1, n_splits=n_splits)
  classifier = svm.LinearSVC(class_weight=weights, max_iter=max_iter)

  cv_results = cross_validate(
      classifier,
      sample_data,
      ground_truth,
      scoring=scoring,
      cv=splitter,
      n_jobs=-1,  # use every available core
  )

  CrossValidationMetricsResultPrinter().print_metrics_report(cv_results)

In [10]:
# Class 1 weighted 10. The dict replaces the function's default outright (it is
# not merged with it); class 0 is left to scikit-learn's default weight of 1.
weighted_linear_model(weights={1:10})


Valores medios:
	Fit time: 8.0254s.
	Test time: 0.0081s
	Accuracy: 68.99%.
	Precision: 52.04%.
	Recall: 93.48%.
	Specificity: 56.74%.
	F2 score: 80.57%.


8.0254
0.0081
68.99
52.04
93.48
56.74
80.57


In [11]:
# Milder weighting: class 1 at 6 instead of 10, other arguments at their defaults.
weighted_linear_model(weights={1:6})


Valores medios:
	Fit time: 8.1303s.
	Test time: 0.0087s
	Accuracy: 74.57%.
	Precision: 57.73%.
	Recall: 91.52%.
	Specificity: 66.09%.
	F2 score: 81.79%.


8.1303
0.0087
74.57
57.73
91.52
66.09
81.79


In [12]:
# Class-1 weight 6 again, with the iteration cap raised from the default 10000 to 30000.
weighted_linear_model(max_iter=30000, weights={1:6})


Valores medios:
	Fit time: 17.3529s.
	Test time: 0.0082s
	Accuracy: 74.71%.
	Precision: 57.87%.
	Recall: 91.52%.
	Specificity: 66.3%.
	F2 score: 81.86%.


17.3529
0.0082
74.71
57.87
91.52
66.3
81.86


In [13]:
# Class-1 weight 10 with the iteration cap raised to 50000.
weighted_linear_model(max_iter=50000, weights={1:10})


Valores medios:
	Fit time: 28.0606s.
	Test time: 0.0082s
	Accuracy: 69.35%.
	Precision: 52.35%.
	Recall: 93.48%.
	Specificity: 57.28%.
	F2 score: 80.72%.


28.0606
0.0082
69.35
52.35
93.48
57.28
80.72


# Non-linear SVM

In [14]:
def weighted_non_linear_model(max_iter=10000, n_splits=10, weights = {0:1, 1:1}, kernel='rbf'):
  """Cross-validate a class-weighted kernel SVC and print the metrics report.

  The model is scored on the notebook-level `sample_data` / `ground_truth`
  (the full dataset, not the train/test split) using the project's
  multi-metric scoring definition.

  Args:
    max_iter: Iteration cap forwarded to `svm.SVC`.
    n_splits: Number of stratified shuffle-split iterations.
    weights: Passed as-is to the classifier's `class_weight`; this function
      only forwards it and never modifies it.
    kernel: Kernel name forwarded to `svm.SVC`.

  Returns:
    None. The report is only printed.
  """
  scoring = accuracy_precision_recall_specifity_f2_score()
  # The seed is fixed, so repeated calls with the same n_splits reuse the same splits.
  splitter = StratifiedShuffleSplit(random_state=1, n_splits=n_splits)
  classifier = svm.SVC(kernel=kernel, class_weight=weights, max_iter=max_iter)

  cv_results = cross_validate(
      classifier,
      sample_data,
      ground_truth,
      scoring=scoring,
      cv=splitter,
      n_jobs=-1,  # use every available core
  )

  CrossValidationMetricsResultPrinter().print_metrics_report(cv_results)

# Default kernel ('rbf') and default equal class weights, iterations capped at 1000.
weighted_non_linear_model(max_iter=1000)




Valores medios:
	Fit time: 0.4206s.
	Test time: 0.05s
	Accuracy: 81.74%.
	Precision: 80.72%.
	Recall: 59.35%.
	Specificity: 92.93%.
	F2 score: 62.61%.


0.4206
0.05
81.74
80.72
59.35
92.93
62.61


In [20]:
# Polynomial kernel with class 1 weighted 6; max_iter stays at the default 10000.
weighted_non_linear_model(weights={1:6}, kernel='poly')


Valores medios:
	Fit time: 0.4224s.
	Test time: 0.0465s
	Accuracy: 75.29%.
	Precision: 58.21%.
	Recall: 93.7%.
	Specificity: 66.09%.
	F2 score: 83.44%.


0.4224
0.0465
75.29
58.21
93.7
66.09
83.44


In [19]:
# Sigmoid kernel with class 1 weighted 6.
weighted_non_linear_model(weights={1:6}, kernel='sigmoid')


Valores medios:
	Fit time: 0.6667s.
	Test time: 0.0708s
	Accuracy: 40.36%.
	Precision: 35.86%.
	Recall: 100.0%.
	Specificity: 10.54%.
	F2 score: 73.65%.


0.6667
0.0708
40.36
35.86
100.0
10.54
73.65


In [18]:
# Sigmoid kernel with a lighter class-1 weight of 3.
weighted_non_linear_model(weights={1:3}, kernel='sigmoid')


Valores medios:
	Fit time: 0.6385s.
	Test time: 0.0756s
	Accuracy: 72.39%.
	Precision: 55.96%.
	Recall: 82.39%.
	Specificity: 67.39%.
	F2 score: 75.23%.


0.6385
0.0756
72.39
55.96
82.39
67.39
75.23
