In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_validate, StratifiedShuffleSplit
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, fbeta_score, plot_confusion_matrix
import matplotlib.pyplot as plt
import time
import sys

# Project files


In [5]:
# Mount Google Drive at /content/drive; the project sources and the dataset
# used in the following cells are read from paths under this mount point.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Put the project root (stored on the mounted Drive) on the import path so that
# the `src.utils` helpers below resolve. This must run before the imports.
sys.path.append('/content/drive/MyDrive/TFG/implementations/machine_learning_tfg/')
from src.utils.model_metrics_generator import ModelMetricsGenerator 
from src.utils.cross_validation_utils import CrossValidationMetricsResultPrinter
from src.utils.my_metrics import accuracy_precision_recall_specifity_f2_score

# Load data

In [7]:
# Read the prepared dataset from the project folder on the mounted Drive.
input_data = pd.read_excel(
    '/content/drive/MyDrive/TFG/implementations/machine_learning_tfg/data/prepared/prepared_ICU_Prediction.xlsx'
)

# The 'ICU' column is the prediction target; all remaining columns are features.
ground_truth = input_data['ICU']
sample_data = input_data.drop(columns='ICU')

# Hold out 20% of the rows as the test split; the fixed seed keeps the shuffle
# identical between runs.
train_data, test_data, train_truth, test_truth = train_test_split(
    sample_data,
    ground_truth,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Linear SVM

In [8]:
def linear_model(max_iter=1000):
  """Evaluate an unweighted LinearSVC on the train/test split and print the metrics.

  The classifier is handed to ModelMetricsGenerator together with the
  module-level ``train_data``/``train_truth``/``test_data``/``test_truth``
  split; judging by its method names, that helper fits on the training data,
  predicts the test data and prints the resulting report.

  Args:
    max_iter: iteration limit forwarded to ``svm.LinearSVC``.

  Returns:
    None. The results are only printed.
  """
  classifier = svm.LinearSVC(max_iter=max_iter)
  evaluator = ModelMetricsGenerator(classifier, test_truth)
  evaluator.fit_and_predict_model(train_data, train_truth, test_data)
  evaluator.print_results()

In [9]:
# Baseline run: LinearSVC with the function's default iteration limit (max_iter=1000).
linear_model()


 Indicadores rendimiento:
Fit time: 0.4956
Predict time: 0.002
Accuracy: 80.0
Precision: 77.27
Recall: 66.02
Specificity: 88.37
F2-score: 68.0


0.4956
0.002
80.0
77.27
66.02
88.37
68.0




In [None]:
# Same unweighted LinearSVC, with the iteration limit raised to 10000.
linear_model(10000)


 Indicadores rendimiento:
Fit time: 1.4464
Predict time: 0.0016
Accuracy: 79.64
Precision: 76.4
Recall: 66.02
Specificity: 87.79
F2-score: 67.86


1.4464
0.0016
79.64
76.4
66.02
87.79
67.86


In [10]:
def weighted_linear_model(max_iter=10000, weights=None):
  """Cross-validate a class-weighted LinearSVC, then evaluate it on the test split.

  Two reports are printed: first the cross-validation report produced by
  CrossValidationMetricsResultPrinter from the ``cross_validate`` results on
  the training split, then the ModelMetricsGenerator report for a model
  evaluated against the held-out ``test_data``/``test_truth``.

  Args:
    max_iter: iteration limit forwarded to ``svm.LinearSVC``.
    weights: ``class_weight`` dict forwarded to ``svm.LinearSVC``. ``None``
      (the default) means equal weights, ``{0: 1, 1: 1}``.

  Returns:
    None. The results are only printed.
  """
  if weights is None:
    # Build the default per call instead of sharing one mutable dict across
    # every invocation through the function signature.
    weights = {0: 1, 1: 1}

  metrics = accuracy_precision_recall_specifity_f2_score()
  sskfold = StratifiedShuffleSplit(random_state=1)
  # Named `classifier` so it does not shadow the module-level linear_model() function.
  classifier = svm.LinearSVC(max_iter=max_iter, class_weight=weights)

  # Cross validation on the training split only (stratified shuffle splits, fixed seed).
  results = cross_validate(classifier, train_data, train_truth, cv=sskfold, scoring=metrics, n_jobs=-1)
  printer = CrossValidationMetricsResultPrinter()
  printer.print_metrics_report(results)

  # Fit and predict once more to get the measures on the held-out test split.
  metric_generator = ModelMetricsGenerator(classifier, test_truth)
  metric_generator.fit_and_predict_model(train_data, train_truth, test_data)
  metric_generator.print_results()
  

In [14]:
# Weight class 1 by 10 with max_iter=10000. Class 0 is not listed in the dict;
# presumably it keeps weight 1 (confirm against scikit-learn's class_weight docs).
weighted_linear_model(max_iter=10000, weights={1:10})


Valores medios:
	Fit time: 5.7038
	Test time: 0.0077
	Accuracy: 69.82
	Precision: 51.67
	Recall: 92.86
	Specificity: 59.07
	F2 score: 79.98


5.7038
0.0077
69.82
51.67
92.86
59.07
79.98

 Indicadores rendimiento:
Fit time: 4.4909
Predict time: 0.0024
Accuracy: 71.64
Precision: 57.23
Recall: 96.12
Specificity: 56.98
F2-score: 84.62


4.4909
0.0024
71.64
57.23
96.12
56.98
84.62




In [12]:
# Weight class 1 by 6, with the iteration limit raised to 30000.
weighted_linear_model(max_iter=30000, weights={1:6})


Valores medios:
	Fit time: 12.4902
	Test time: 0.0073
	Accuracy: 75.27
	Precision: 57.31
	Recall: 89.71
	Specificity: 68.53
	F2 score: 80.51


12.4902
0.0073
75.27
57.31
89.71
68.53
80.51

 Indicadores rendimiento:
Fit time: 9.4849
Predict time: 0.0036
Accuracy: 74.91
Precision: 60.49
Recall: 95.15
Specificity: 62.79
F2-score: 85.37


9.4849
0.0036
74.91
60.49
95.15
62.79
85.37


In [13]:
# Weight class 1 by 10, with the iteration limit raised to 30000.
weighted_linear_model(max_iter=30000, weights={1:10})


Valores medios:
	Fit time: 16.9579
	Test time: 0.0078
	Accuracy: 70.91
	Precision: 52.65
	Recall: 92.86
	Specificity: 60.67
	F2 score: 80.46


16.9579
0.0078
70.91
52.65
92.86
60.67
80.46

 Indicadores rendimiento:
Fit time: 13.1695
Predict time: 0.0025
Accuracy: 71.27
Precision: 56.98
Recall: 95.15
Specificity: 56.98
F2-score: 83.9


13.1695
0.0025
71.27
56.98
95.15
56.98
83.9




In [None]:
# Weight class 1 by 10, with the iteration limit raised further to 50000.
weighted_linear_model(max_iter=50000, weights={1:10})


Valores medios:
	Fit time: 22.8887
	Test time: 0.0078
	Accuracy: 70.91
	Precision: 52.65
	Recall: 92.86
	Specificity: 60.67
	F2 score: 80.46


22.8887
0.0078
70.91
52.65
92.86
60.67
80.46

 Indicadores rendimiento:
Fit time: 16.8787
Predict time: 0.0017
Accuracy: 71.27
Precision: 56.98
Recall: 95.15
Specificity: 56.98
F2-score: 83.9


16.8787
0.0017
71.27
56.98
95.15
56.98
83.9


# Non-linear SVM

In [None]:
def weighted_non_linear_model(max_iter=10000, weights=None, kernel='rbf'):
  """Cross-validate a class-weighted kernel SVC, then evaluate it on the test split.

  Two reports are printed: first the cross-validation report produced by
  CrossValidationMetricsResultPrinter from the ``cross_validate`` results on
  the training split, then the ModelMetricsGenerator report for a model
  evaluated against the held-out ``test_data``/``test_truth``.

  Args:
    max_iter: iteration limit forwarded to ``svm.SVC``.
    weights: ``class_weight`` dict forwarded to ``svm.SVC``. ``None`` (the
      default) means equal weights, ``{0: 1, 1: 1}``.
    kernel: kernel name forwarded to ``svm.SVC`` (this notebook uses
      ``'rbf'``, ``'poly'`` and ``'sigmoid'``).

  Returns:
    None. The results are only printed.
  """
  if weights is None:
    # Build the default per call instead of sharing one mutable dict across
    # every invocation through the function signature.
    weights = {0: 1, 1: 1}

  metrics = accuracy_precision_recall_specifity_f2_score()
  sskfold = StratifiedShuffleSplit(random_state=1)
  model = svm.SVC(max_iter=max_iter, class_weight=weights, kernel=kernel)

  # Cross validation on the training split only (stratified shuffle splits, fixed seed).
  results = cross_validate(model, train_data, train_truth, cv=sskfold, scoring=metrics, n_jobs=-1)
  printer = CrossValidationMetricsResultPrinter()
  printer.print_metrics_report(results)

  # Fit and predict once more to get the measures on the held-out test split.
  metric_generator = ModelMetricsGenerator(model, test_truth)
  metric_generator.fit_and_predict_model(train_data, train_truth, test_data)
  metric_generator.print_results()


In [None]:
# Baseline kernel SVC: default 'rbf' kernel and default equal class weights, max_iter=1000.
weighted_non_linear_model(max_iter=1000)


Valores medios:
	Fit time: 0.272
	Test time: 0.0351
	Accuracy: 82.27
	Precision: 80.28
	Recall: 59.14
	Specificity: 93.07
	F2 score: 62.34


0.272
0.0351
82.27
80.28
59.14
93.07
62.34

 Indicadores rendimiento:
Fit time: 0.3028
Predict time: 0.0655
Accuracy: 77.82
Precision: 78.38
Recall: 56.31
Specificity: 90.7
F2-score: 59.67


0.3028
0.0655
77.82
78.38
56.31
90.7
59.67


In [None]:
# Polynomial kernel with class 1 weighted by 6 (max_iter left at its default of 10000).
weighted_non_linear_model(weights={1:6}, kernel='poly')


Valores medios:
	Fit time: 0.2774
	Test time: 0.0322
	Accuracy: 75.73
	Precision: 57.91
	Recall: 92.86
	Specificity: 67.73
	F2 score: 82.65


0.2774
0.0322
75.73
57.91
92.86
67.73
82.65

 Indicadores rendimiento:
Fit time: 0.2939
Predict time: 0.059
Accuracy: 76.36
Precision: 62.03
Recall: 95.15
Specificity: 65.12
F2-score: 85.96


0.2939
0.059
76.36
62.03
95.15
65.12
85.96


In [None]:
# Sigmoid kernel with class 1 weighted by 6 (max_iter left at its default of 10000).
weighted_non_linear_model(weights={1:6}, kernel='sigmoid')


Valores medios:
	Fit time: 0.4187
	Test time: 0.0497
	Accuracy: 33.45
	Precision: 32.36
	Recall: 100.0
	Specificity: 2.4
	F2 score: 70.51


0.4187
0.0497
33.45
32.36
100.0
2.4
70.51

 Indicadores rendimiento:
Fit time: 0.4561
Predict time: 0.1024
Accuracy: 46.18
Precision: 41.04
Recall: 100.0
Specificity: 13.95
F2-score: 77.68


0.4561
0.1024
46.18
41.04
100.0
13.95
77.68


In [None]:
# Sigmoid kernel again, with the class 1 weight lowered to 3.
weighted_non_linear_model(weights={1:3}, kernel='sigmoid')


Valores medios:
	Fit time: 0.4239
	Test time: 0.0507
	Accuracy: 76.0
	Precision: 60.25
	Recall: 73.43
	Specificity: 77.2
	F2 score: 70.23


0.4239
0.0507
76.0
60.25
73.43
77.2
70.23

 Indicadores rendimiento:
Fit time: 0.453
Predict time: 0.1017
Accuracy: 73.82
Precision: 62.6
Recall: 74.76
Specificity: 73.26
F2-score: 71.96


0.453
0.1017
73.82
62.6
74.76
73.26
71.96
