In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
file_path = '/content/drive/My Drive/ProyectoTesis/AllFeatureData.pkl'


In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np

In [None]:
allData = pd.read_pickle(file_path)

In [None]:
featureData = allData.copy()

In [None]:
features = allData.columns[:-1]
features

In [None]:
# Replace every cell of each feature column with the mean of the value stored in it.
# NOTE(review): presumably each cell holds a per-sample array of values — confirm against the pickle.
# The lambda is kept on purpose: passing np.mean directly to Series.apply can be
# interpreted by pandas as a whole-column reduction rather than an element-wise call.
for feature in features:
  featureData[feature] = featureData[feature].apply(lambda x: np.mean(x))

# Preview the first rows of the reduced frame.
featureData.head()

In [None]:
featureData.hist()

In [None]:
mean_df = featureData.groupby('Label')
mean_df.mean().T["spoof"]

In [None]:
# Overall mean of every column except the last one, keyed by column name.
means = {column: np.mean(featureData[column]) for column in featureData.columns[:-1]}

In [None]:
means

In [None]:
spoof = featureData[featureData['Label']=='spoof']
spoof.hist()

In [None]:
bf = featureData[featureData['Label']=='bonafide']
bf.hist()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
X = featureData.drop('Label', axis=1)
y = featureData['Label']

In [None]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [None]:
# Standardize the feature columns; the scaler is fitted on the full feature matrix X.
# NOTE(review): the fit happens before the train/test split in the cell below, so
# test rows contribute to the scaling statistics — consider fitting on the
# training split only and re-checking the reported metrics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
X_scaled

In [None]:
# Hold out 20% of the rows for testing, stratified on the labels, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y)


In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, ParameterGrid
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
def PrintMetrics(model):
  """Evaluate `model` on the notebook's test split and print a summary.

  Uses the module-level X_test, y_test and label_encoder. Prints the accuracy
  as a percentage, the confusion matrix and the per-class classification
  report (class names taken from label_encoder.classes_).

  Returns:
    The confusion matrix computed from y_test and the model's predictions.
  """
  predictions = model.predict(X_test)

  print(f'Accuracy: {accuracy_score(y_test, predictions) * 100:.2f}%')

  matrix = confusion_matrix(y_test, predictions)
  print('Confusion Matrix:', matrix, sep='\n')

  report = classification_report(y_test, predictions, target_names=label_encoder.classes_)
  print('Classification Report:', report, sep='\n')

  return matrix


# Kernel RBF

In [None]:
svm_rbf = SVC(kernel='rbf', gamma='scale', verbose=True)

In [None]:
svm_rbf.fit(X_train, y_train)

In [None]:

# Evaluate the RBF model with the shared helper, exactly like the other kernels,
# instead of a hand-copied version of the helper's body. The helper prints the
# accuracy, confusion matrix and classification report and returns the matrix.
conf_matrix = PrintMetrics(svm_rbf)

In [None]:

train_accuracy = svm_rbf.score(X_train, y_train)
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')


test_accuracy = svm_rbf.score(X_test, y_test)
print(f'Testing Accuracy: {test_accuracy * 100:.2f}%')


# Kernel lineal

In [None]:
svm_lineal = SVC(kernel='linear', gamma='scale')

In [None]:
svm_lineal.fit(X_train, y_train)

In [None]:
PrintMetrics(svm_lineal)

# Kernel Polinomial


In [None]:
svm_poly = SVC(kernel='poly', gamma='scale')

In [None]:
svm_poly.fit(X_train, y_train)

In [None]:
PrintMetrics(svm_poly)

# Kernel Sigmoid

In [None]:
svm_sigm = SVC(kernel='sigmoid', gamma='scale')

In [None]:
svm_sigm.fit(X_train, y_train)

In [None]:
PrintMetrics(svm_sigm)

In [None]:
import pickle as pkl

In [None]:

# with open('/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/FirstSVMModel.pkl', 'wb') as file:
#     pkl.dump(svm_classifier, file)

# with open('/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/FirstSVMModel_LabelEncoder.pkl', 'wb') as file:
#     pkl.dump(label_encoder, file)

# with open('/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/FirstSVMModel_Scaler.pkl', 'wb') as file:
#     pkl.dump(scaler, file)

# Weighted classes

In [None]:
svm_rbf_balanced = SVC(kernel='rbf', gamma='scale', verbose=True, class_weight='balanced')

In [None]:
svm_rbf_balanced.fit(X_train, y_train)

In [None]:
PrintMetrics(svm_rbf_balanced)

# Guardar datos

In [None]:
model_names = ["svm_rbf", "svm_lineal", "svm_poly", "svm_sigm", "svm_rbf_balanced"]

In [None]:


# Trained estimators, listed in the same order as `model_names`.
# (The loop previously read an undefined `models` list and raised NameError.)
models = [svm_rbf, svm_lineal, svm_poly, svm_sigm, svm_rbf_balanced]

# Persist each estimator to Drive in a pickle named after the model.
for model_name, trained_model in zip(model_names, models):
  with open(f'/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/Regular_{model_name}.pkl', 'wb') as file:
      pkl.dump(trained_model, file)

# Cargar Datos

In [None]:
import pickle as pkl

In [None]:
model_names = ["svm_rbf", "svm_lineal", "svm_poly", "svm_sigm", "svm_rbf_balanced"]


In [None]:
loaded_models = []

In [None]:
# Load the pickled estimators and keep each one paired with its name.
# NOTE(review): the [:-1] slice skips the last name (svm_rbf_balanced) — confirm this is intentional.
# Only load pickles you produced yourself: unpickling can execute arbitrary code.
for model in model_names[:-1]:
  with open(f'/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/Regular_{model}.pkl', 'rb') as file:
      loaded_model = pkl.load(file)
      loaded_models.append((loaded_model, model))


In [None]:
for model in loaded_models:
  print(model[0].get_params())

In [None]:
# Compare the hyper-parameters of the first two loaded models.
# Both lookups must read `loaded_models`; `load_models` is a different list that
# is only created later in the notebook (the C sweep).
print(a:=loaded_models[0][0].get_params())
print(b:=loaded_models[1][0].get_params())
print(a==b)

In [None]:
loaded_models

In [None]:
for model, model_name in loaded_models:
  print("\n")
  print("\n")
  print(f"#################  {model_name}  ######################")
  PrintMetrics(model)
  print("#######################################")
  print("\n")
  print("\n")



#################  svm_rbf  ######################
Accuracy: 97.42%
Confusion Matrix:
[[ 1269  1502]
 [  191 62591]]
Classification Report:
              precision    recall  f1-score   support

    bonafide       0.87      0.46      0.60      2771
       spoof       0.98      1.00      0.99     62782

    accuracy                           0.97     65553
   macro avg       0.92      0.73      0.79     65553
weighted avg       0.97      0.97      0.97     65553


## Imprimir Datos

In [None]:
def MetricsDfConverter(classes, metrics):
  """Build a printable table of metrics, one row per class.

  Args:
    classes: Row labels, one per entry of `metrics`.
    metrics: Sequence of [precision, recall, f1] rows. Any number of rows is
      accepted (the former three-way unpack of `metrics` was unused and only
      restricted the input to exactly three rows).

  Returns:
    A DataFrame indexed by `classes` with the columns "Precision", "Recall"
    and "F1-Score", every value formatted as a string with four decimals.
  """
  columns = ["Precision", "Recall", "F1-Score"]
  df = pd.DataFrame(metrics, index=classes, columns=columns)

  # Format for display only; the numeric values are replaced by strings.
  for column in columns:
    df[column] = df[column].apply(lambda x: f"{x:.4f}")

  return df


In [None]:
def _safe_ratio(numerator, denominator):
  """Return numerator / denominator, or 0.0 when the denominator is zero."""
  return numerator / denominator if denominator else 0.0


def MetricsCalculator(confusion_matrix, model=None):
  """Print and return precision/recall/F1 derived from a two-row confusion matrix.

  Args:
    confusion_matrix: Two rows, unpacked as (bonafide, spoof), each holding two
      counts. For each class, precision is its diagonal entry over the sum of
      its column and recall is its diagonal entry over the sum of its row.
    model: Label printed in the report header.

  Returns:
    [macro_precision, macro_recall, macro_f1], the unweighted means of the two
    per-class values.

  A ratio whose denominator is zero (for example when a class is never
  predicted) is reported as 0.0 instead of raising ZeroDivisionError for plain
  ints or producing NaN for NumPy ints.
  """
  bonafide, spoof = confusion_matrix

  # Spoof class: diagonal entry is spoof[1].
  ps = _safe_ratio(spoof[1], spoof[1] + bonafide[1])
  rs = _safe_ratio(spoof[1], sum(spoof))
  f1s = _safe_ratio(2 * (ps * rs), ps + rs)

  s_metrics = [ps, rs, f1s]

  # Bonafide class: diagonal entry is bonafide[0].
  pb = _safe_ratio(bonafide[0], bonafide[0] + spoof[0])
  rb = _safe_ratio(bonafide[0], sum(bonafide))
  f1b = _safe_ratio(2 * (pb * rb), pb + rb)

  b_metrics = [pb, rb, f1b]

  macro_p = (ps + pb) / 2
  macro_r = (rs + rb) / 2
  macro_f1 = (f1s + f1b) / 2

  macro_metrics = [macro_p, macro_r, macro_f1]

  classes = ["spoof", "bonafide", "Macro-Avg"]
  metrics = [s_metrics, b_metrics, macro_metrics]

  accuracy = _safe_ratio(spoof[1] + bonafide[0], sum(bonafide) + sum(spoof))

  print(f"\n************ {model} ************")
  print(f"Accuracy: {accuracy:.4f}\n")
  print(MetricsDfConverter(classes, metrics))
  print("\n**********************************\n")

  return macro_metrics




In [None]:
# Macro-averaged metrics per kernel, computed from hard-coded confusion matrices.
# NOTE(review): the matrices look copied from earlier PrintMetrics output (the
# svm_rbf one matches the printed block above) — confirm they are still current,
# or compute them from the loaded models instead.
svm_rad = MetricsCalculator([[ 1269,  1502],[  191, 62591]], model_names[0])
svm_pol = MetricsCalculator([[  836,  1935],[  176, 62606]], model_names[2])
svm_sig = MetricsCalculator([[  518,  2253], [ 2223, 60559]], model_names[3])



In [None]:
print(model_names)

In [None]:
import time

def MeasureResponseTime(loaded_models, n_samples=1000, X=None):
  """Print the mean single-sample prediction latency of each model.

  Args:
    loaded_models: Iterable of (model, model_name) pairs; each model must
      expose `predict`.
    n_samples: Maximum number of one-row predictions to time per model. It is
      capped at the number of available samples so a small set is never
      sliced past its end.
    X: Samples to predict on. Defaults to the module-level X_test.

  Raises:
    ValueError: If there is no sample to time.
  """
  samples = X_test if X is None else X
  n_timed = min(n_samples, len(samples))
  if n_timed <= 0:
    raise ValueError("MeasureResponseTime needs at least one sample to time")

  for model, model_name in loaded_models:
    total_time = 0.0
    for i in range(n_timed):
      # perf_counter is a monotonic high-resolution clock meant for short intervals.
      start_time = time.perf_counter()
      model.predict(samples[i:i+1])
      total_time += time.perf_counter() - start_time

    print(f"Model: {model_name}\t\t Execution time: {total_time/n_timed:.6f} seconds")

## Graficas para los diferentes kernels probados

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#categories = ['Kernel radial', 'Kernel polinomial', 'Kernel sigmoide']
#modelos = [svm_rad, svm_pol, svm_sig]


# Data
def GraphDifferences(categories, modelos):
  """Draw a grouped bar chart of precision, recall and F1-score per model.

  Args:
    categories: Tick labels, one per model.
    modelos: Sequence of [precision, recall, f1] triples, in the same order as
      `categories`.
  """
  palette = sns.color_palette("rocket", 3)

  # One series per metric, collected across all models.
  precision = [scores[0] for scores in modelos]
  recall = [scores[1] for scores in modelos]
  f1_score = [scores[2] for scores in modelos]

  positions = np.arange(len(categories))
  bar_width = 0.2

  fig, ax = plt.subplots(figsize=(8, 6))

  # Three bars per model: left, centered and right of the tick position.
  series = (
      (positions - bar_width, precision, 'Precision'),
      (positions, recall, 'Recall'),
      (positions + bar_width, f1_score, 'F1-Score'),
  )
  for (bar_positions, values, label), color in zip(series, palette):
    ax.bar(bar_positions, values, bar_width, label=label, color=color)

  ax.set_xlabel('Modelos')
  ax.set_ylabel('Puntaje')
  ax.set_xticks(positions)
  ax.set_xticklabels(categories)

  ax.legend(loc='lower left')

  plt.tight_layout()
  plt.show()


# Comparar diferentes valores de C para kernel radial

## Entrenamiento y guardado de modelos

### Entrenando modelos

In [None]:
C_range = np.logspace(-1, 3, num=5, endpoint=True)
C_range

In [None]:
models_dif_c = []

In [None]:
# Train one RBF SVM per C value, timing each fit, and keep (model, C) pairs.
# Note: `c` and `model` stay bound to the last iteration after the loop ends,
# and later cells read them.
for c in C_range:
  start_time = time.time()

  model = SVC(kernel='rbf', gamma='scale', C=c, verbose=True)
  model.fit(X_train, y_train)

  finish_time = time.time()
  exec_time = finish_time - start_time

  # Wall-clock seconds spent fitting this model.
  print(f"Model with C={c} took: {exec_time}")
  models_dif_c.append((model, c))

In [None]:
  # Retrain only the first C value. The entry is labelled with C_range[0]: the
  # leftover loop variable `c` still holds the last C of the sweep, so using it
  # would make the save loop overwrite that C's pickle with this model.
  model = SVC(kernel='rbf', gamma='scale', C=C_range[0], verbose=True)
  model.fit(X_train, y_train)
  models_dif_c.append((model, C_range[0]))

In [None]:
# Registers the current `model` under the first C value.
# NOTE(review): `model` is whatever the last executed training cell bound —
# confirm it was trained with C_range[0]; this also adds a duplicate entry if
# that model is already in the list.
models_dif_c.append((model, C_range[0]))

In [None]:
models_dif_c[0][0]

### Guardando modelos

In [None]:
# Write every trained model to its own pickle, named after its C value.
for trained_model, c_value in models_dif_c:
  with open(f'/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/DifferentC/SVM_Radial_C_{c_value}.pkl', 'wb') as file:
      pkl.dump(trained_model, file)

## Carga de modelos y pruebas con el set de datos de testing (response_time, metrics)

### Probar los diferentes modelos con datos de prueba

In [None]:
load_models = []

In [None]:
# Load the RBF model saved for each C value, keeping the (model, C) pairing.
# Only load pickles you produced yourself: unpickling can execute arbitrary code.
for c_value in C_range:
  with open(f'/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/DifferentC/SVM_Radial_C_{c_value}.pkl', 'rb') as file:
      load_models.append((pkl.load(file), c_value))

# Show the result once, rather than re-printing the growing list on every iteration.
print(load_models)

In [None]:
# Evaluate each loaded model on the test split and keep its confusion matrix
# next to the C value it was trained with.
conf_matrices = [(PrintMetrics(estimator), c_value) for estimator, c_value in load_models]

conf_matrices

### Obteniendo metricas utiles

In [None]:
# Macro-averaged [precision, recall, f1] for every evaluated model, in load order.
metrics_difs_cs = [
    MetricsCalculator(conf_matrices[idx][0], model= conf_matrices[idx][1])
    for idx in range(len(load_models))
]

### Creando graficas para cada modelo y sus metricas

In [None]:
categories = [f"{c}" for c in C_range]

GraphDifferences(categories, metrics_difs_cs)

### Midiendo el tiempo de respuesta

In [None]:
MeasureResponseTime(load_models)

# Pruebas con el set de datos del examen humano

In [None]:
prueba_data = featureData.copy()
prueba_data

In [None]:
X = prueba_data.drop('Label', axis=1)
y = prueba_data['Label']

In [None]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [None]:
prueba_data['Label'] = y_encoded

In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Overwrite each feature column with the matching column of the scaled matrix.
for column_index, feature in enumerate(features):
  prueba_data[feature] = X_scaled[:, column_index]

In [None]:
indexes_for_human_dataset = [42253, 44952, 70840, 115974, 164782, 170033, 172663, 253854, 269530, 290934]

In [None]:
datos = prueba_data.iloc[indexes_for_human_dataset]

In [None]:
X = np.array(datos.drop('Label', axis=1))
y = datos['Label']

In [None]:
y

In [None]:
load_models = []

In [None]:
C_range = np.logspace(-1, 3, num=5, endpoint=True)
C_range

In [None]:
import pickle as pkl

In [None]:
# Load the RBF model saved for each C value, keeping the (model, C) pairing.
# Only load pickles you produced yourself: unpickling can execute arbitrary code.
for c_value in C_range:
  with open(f'/content/drive/My Drive/ProyectoTesis/SVM/NoFilter/DifferentC/SVM_Radial_C_{c_value}.pkl', 'rb') as file:
      load_models.append((pkl.load(file), c_value))

# Show the result once, rather than re-printing the growing list on every iteration.
print(load_models)

In [None]:
def GetConfMatrix2(model, X_test, y_test_encoded):
  """Return the confusion matrix of `model` on the given samples.

  Args:
    model: Estimator exposing `predict`.
    X_test: Samples to predict on.
    y_test_encoded: Expected labels, compared against the rounded predictions.
  """
  # Local import keeps the helper usable on its own.
  from sklearn.metrics import confusion_matrix
  rounded_predictions = model.predict(X_test).round()
  return confusion_matrix(y_test_encoded, rounded_predictions)

In [None]:
cfms = []

In [None]:
# Confusion matrix of every loaded model on the selected rows, paired with the
# second element of each load_models entry.
cfms.extend(
    (GetConfMatrix2(estimator, X, y), label) for estimator, label in load_models
)

In [None]:
cfms