In [None]:
# Mount Google Drive at /content/drive so the project files used below are reachable.
# This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

# Carga de datos

In [None]:
import os

In [None]:
# Root folder on Drive; the cells below list its entries and unpickle the files inside them.
file_path = '/content/drive/My Drive/ProyectoTesis/FilteredData/'

In [None]:
# Names of every entry directly under file_path (os.listdir returns files and folders alike).
mainFolders = os.listdir(file_path)

In [None]:
# Map each sub-folder of file_path to the list of file names it contains.
# Only directories are listed: os.listdir raises NotADirectoryError on a plain file,
# so any stray file next to the folders (not just 'AllFolderData.pkl') must be skipped.
allFolders = {}
for folder in mainFolders:
  folder_path = os.path.join(file_path, folder)
  if folder != 'AllFolderData.pkl' and os.path.isdir(folder_path):
    allFolders[folder] = os.listdir(folder_path)

In [None]:
import pickle as pkl

In [None]:
# Nested container filled by the next cell: allFolderData[folder name][file name] -> unpickled object.
allFolderData = {}

In [None]:
# Unpickle every file of every folder into allFolderData[folder][file name].
# NOTE: pickle.load can execute arbitrary code stored in the file; only load pickles you created.
for key, value in allFolders.items():
  allFolderData[key] = {}
  for item in value:
    # Build the path from file_path instead of repeating the Drive location as a literal,
    # so changing file_path is enough to point the notebook at another data folder.
    with open(os.path.join(file_path, key, item), 'rb') as file:
      allFolderData[key][item] = pkl.load(file)

## Preparar datos

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

def ScaleDf(Df):
  """Standardize, in place, every column of `Df` except the last one.

  A new StandardScaler is fitted on the frame itself and its output overwrites
  the original feature columns. Nothing is returned.
  """
  feature_columns = Df.columns[:-1]
  Df[feature_columns] = StandardScaler().fit_transform(Df[feature_columns])

def LabelEncodingDf(Df):
  """Replace, in place, the last column of `Df` with the codes produced by a fresh LabelEncoder.

  The encoder is fitted on that column only. Nothing is returned.
  """
  target_column = Df.columns[-1]
  Df[target_column] = LabelEncoder().fit_transform(Df[target_column])

def PrepareData(Df):
  """Prepare `Df` in place: scale the feature columns first, then label-encode the last column."""
  for step in (ScaleDf, LabelEncodingDf):
    step(Df)

In [None]:
# Scale and label-encode every loaded DataFrame; PrepareData modifies each frame in place.
for files in allFolderData.values():
  for df in files.values():
    PrepareData(df)


In [None]:
import pickle as pkl

In [None]:
# Cache the prepared data as one pickle on Drive; the "Cargando datos" section reads it back.
with open('/content/drive/My Drive/ProyectoTesis/FilteredData/AllFolderData.pkl', 'wb') as file:
  pkl.dump(allFolderData, file)

# Cargando Objetos

## Cargando datos

In [None]:
import pickle as pkl

In [None]:
# Reload the prepared data from the cache file that the data-preparation section writes with pkl.dump.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load pickles you created.
with open('/content/drive/My Drive/ProyectoTesis/FilteredData/AllFolderData.pkl', 'rb') as file:
  allFolderData = pkl.load(file)

In [None]:
# for key, value in allFolders.items():
#   for item in value:
#     if key=='NotchFilters':
#       with open(f'/content/drive/My Drive/ProyectoTesis/FilteredData/{key}/{item}', 'rb') as file:
#         allFolderData[key][item] = pkl.load(file)

## Cargando modelos

In [None]:
# Install AutoGluon's tabular module into the kernel environment.
# NOTE(review): no version is pinned; a different release than the one used for training
# may not load the saved predictor — consider pinning once the version is known.
%pip install autogluon.tabular

In [None]:
from autogluon.tabular import TabularPredictor

In [None]:
# Folder on Drive handed to TabularPredictor.load in the next cell.
path_to_predictor = '/content/drive/My Drive/ProyectoTesis/Autogluon/Modelos2'

In [None]:
# Load the saved AutoGluon predictor; every evaluation below goes through this object.
predictor = TabularPredictor.load(path_to_predictor)

In [None]:
# Names of the models held by the predictor; the next cell prints the hyperparameters of each.
model_names = predictor.model_names()

In [None]:
# Print the hyperparameters of every model stored in the predictor.
for model_name in model_names:
    # Load the model.
    # NOTE(review): `_trainer` is a private attribute of the predictor and may change between releases.
    model = predictor._trainer.load_model(model_name)

    # Prefer `hyperparameters` and fall back to `params` only when it is missing.
    # The fallback is looked up lazily: written as a getattr default, `model.params`
    # is evaluated even when unused and raises if the model has no such attribute.
    if hasattr(model, 'hyperparameters'):
        hyperparameters = model.hyperparameters
    else:
        hyperparameters = getattr(model, 'params', None)
    print(f"Hiperparámetros para el modelo '{model_name}': {hyperparameters}")

# Probando Modelos

## Creando el dataframe para guardar métricas

In [None]:
import pandas as pd

In [None]:
# Empty results table. The evaluation cells below append one row per (filter, dataset name, model)
# made of those three identifiers followed by the four values returned by TestModel.
columnas_metricas_df = ['Filtro', 'Nombre', 'Modelo', 'Accuracy', 'Precision', 'Recall', 'F1-score']
metrics_df = pd.DataFrame(columns=columnas_metricas_df)


## Funciones

In [None]:
from sklearn.metrics import confusion_matrix

def GetConfusionMatrix(model_name, X_test, y_test):
  """Return the confusion matrix of one model of the notebook-level `predictor`.

  Args:
    model_name: name of the model inside `predictor` used to predict.
    X_test: features to predict on.
    y_test: true labels matching `X_test`.

  Returns:
    The result of sklearn's confusion_matrix(y_test, predictions).
  """
  return confusion_matrix(y_test, predictor.predict(X_test, model=model_name))

In [None]:
def MetricsDfConverter(classes, metrics):
  """Build a table of metrics whose cells are strings with four decimals.

  Args:
    classes: row labels, one per entry of `metrics`.
    metrics: exactly three rows, each holding precision, recall and F1 values.

  Returns:
    DataFrame indexed by `classes` with columns "Precision", "Recall" and "F1-Score";
    every cell is formatted as text, e.g. "0.1235".
  """
  _first, _second, _third = metrics  # fails early unless there are exactly three rows
  header = ["Precision", "Recall", "F1-Score"]
  table = pd.DataFrame(data=metrics, index=classes, columns=header)

  four_decimals = "{:.4f}".format
  for name in header:
    table[name] = table[name].apply(four_decimals)

  return table

In [None]:
import numpy as np

def _safe_ratio(numerator, denominator):
  """Return numerator / denominator, or 0.0 when the denominator is zero."""
  return numerator / denominator if denominator else 0.0


def MetricsCalculator(confusion_matrix, model=None):
  """Compute accuracy and macro-averaged precision, recall and F1 from a 2x2 confusion matrix.

  Args:
    confusion_matrix: 2x2 array-like. Row 0 is treated as the "bonafide" class and
      row 1 as the "spoof" class; columns follow the same order.
    model: unused; kept so existing calls that pass it keep working.

  Returns:
    [accuracy, macro_precision, macro_recall, macro_f1].

  Raises:
    ValueError: if `confusion_matrix` is not 2x2.

  Every ratio with a zero denominator (a class that never appears or is never
  predicted) counts as 0 instead of producing NaN or ZeroDivisionError, so all
  metrics are handled the same way for both classes.
  """
  matrix = np.asarray(confusion_matrix)
  if matrix.shape != (2, 2):
    raise ValueError(f"expected a 2x2 confusion matrix, got shape {matrix.shape}")

  # bb: bonafide row / bonafide column, bs: bonafide row / spoof column, and so on.
  (bb, bs), (sb, ss) = matrix

  # Spoof class: precision uses the spoof column, recall the spoof row.
  ps = _safe_ratio(ss, ss + bs)
  rs = _safe_ratio(ss, ss + sb)
  f1s = _safe_ratio(2 * (ps * rs), ps + rs)

  # Bonafide class: precision uses the bonafide column, recall the bonafide row.
  pb = _safe_ratio(bb, bb + sb)
  rb = _safe_ratio(bb, bb + bs)
  f1b = _safe_ratio(2 * (pb * rb), pb + rb)

  accuracy = _safe_ratio(ss + bb, bb + bs + sb + ss)

  macro_p = (ps + pb) / 2
  macro_r = (rs + rb) / 2
  macro_f1 = (f1s + f1b) / 2

  return [accuracy, macro_p, macro_r, macro_f1]



In [None]:
from sklearn.model_selection import train_test_split

def TestModel(model_name, data, label_column='Label', test_size=0.2, random_state=42):
  """Evaluate one model of the predictor on a stratified hold-out split of `data`.

  Args:
    model_name: name of the model passed on to GetConfusionMatrix.
    data: DataFrame holding the features plus the label column.
    label_column: name of the label column (default 'Label').
    test_size: fraction of rows used for evaluation (default 0.2).
    random_state: seed of the split (default 42).

  Returns:
    The list produced by MetricsCalculator for the confusion matrix on the test rows.

  With the default arguments the split is the same one this function has always used;
  only the test part is needed, so the training part is discarded.
  """
  _, test_data = train_test_split(
      data, test_size=test_size, random_state=random_state, stratify=data[label_column])

  X_test = test_data.drop(columns=label_column)
  y_test = test_data[label_column]

  return MetricsCalculator(GetConfusionMatrix(model_name, X_test, y_test))


In [None]:
# Show the top-level keys of the loaded data; the sections below index it by these names.
allFolderData.keys()

## Filtro de media

In [None]:
# Datasets stored under the 'Media' key.
datos_filtrados_media = allFolderData['Media']

In [None]:
# Evaluate every model of the predictor on each 'Media' dataset and append one
# result row per (dataset, model) to metrics_df.
for key, value in datos_filtrados_media.items():
  for model_name in predictor.model_names():
    metrics_df.loc[len(metrics_df)] = ['Media', key, model_name] + TestModel(model_name, value)

In [None]:
# Display every row collected in metrics_df so far.
metrics_df

## Filtro de Mediana

In [None]:
# Datasets stored under the 'Mediana' key.
datos_filtrados_mediana = allFolderData['Mediana']

In [None]:
# Evaluate every model of the predictor on each 'Mediana' dataset and append one
# result row per (dataset, model) to metrics_df.
for key, value in datos_filtrados_mediana.items():
  for model_name in predictor.model_names():
    metrics_df.loc[len(metrics_df)] = ['Mediana', key, model_name] + TestModel(model_name, value)

In [None]:
# Display only the rows recorded for the 'Mediana' filter.
metrics_df[metrics_df['Filtro']=='Mediana']

## Filtro de High-pass

In [None]:
# Datasets stored under the 'High-pass' key.
datos_filtrados_high_pass = allFolderData['High-pass']

In [None]:
# Evaluate every model of the predictor on each 'High-pass' dataset and append one
# result row per (dataset, model) to metrics_df.
for key, value in datos_filtrados_high_pass.items():
  for model_name in predictor.model_names():
    metrics_df.loc[len(metrics_df)] = ['High-pass', key, model_name] + TestModel(model_name, value)

In [None]:
# Display only the rows recorded for the 'High-pass' filter.
metrics_df[metrics_df['Filtro']=='High-pass']

## Filtro de Low-pass

In [None]:
# Datasets stored under the 'Low-pass' key.
datos_filtrados_low_pass = allFolderData['Low-pass']

In [None]:
# Evaluate every model of the predictor on each 'Low-pass' dataset and append one
# result row per (dataset, model) to metrics_df.
for key, value in datos_filtrados_low_pass.items():
  for model_name in predictor.model_names():
    metrics_df.loc[len(metrics_df)] = ['Low-pass', key, model_name] + TestModel(model_name, value)

In [None]:
# Display only the rows recorded for the 'Low-pass' filter.
metrics_df[metrics_df['Filtro']=='Low-pass']

## Filtro de Band-pass

In [None]:
# Datasets stored under the 'Band-pass' key.
datos_filtrados_Band_pass = allFolderData['Band-pass']

In [None]:
# Evaluate every model of the predictor on each 'Band-pass' dataset and append one
# result row per (dataset, model) to metrics_df.
for key, value in datos_filtrados_Band_pass.items():
  for model_name in predictor.model_names():
    metrics_df.loc[len(metrics_df)] = ['Band-pass', key, model_name] + TestModel(model_name, value)

In [None]:
# Display only the rows recorded for the 'Band-pass' filter.
metrics_df[metrics_df['Filtro']=='Band-pass']

## Filtro de Notch

In [None]:
# Datasets stored under the 'NotchFilters' key.
datos_filtrados_notch = allFolderData['NotchFilters']

In [None]:
# Evaluate every model of the predictor on each 'NotchFilters' dataset and append one
# result row per (dataset, model) to metrics_df.
for key, value in datos_filtrados_notch.items():
  for model_name in predictor.model_names():
    metrics_df.loc[len(metrics_df)] = ['NotchFilters', key, model_name] + TestModel(model_name, value)

In [None]:
# Display only the rows recorded for the 'NotchFilters' filter.
metrics_df[metrics_df['Filtro']=='NotchFilters']

## Guardar Df

In [None]:
# Pickle file on Drive where the metrics table is written by the next cell.
path = '/content/drive/My Drive/ProyectoTesis/MetricsFiltersDf.pkl'

In [None]:
# Persist the collected metrics so the "Usar Metricas" section can reload them
# without re-running the evaluation.
with open(path, 'wb') as file:
  pkl.dump(metrics_df, file)

# Usar Métricas

In [None]:
import pickle as pkl
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Pickle file on Drive that the next cell reads the metrics table from.
path = '/content/drive/My Drive/ProyectoTesis/MetricsFiltersDf.pkl'

In [None]:
# Reload the metrics table from the pickle at `path`.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load pickles you created.
with open(path, 'rb') as file:
  metrics_df = pkl.load(file)

In [None]:
# Display the 'NotchFilters' rows of the reloaded table.
metrics_df[metrics_df['Filtro']=='NotchFilters']

In [None]:
# Reference levels drawn as dashed horizontal lines in the bar chart below.
# NOTE(review): these numbers are hard-coded; presumably they are the averages obtained
# without any filter — confirm their source and consider computing them instead.
average_precision, average_recall, average_f1 = (0.990725, 0.9017, 0.9401875)

In [None]:
# Start from the full metrics table and leave out the two LightGBM variants.
filtered_metrics_df = metrics_df[~metrics_df['Modelo'].isin(['LightGBMXT', 'LightGBM'])]

# Average the numeric metrics per filter and keep only Precision, Recall and F1-score
# (Accuracy is deliberately left out).
filter_performance = (
    filtered_metrics_df
    .groupby('Filtro')
    .mean(numeric_only=True)[['Precision', 'Recall', 'F1-score']]
)

# Long format (one row per filter/metric pair) is easier to plot.
filter_performance_long = (
    filter_performance
    .melt(ignore_index=False, var_name='Métrica', value_name='Valor')
    .reset_index()
)

In [None]:
# Filters sorted by ascending mean Precision; this fixes the order of the groups on the x axis.
precision_order = (
    filter_performance_long
    .loc[filter_performance_long['Métrica'] == 'Precision']
    .sort_values(by='Valor')['Filtro']
)

# Turn 'Filtro' into an ordered categorical so the plot follows the Precision-based order.
filter_performance_long['Filtro'] = pd.Categorical(
    filter_performance_long['Filtro'], categories=precision_order, ordered=True
)

colors = sns.color_palette("rocket", 3)

# Grouped bars: one group per filter, one bar per metric.
plt.figure(figsize=(12, 8))
sns.barplot(data=filter_performance_long, x='Filtro', y='Valor', hue='Métrica', palette=colors, alpha=1)

# Dashed reference lines for the average Precision, Recall and F1-score.
for level, line_color, line_label in (
    (average_precision, 'purple', 'Precision promedio'),
    (average_recall, 'red', 'Recall promedio'),
    (average_f1, 'orange', 'F1-score promedio'),
):
  plt.axhline(y=level, color=line_color, linestyle='--', label=line_label)

# Axis labels and legend.
plt.xlabel('Filtro')
plt.ylabel('Valor Promedio')
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()


In [None]:
# Mean Precision, Recall and F1-score over all rows of the 'Media' filter.
filrto_media = metrics_df.loc[metrics_df['Filtro']=='Media', ['Precision', 'Recall', 'F1-score']].mean()
filrto_media

In [None]:
# Mean Precision, Recall and F1-score over all rows of the 'NotchFilters' filter.
# NOTE(review): the result is stored in `filrto_media` although it holds the NotchFilters
# averages; the name is kept because other cells may read it.
filrto_media = metrics_df.loc[metrics_df['Filtro']=='NotchFilters', ['Precision', 'Recall', 'F1-score']].mean()
filrto_media

In [None]:
# Leave out the two LightGBM variants before plotting.
filtered_metrics_df = metrics_df[~metrics_df['Modelo'].isin(['LightGBMXT', 'LightGBM'])]

# Long format: one row per (filter, metric) value, so all metrics fit in a single plot.
filtered_metrics_long_df = pd.melt(
    filtered_metrics_df,
    id_vars=['Filtro'],
    value_vars=['Accuracy', 'Precision', 'Recall', 'F1-score'],
    var_name='Métrica',
    value_name='Valor',
)

# Violin plot of the distribution of each metric, split by filter.
plt.figure(figsize=(12, 8))
sns.violinplot(
    data=filtered_metrics_long_df,
    x='Métrica',
    y='Valor',
    hue='Filtro',
    inner="quartile",
)
plt.title('Distribución de Métricas por Filtro (Excluyendo Primeros dos Modelos LightGBM)')
plt.xlabel('Métricas')
plt.ylabel('Valor')
plt.legend(title='Filtro')
plt.tight_layout()
plt.show()


In [None]:
# Pickle file on Drive that the next cell loads into mean_data_no_filter.
path2 = "/content/drive/My Drive/ProyectoTesis/MeanDataNoFilter.pkl"

In [None]:
# Load the object stored at path2.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load pickles you created.
with open(path2, 'rb') as file:
  mean_data_no_filter = pkl.load(file)

In [None]:
# Display the object loaded from MeanDataNoFilter.pkl.
mean_data_no_filter

In [None]:
# Export a table to CSV on Drive.
# NOTE(review): this writes `metrics_df` (the per-filter metrics), but the file name matches
# MeanDataNoFilter.pkl, which was loaded into `mean_data_no_filter` — confirm which of the
# two was meant to be exported here.
metrics_df.to_csv('/content/drive/My Drive/ProyectoTesis/MeanDataNoFilter.csv')

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
# One violin is drawn per distinct 'Nombre' on the y axis, so the figure height must
# scale with the number of distinct 'Nombre' values (not with the number of filters).
figsize = (12, 1.2 * len(metrics_df['Nombre'].unique()))
plt.figure(figsize=figsize)
# Distribution of Precision for each dataset name; 'stick' draws one line per observation.
sns.violinplot(data=metrics_df, x='Precision', y='Nombre', inner='stick', palette='Dark2')
# Remove all four axis spines for a cleaner look.
sns.despine(top=True, right=True, bottom=True, left=True)