In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings

# Desactivar advertencias innecesarias
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Cargar datos desde archivos CSV
def load_data(file_path):
    return pd.read_csv(file_path)

# Categorizar valores en función de cuartiles
def categorize_values(data):
    quartiles = data['Y'].quantile([0.25, 0.5, 0.75])
    low, medium, high = quartiles.iloc[0], quartiles.iloc[1], quartiles.iloc[2]
    return data['Y'].apply(lambda x: 'bajo' if x <= low else ('medio' if x <= medium else 'alto'))

# Evaluar rendimiento del clasificador
def evaluate_classifier(X, y, classifier):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = classifier() if classifier != LogisticRegression else LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division='warn')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print("\nEvaluación de", classifier.__name__)
    print("Precisión:", accuracy)
    print("Sensibilidad:", recall)
    print("F1 Score:", f1)

# Imprimir nombre del conjunto de datos
def print_dataset_name(name):
    print("\nConjunto de datos evaluado:", name)

# Conjuntos de datos
datasets = {
    "Swedish Auto Insurance": "AutoInsurSweden.csv",
    "Wine Quality": "wine-Quality.csv",
    "Pima Indians Diabetes": "pima-indians-diabetes.csv"
}

# Evaluar cada conjunto de datos
for name, file_path in datasets.items():
    print_dataset_name(name)
    data = load_data(file_path)
    if name == "Swedish Auto Insurance":
        data['Y_category'] = categorize_values(data)
        X, y = data[['X']], data['Y_category']
    elif name == "Wine Quality":
        X, y = data.drop('quality', axis=1), data['quality']
    else:
        X, y = data.drop('Class variable (0 or 1)', axis=1), data['Class variable (0 or 1)']
    classifiers = [LogisticRegression, KNeighborsClassifier, SVC, GaussianNB, MLPClassifier]
    for classifier in classifiers:
        evaluate_classifier(X, y, classifier)



Conjunto de datos evaluado: Swedish Auto Insurance

Evaluación de LogisticRegression
Precisión: 0.6923076923076923
Sensibilidad: 0.6923076923076923
F1 Score: 0.7433198380566802

Evaluación de KNeighborsClassifier
Precisión: 0.6153846153846154
Sensibilidad: 0.6153846153846154
F1 Score: 0.6837606837606838

Evaluación de SVC
Precisión: 0.6923076923076923
Sensibilidad: 0.6923076923076923
F1 Score: 0.7510121457489879

Evaluación de GaussianNB
Precisión: 0.5384615384615384
Sensibilidad: 0.5384615384615384
F1 Score: 0.6357466063348416

Evaluación de MLPClassifier
Precisión: 0.8461538461538461
Sensibilidad: 0.8461538461538461
F1 Score: 0.7756410256410255

Conjunto de datos evaluado: Wine Quality

Evaluación de LogisticRegression
Precisión: 0.575
Sensibilidad: 0.575
F1 Score: 0.5405306044850028

Evaluación de KNeighborsClassifier
Precisión: 0.45625
Sensibilidad: 0.45625
F1 Score: 0.4298823811312021

Evaluación de SVC
Precisión: 0.509375
Sensibilidad: 0.509375
F1 Score: 0.4618002156789197

Eval

In [2]:
import pandas as pd

# Datos proporcionados
data = [
    {"Dataset": "Swedish Auto Insurance", "Classifier": "LogisticRegression", "Accuracy": 0.6923, "Precision": 0.6923, "Recall": 0.6923, "F1 Score": 0.7433},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "KNeighborsClassifier", "Accuracy": 0.6154, "Precision": 0.6154, "Recall": 0.6154, "F1 Score": 0.6838},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "SVC", "Accuracy": 0.6923, "Precision": 0.6923, "Recall": 0.6923, "F1 Score": 0.7510},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "GaussianNB", "Accuracy": 0.5385, "Precision": 0.5385, "Recall": 0.5385, "F1 Score": 0.6357},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "MLPClassifier", "Accuracy": 0.8462, "Precision": 0.8462, "Recall": 0.8462, "F1 Score": 0.7756},
    {"Dataset": "Wine Quality", "Classifier": "LogisticRegression", "Accuracy": 0.575, "Precision": 0.575, "Recall": 0.575, "F1 Score": 0.5405},
    {"Dataset": "Wine Quality", "Classifier": "KNeighborsClassifier", "Accuracy": 0.4563, "Precision": 0.4563, "Recall": 0.4563, "F1 Score": 0.4299},
    {"Dataset": "Wine Quality", "Classifier": "SVC", "Accuracy": 0.5094, "Precision": 0.5094, "Recall": 0.5094, "F1 Score": 0.4618},
    {"Dataset": "Wine Quality", "Classifier": "GaussianNB", "Accuracy": 0.55, "Precision": 0.55, "Recall": 0.55, "F1 Score": 0.5455},
    {"Dataset": "Wine Quality", "Classifier": "MLPClassifier", "Accuracy": 0.5563, "Precision": 0.5563, "Recall": 0.5563, "F1 Score": 0.5181},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "LogisticRegression", "Accuracy": 0.7468, "Precision": 0.7468, "Recall": 0.7468, "F1 Score": 0.7482},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "KNeighborsClassifier", "Accuracy": 0.6623, "Precision": 0.6623, "Recall": 0.6623, "F1 Score": 0.6658},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "SVC", "Accuracy": 0.7662, "Precision": 0.7662, "Recall": 0.7662, "F1 Score": 0.7586},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "GaussianNB", "Accuracy": 0.7662, "Precision": 0.7662, "Recall": 0.7662, "F1 Score": 0.7679},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "MLPClassifier", "Accuracy": 0.6494, "Precision": 0.6494, "Recall": 0.6494, "F1 Score": 0.6297}
]

# Crear DataFrame de pandas
df = pd.DataFrame(data)

# Mostrar la tabla comparativa
print(df)


                   Dataset            Classifier  Accuracy  Precision  Recall  \
0   Swedish Auto Insurance    LogisticRegression    0.6923     0.6923  0.6923   
1   Swedish Auto Insurance  KNeighborsClassifier    0.6154     0.6154  0.6154   
2   Swedish Auto Insurance                   SVC    0.6923     0.6923  0.6923   
3   Swedish Auto Insurance            GaussianNB    0.5385     0.5385  0.5385   
4   Swedish Auto Insurance         MLPClassifier    0.8462     0.8462  0.8462   
5             Wine Quality    LogisticRegression    0.5750     0.5750  0.5750   
6             Wine Quality  KNeighborsClassifier    0.4563     0.4563  0.4563   
7             Wine Quality                   SVC    0.5094     0.5094  0.5094   
8             Wine Quality            GaussianNB    0.5500     0.5500  0.5500   
9             Wine Quality         MLPClassifier    0.5563     0.5563  0.5563   
10   Pima Indians Diabetes    LogisticRegression    0.7468     0.7468  0.7468   
11   Pima Indians Diabetes  

In [4]:
import pandas as pd

# Datos proporcionados
data = [
    {"Dataset": "Swedish Auto Insurance", "Classifier": "LogisticRegression", "Accuracy": 0.6923, "Precision": 0.6923, "Recall": 0.6923, "F1 Score": 0.7433},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "KNeighborsClassifier", "Accuracy": 0.6154, "Precision": 0.6154, "Recall": 0.6154, "F1 Score": 0.6838},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "SVC", "Accuracy": 0.6923, "Precision": 0.6923, "Recall": 0.6923, "F1 Score": 0.7510},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "GaussianNB", "Accuracy": 0.5385, "Precision": 0.5385, "Recall": 0.5385, "F1 Score": 0.6357},
    {"Dataset": "Swedish Auto Insurance", "Classifier": "MLPClassifier", "Accuracy": 0.8462, "Precision": 0.8462, "Recall": 0.8462, "F1 Score": 0.7756},
    {"Dataset": "Wine Quality", "Classifier": "LogisticRegression", "Accuracy": 0.575, "Precision": 0.575, "Recall": 0.575, "F1 Score": 0.5405},
    {"Dataset": "Wine Quality", "Classifier": "KNeighborsClassifier", "Accuracy": 0.4563, "Precision": 0.4563, "Recall": 0.4563, "F1 Score": 0.4299},
    {"Dataset": "Wine Quality", "Classifier": "SVC", "Accuracy": 0.5094, "Precision": 0.5094, "Recall": 0.5094, "F1 Score": 0.4618},
    {"Dataset": "Wine Quality", "Classifier": "GaussianNB", "Accuracy": 0.55, "Precision": 0.55, "Recall": 0.55, "F1 Score": 0.5455},
    {"Dataset": "Wine Quality", "Classifier": "MLPClassifier", "Accuracy": 0.5563, "Precision": 0.5563, "Recall": 0.5563, "F1 Score": 0.5181},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "LogisticRegression", "Accuracy": 0.7468, "Precision": 0.7468, "Recall": 0.7468, "F1 Score": 0.7482},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "KNeighborsClassifier", "Accuracy": 0.6623, "Precision": 0.6623, "Recall": 0.6623, "F1 Score": 0.6658},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "SVC", "Accuracy": 0.7662, "Precision": 0.7662, "Recall": 0.7662, "F1 Score": 0.7586},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "GaussianNB", "Accuracy": 0.7662, "Precision": 0.7662, "Recall": 0.7662, "F1 Score": 0.7679},
    {"Dataset": "Pima Indians Diabetes", "Classifier": "MLPClassifier", "Accuracy": 0.6494, "Precision": 0.6494, "Recall": 0.6494, "F1 Score": 0.6297}
]

# Crear DataFrame de pandas
df = pd.DataFrame(data)

# Obtener el índice del mejor clasificador basado en F1 Score por dataset
idx_max_f1 = df.groupby('Dataset')['F1 Score'].idxmax()

# Obtener los mejores clasificadores por dataset
best_classifiers = df.loc[idx_max_f1][['Dataset', 'Classifier', 'Accuracy', 'Precision', 'Recall']]

# Mostrar los mejores clasificadores por dataset
print(best_classifiers)


                   Dataset     Classifier  Accuracy  Precision  Recall
13   Pima Indians Diabetes     GaussianNB    0.7662     0.7662  0.7662
4   Swedish Auto Insurance  MLPClassifier    0.8462     0.8462  0.8462
8             Wine Quality     GaussianNB    0.5500     0.5500  0.5500
