# In [None]:
# Librerías
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, jaccard_score, log_loss, classification_report
)

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Load the weather dataset from the course's object storage.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillUp/labs/ML-FinalAssignment/Weather_Data.csv"
df = pd.read_csv(url)

# Preprocessing: one-hot encode the categorical columns, turn any remaining
# 'No'/'Yes' values into 0/1, discard the date column and cast the whole
# table to float.
categorical_cols = ['RainToday', 'WindGustDir', 'WindDir9am', 'WindDir3pm']
df_proc = (
    pd.get_dummies(data=df, columns=categorical_cols)
    .replace(['No', 'Yes'], [0, 1])
    .drop('Date', axis=1)
    .astype(float)
)

# Target vector and feature matrix (every column except the target).
y = df_proc['RainTomorrow']
X = df_proc.drop(columns='RainTomorrow')

# Train-test split: hold out 20% of the rows; the fixed seed keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=10
)

# Models
# One seed shared by every estimator that has a stochastic component, so that
# repeated runs of the script report the same metrics:
#   - DecisionTreeClassifier randomly permutes the features at each split,
#   - liblinear shuffles the data while fitting LogisticRegression,
#   - SVC(probability=True) shuffles the data for its internal
#     cross-validated probability calibration.
RANDOM_STATE = 10

knn = KNeighborsClassifier(n_neighbors=4)
tree = DecisionTreeClassifier(random_state=RANDOM_STATE)
lr = LogisticRegression(solver='liblinear', random_state=RANDOM_STATE)
# probability=True is required for svm.predict_proba further down.
svm = SVC(probability=True, random_state=RANDOM_STATE)

# NOTE(review): KNN and SVC are sensitive to feature scale and the features
# are fed in unscaled here; standardizing them may improve both models, but
# it would change the reported numbers -- confirm before adopting.

# Train the models
knn.fit(X_train, y_train)
tree.fit(X_train, y_train)
lr.fit(X_train, y_train)
svm.fit(X_train, y_train)

# Hard class predictions on the held-out set
pred_knn = knn.predict(X_test)
pred_tree = tree.predict(X_test)
pred_lr = lr.predict(X_test)
pred_svm = svm.predict(X_test)

# Class probabilities, computed only for LR and SVM (needed by log_loss)
proba_lr = lr.predict_proba(X_test)
proba_svm = svm.predict_proba(X_test)

# Metrics
def _score_row(preds, proba=None):
    """Return the metric row for one model, evaluated against ``y_test``.

    ``preds`` are the hard test-set predictions. ``proba`` holds the
    predicted class probabilities; when it is None the "LogLoss" entry is
    left as None instead of being computed.
    """
    return {
        "Accuracy": accuracy_score(y_test, preds),
        "Jaccard": jaccard_score(y_test, preds),
        "F1": f1_score(y_test, preds),
        "LogLoss": None if proba is None else log_loss(y_test, proba),
    }


results = {
    "KNN": _score_row(pred_knn),
    "Decision Tree": _score_row(pred_tree),
    "Logistic Regression": _score_row(pred_lr, proba_lr),
    "SVM": _score_row(pred_svm, proba_svm),
}

# Metrics table: transposed so that each model is a row and each metric a column.
metrics_df = pd.DataFrame(results).T
print("=== Comparación de Modelos ===")
print(metrics_df)

# Per-model classification reports, printed in the same order as the table.
print("\n=== Informes de Clasificación ===")
for heading, preds in (
    ("\nKNN:\n", pred_knn),
    ("\nÁrbol de Decisión:\n", pred_tree),
    ("\nRegresión Logística:\n", pred_lr),
    ("\nSVM:\n", pred_svm),
):
    print(heading, classification_report(y_test, preds))