## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

## Import Dataset

In [None]:
# Load seaborn's bundled 'penguins' example dataset as a DataFrame.
df_pen = sns.load_dataset(name='penguins')

In [None]:
# Target: the 'island' column.
y = df_pen['island']
# Features: every column except the target.
X = df_pen.drop(columns=['island'])

## Data Pre-processing

In [None]:
# Feature columns that get one-hot encoded below.
categorical_x = ['species', 'sex']
# All remaining feature columns are treated as numerical (original column order is kept).
numerical_x = X.columns.drop(categorical_x)

In [None]:
# Impute missing values. Each filled result is assigned back explicitly:
# calling fillna(..., inplace=True) on a selected column operates on an
# intermediate object and is not guaranteed to update X itself.

## If y is categorical: fill with the most frequent label.
y = y.fillna(y.mode().iloc[0])
## If y is numerical: fill with the mean instead.
# y = y.fillna(y.mean())

# Numerical features: fill with the column mean.
for col in numerical_x:
    X[col] = X[col].fillna(X[col].mean())

# Categorical features: fill with the most frequent value.
for col in categorical_x:
    X[col] = X[col].fillna(X[col].mode().iloc[0])

# One-hot encode the categorical features (dropping the first level of each)
# and put them in front of the numerical columns.
categoricas = pd.get_dummies(X[categorical_x], drop_first=True)
X = pd.concat([categoricas, X[numerical_x]], axis=1)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=46,
)

In [None]:
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
escalador = StandardScaler().fit(X_train)

X_train, X_test = escalador.transform(X_train), escalador.transform(X_test)

## Multi-Class Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Base estimator for the grid search below: one-vs-rest logistic regression
# using the 'saga' solver with a raised iteration cap.
# NOTE(review): `multi_class` may be deprecated in recent scikit-learn
# releases — confirm against the installed version.
log_model = LogisticRegression(
    solver='saga',
    multi_class="ovr",
    max_iter=5000,
)

## GridSearch

In [None]:
# Hyper-parameter candidates for the grid search.
# C: 10 values spaced evenly on a log scale from 10**0 to 10**4.
C = np.logspace(start=0, stop=4, num=10)

# Regularisation penalties to try.
penalty = ['l1', 'l2']

In [None]:
# Search over every (C, penalty) combination for the logistic model.
grid_model = GridSearchCV(
    log_model,
    param_grid={'C': C, 'penalty': penalty},
)

In [None]:
# Run the grid search on the training split.
grid_model.fit(X_train,y_train)

In [None]:
# Show the parameter combination selected by the grid search.
grid_model.best_params_

## Metrics

In [None]:
# Predict labels for the held-out test split with the fitted grid search.
y_pred = grid_model.predict(X_test)

In [None]:
# Accuracy of the test-split predictions against the true labels.
metrics.accuracy_score(y_test,y_pred)

In [None]:
# Confusion matrix of true labels versus predictions on the test split.
metrics.confusion_matrix(y_test,y_pred)

In [None]:
# Plot the confusion matrix for the test split.
# `metrics.plot_confusion_matrix` is no longer available in current
# scikit-learn; `ConfusionMatrixDisplay.from_estimator` is its replacement.
metrics.ConfusionMatrixDisplay.from_estimator(grid_model, X_test, y_test)

In [None]:
# Plot the confusion matrix normalised over the true labels (rows).
# `metrics.plot_confusion_matrix` is no longer available in current
# scikit-learn; `ConfusionMatrixDisplay.from_estimator` is its replacement.
metrics.ConfusionMatrixDisplay.from_estimator(grid_model, X_test, y_test, normalize='true')

In [None]:
# Print the text classification report for the test-split predictions.
print(metrics.classification_report(y_test,y_pred))

In [None]:
## Adapted from code in the scikit-learn documentation
def plot_multiclass_roc(clf, X_test, y_test, n_classes, figsize=(5,5)):
    """Plot one ROC curve per class (one-vs-rest) with its AUC in the legend.

    Args:
        clf: Fitted classifier exposing ``decision_function``. If it also
            exposes ``classes_``, the one-hot columns of ``y_test`` are
            aligned to that order.
        X_test: Features passed to ``clf.decision_function``.
        y_test: True labels corresponding to ``X_test``.
        n_classes: Number of score columns / classes to plot.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    y_score = clf.decision_function(X_test)

    # One-hot encode the true labels once, as integers.
    y_true = pd.get_dummies(y_test, drop_first=False).astype(int)

    # Align the one-hot columns with the classifier's class order so that
    # column i of y_true matches column i of y_score even when some class
    # does not occur in y_test (it then becomes an all-zero column).
    classes = getattr(clf, 'classes_', None)
    if classes is not None:
        y_true = y_true.reindex(columns=classes, fill_value=0)
    class_names = list(y_true.columns)
    y_true = y_true.to_numpy()

    # Per-class false/true positive rates and area under the curve.
    fpr, tpr, roc_auc = {}, {}, {}
    for i in range(n_classes):
        fpr[i], tpr[i], _ = metrics.roc_curve(y_true[:, i], y_score[:, i])
        roc_auc[i] = metrics.auc(fpr[i], tpr[i])

    # Draw the chance diagonal, then one curve per class.
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic example')
    for i in range(n_classes):
        ax.plot(
            fpr[i],
            tpr[i],
            label=f'ROC curve (area = {roc_auc[i]:.2f}) for label {class_names[i]}',
        )
    ax.legend(loc="best")
    ax.grid(alpha=.4)
    sns.despine()
    plt.show()

In [None]:
# Draw the per-class ROC curves for the tuned model on the test split.
plot_multiclass_roc(
    grid_model,
    X_test,
    y_test,
    n_classes=3,
    figsize=(16, 10),
)