In [89]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### Plots

In [77]:
def plot_decision_boundary(model: torch.nn.Module, X: torch.Tensor, y: torch.Tensor):
    """Draw the model's predicted class regions with the samples of X on top.

    The model and both tensors are moved to the CPU and the model is put in
    eval mode (the caller's model object is modified by this). The model is
    then evaluated on a 101 x 101 grid spanning the first two columns of X,
    padded by 0.1 on each side. Predictions are drawn as filled contours and
    the samples are scattered on top, coloured by y, using the current
    Matplotlib figure. `plt.show()` is not called.

    Source - https://madewithml.com/courses/foundations/neural-networks/
    (with modifications by Daniel Bourke from his course PyTorch for deep learning)

    Parameters:
        model: network called on a float tensor with two feature columns
        X: samples; only columns 0 and 1 are used
        y: labels; more than two unique values selects the multi-class branch
    """
    # NumPy and Matplotlib need host memory, so everything goes to the CPU first
    device = "cpu"
    model.to(device)
    X = X.to(device)
    y = y.to(device)

    # Grid covering the data range with a small margin on each side
    margin, steps = 0.1, 101
    first, second = X[:, 0], X[:, 1]
    x_axis = np.linspace(first.min() - margin, first.max() + margin, steps)
    y_axis = np.linspace(second.min() - margin, second.max() + margin, steps)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # One row per grid point: (x coordinate, y coordinate)
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    grid_tensor = torch.from_numpy(grid_points).float()

    # Forward pass without gradient tracking
    model.eval()
    with torch.inference_mode():
        logits = model(grid_tensor)

    # Turn logits into labels: argmax of softmax for multi-class, rounded sigmoid for binary
    is_multiclass = len(torch.unique(y)) > 2
    if is_multiclass:
        labels = torch.softmax(logits, dim=1).argmax(dim=1)
    else:
        labels = torch.round(torch.sigmoid(logits))

    # Back to the grid layout so contourf can colour the regions
    colours = plt.cm.RdYlBu
    regions = labels.reshape(xx.shape).detach().numpy()
    plt.contourf(xx, yy, regions, cmap=colours, alpha=0.7)
    plt.scatter(first, second, c=y, s=40, cmap=colours)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

In [90]:
def plot_classification_metrics(y_true, y_pred, average=None):
    """
    Create a simple bar plot with accuracy, precision, recall and F1 score.

    Parameters:
        y_true: the true values from the dataset
        y_pred: the predictions from the model
        average: averaging strategy forwarded to scikit-learn's precision,
            recall and F1 functions (e.g. "macro", "micro", "weighted");
            needed for multi-class targets. None falls back to "binary",
            because a per-class array of scores cannot be drawn as one bar.

    The y-axis is limited to [0.6, 1], so scores below 0.6 are not visible.
    """
    # `average` must be passed by keyword: older scikit-learn releases read the
    # third positional argument as `labels`, newer ones reject it with TypeError.
    avg = "binary" if average is None else average

    metrics = [
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average=avg),
        recall_score(y_true, y_pred, average=avg),
        f1_score(y_true, y_pred, average=avg),
    ]

    plt.bar(np.arange(4) - .1, metrics, .5)
    plt.xticks([0, 1, 2, 3], ['Accuracy', 'Precision', 'Recall', 'F1-score'])
    plt.ylim([.6, 1])
    plt.legend(['Metrics'])
    plt.title('Performance metrics')
    plt.show()

In [82]:
def plot_bar_performance_metrics(y_train_true, y_test_true, y_train_pred, y_test_pred, average="weighted"):
    """
    Draw accuracy, precision, recall and F1 score for the train and test sets as a bar chart.

    Parameters:
        y_train_true: true values from the training set
        y_test_true: true values from the test set
        y_train_pred: predictions of the model on the training set
        y_test_pred: predictions of the model on the test set
        average: passed as `average=` to the precision, recall and F1 helpers

        When working with dataloaders, the labels for y_train_true/y_test_true
        can be read from train/test_loader.dataset.tensors[1].

    The scores come from the classification_* helpers, which are defined
    outside this function. The y-axis is limited to [0.6, 1].
    """
    def _four_scores(truth, preds):
        # Order matches the tick labels: accuracy, precision, recall, F1
        return [
            classification_accuracy(truth, preds),
            classification_precision(truth, preds, average=average),
            classification_recall(truth, preds, average=average),
            classification_f1_score(truth, preds, average=average),
        ]

    train_metrics = _four_scores(y_train_true, y_train_pred)
    test_metrics = _four_scores(y_test_true, y_test_pred)

    # Train bars are shifted left and test bars right of each tick
    positions = np.arange(4)
    bar_width = .5
    plt.bar(positions - .1, train_metrics, bar_width)
    plt.bar(positions + .1, test_metrics, bar_width)
    plt.xticks([0, 1, 2, 3], ['Accuracy', 'Precision', 'Recall', 'F1-score'])
    plt.ylim([.6, 1])
    plt.legend(['Train', 'Test'])
    plt.title('Performance metrics')
    plt.show()

### Metrics

In [66]:
def get_metric_names(metric_list: list):
    """
    Return every scikit-learn scorer name that contains at least one of the given substrings.

    Parameters:
        metric_list (list): substrings of potential scorer names, e.g. ["f1", "recall"].
    Return:
        a set with all scorer names that match any of the substrings
        (empty when nothing matches or metric_list is empty)
    """
    # `SCORERS` was removed from sklearn.metrics in scikit-learn 1.3;
    # `get_scorer_names()` is the replacement. Fall back for old installs.
    try:
        from sklearn.metrics import get_scorer_names
        scorer_names = get_scorer_names()
    except ImportError:
        from sklearn.metrics import SCORERS
        scorer_names = list(SCORERS)

    return {
        name
        for name in scorer_names
        if any(substring in name for substring in metric_list)
    }

In [79]:
def print_performance_metrics_aprf(y_true, y_pred, average=None):
    """
    Print accuracy, precision, recall and F1 score as percentages with two decimals.

    Parameters:
        y_true: the true values from the dataset
        y_pred: the predictions from the model
        average: passed as the third positional argument to the precision,
            recall and F1 helpers (used for multi-class classification)

    The scores come from the classification_* helpers, which are defined
    outside this function. Each score is computed right before it is printed.
    """
    accuracy = classification_accuracy(y_true, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")

    precision = classification_precision(y_true, y_pred, average)
    print(f"Precision: {precision * 100:.2f}%")

    recall = classification_recall(y_true, y_pred, average)
    print(f"Recall: {recall * 100:.2f}%")

    f1 = classification_f1_score(y_true, y_pred, average)
    print(f"F1 Score: {f1 * 100:.2f}%")

In [71]:
def max_abs_scaling(X_train, X_test):
    """
    Scale both feature sets with a MaxAbsScaler fitted on the training set only.

    Parameters:
        X_train: training features; the scaler is fitted on these
        X_test: test features; transformed with the training-set statistics
    Return:
        the scaled X_train and X_test, in that order
    """
    from sklearn.preprocessing import MaxAbsScaler

    # Fit on the training data only so no test-set information leaks into the scaling
    fitted = MaxAbsScaler().fit(X_train)

    return fitted.transform(X_train), fitted.transform(X_test)

In [72]:
def min_max_scaling(X_train, X_test):
    """
    Scale both feature sets with a MinMaxScaler fitted on the training set only.

    Parameters:
        X_train: training features; the scaler is fitted on these
        X_test: test features; transformed with the training-set statistics
    Return:
        the scaled X_train and X_test, in that order
    """
    from sklearn.preprocessing import MinMaxScaler

    # Fit on the training data only so no test-set information leaks into the scaling
    fitted = MinMaxScaler().fit(X_train)

    return fitted.transform(X_train), fitted.transform(X_test)

In [73]:
def standard_scaling(X_train, X_test):
    """
    Scale both feature sets with a StandardScaler fitted on the training set only.

    Parameters:
        X_train: training features; the scaler is fitted on these
        X_test: test features; transformed with the training-set statistics
    Return:
        the scaled X_train and X_test, in that order
    """
    # The function uses StandardScaler, so that is the class that must be imported
    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler()
    # Fit on the training data only so no test-set information leaks into the scaling
    scaler.fit(X_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test

In [87]:
def evaluate_preds(y_true, y_preds, average=None):
    """
    Compare y_true labels with y_preds labels on a classification task,
    print the scores and return them.

    Parameters:
        y_true: the true values from the dataset
        y_preds: the predictions from the model
        average: averaging strategy for precision, recall and F1 (e.g.
            "macro", "micro", "weighted"); required for multi-class targets.
            None keeps scikit-learn's default ("binary").
    Return:
        metric_dict (dict): accuracy, precision, recall and f1, each rounded
            to two decimals, for future use.
    """
    # Forward `average` only when given: passing None explicitly would make
    # scikit-learn return per-class arrays, which cannot be rounded or formatted.
    score_kwargs = {} if average is None else {"average": average}

    accuracy = accuracy_score(y_true, y_preds)
    precision = precision_score(y_true, y_preds, **score_kwargs)
    recall = recall_score(y_true, y_preds, **score_kwargs)
    f1 = f1_score(y_true, y_preds, **score_kwargs)
    metric_dict = {"accuracy": round(accuracy, 2),
                   "precision": round(precision, 2),
                   "recall": round(recall, 2),
                   "f1": round(f1, 2)}
    print(f"Acc: {accuracy * 100:.2f}%")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 score: {f1:.2f}")

    return metric_dict

### Features

In [70]:
def split_scaling_features(X, y, scaler, test_size=0.2, random_state=None):
    """
    Split the data into train and test sets and scale the features.

    Parameters:
        X: features from the dataset
        y: labels from the dataset
        scaler: name of the feature scaling method: 'min_max' for MinMaxScaler(),
                'max_abs' for MaxAbsScaler() and 'std' for StandardScaler().
                Any other value also selects StandardScaler().
        test_size: forwarded to train_test_split (default 0.2)
        random_state: forwarded to train_test_split for a reproducible split
    Return:
        X_train_scaled, X_test_scaled, y_train, y_test
    """
    from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    scaler_classes = {
        "min_max": MinMaxScaler,
        "max_abs": MaxAbsScaler,
        "std": StandardScaler,
    }
    # Unknown or non-string values keep the original fallback to StandardScaler
    if isinstance(scaler, str):
        scaler_class = scaler_classes.get(scaler, StandardScaler)
    else:
        scaler_class = StandardScaler
    fitted_scaler = scaler_class()

    # Fit on the training split only so no test-set information leaks into the scaling
    fitted_scaler.fit(X_train)

    X_train_scaled = fitted_scaler.transform(X_train)
    X_test_scaled = fitted_scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test

In [85]:
def One_Hot_Encoder(data: pd.DataFrame, categorical_features: list):
    """
    One-hot encode the listed columns of a DataFrame and pass all other columns through unchanged.

    Parameters:
        data (pd.DataFrame): data frame with all features
        categorical_features (list): all categorical columns that need one-hot encoding
    Return:
        the result of ColumnTransformer.fit_transform. With scikit-learn's
        default output settings this is a NumPy array or SciPy sparse matrix,
        not a pd.DataFrame. The one-hot columns come first, followed by the
        passthrough columns.
    """
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.compose import ColumnTransformer #make_column_transformer

    one_hot_enc = OneHotEncoder()
    # ColumnTransformer expects a list of (name, transformer, columns) tuples,
    # and the keyword for untouched columns is `remainder`.
    transformer = ColumnTransformer(
        [("onehot", one_hot_enc, categorical_features)],
        remainder="passthrough",
    )
    transformed_X = transformer.fit_transform(data)

    return transformed_X