In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from imblearn.under_sampling import EditedNearestNeighbours, OneSidedSelection, CondensedNearestNeighbour, NeighbourhoodCleaningRule, RandomUnderSampler, TomekLinks
from imblearn.over_sampling import ADASYN, BorderlineSMOTE, RandomOverSampler, SMOTE
from imblearn.combine import SMOTEENN, SMOTETomek

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score

In [None]:
def _plot_sampling_metrics(sampling_names, metric_names, metrics):
    """Draw one bar chart per metric, stacked vertically with a shared x-axis.

    Parameters
    ----------
    sampling_names : list of str
        Tick labels, one per resampling method (bar order).
    metric_names : list of str
        Y-axis label for each subplot.
    metrics : list of list of float
        One list of scores per metric, each aligned with ``sampling_names``.
    """
    xticks = list(range(len(sampling_names)))

    fig, axes = plt.subplots(len(metrics), 1, figsize=(15, 20), sharex=True)

    axes[0].set_title('Model performance based on sampling', fontsize=30)

    # Only the bottom subplot carries the x-axis decoration (sharex=True).
    bottom_ax = axes[-1]
    bottom_ax.set_xlabel('Method of sampling', fontsize=15)
    bottom_ax.set_xticks(xticks)
    bottom_ax.set_xticklabels(sampling_names, ha='right')
    bottom_ax.tick_params(axis='x', labelsize=10, rotation=30)

    for ax, metric_name, values in zip(axes, metric_names, metrics):
        ax.set_ylabel(metric_name, fontsize=15)
        ax.bar(xticks, values)

    fig.subplots_adjust(hspace=0.08)

    plt.show()


def evaluate_sampling(x_train, y_train, x_test, y_test, threshold=0.5):
    """Compare resampling strategies using a logistic-regression classifier.

    For each resampler, the training data is resampled, a
    ``LogisticRegression`` is fitted on the resampled data, and accuracy,
    precision, recall and AUROC are computed on the untouched test data.
    The four metrics are shown as bar charts and returned.

    Parameters
    ----------
    x_train, y_train
        Training features and labels; only the training data is resampled.
    x_test, y_test
        Evaluation features and labels.
        NOTE(review): assumes a binary target whose positive class is
        encoded as 1 (column 1 of ``predict_proba``) -- confirm with caller.
    threshold : float, default 0.5
        Probability cut-off used to turn scores into hard predictions for
        accuracy, precision and recall. It does not affect AUROC.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(accuracies, precisions, recalls, aurocs)`` rounded to 2 decimals,
        ordered as: ENN, OSS, CNN, NCR, RUS, TL, ADASYN, BLSMOTE, ROS, SMOTE,
        SMOTE+ENN, SMOTE+TOMEK.
    """
    # Keep each label next to its sampler so the two can never drift apart.
    named_samplers = [
        ('ENN', EditedNearestNeighbours()),
        ('OSS', OneSidedSelection(random_state=0)),
        ('CNN', CondensedNearestNeighbour(random_state=0)),
        ('NCR', NeighbourhoodCleaningRule()),
        ('RUS', RandomUnderSampler(random_state=0)),
        ('TL', TomekLinks()),
        ('ADASYN', ADASYN(random_state=0)),
        ('BLSMOTE', BorderlineSMOTE(random_state=0)),
        ('ROS', RandomOverSampler(random_state=0)),
        ('SMOTE', SMOTE(random_state=0)),
        ('SMOTE+ENN', SMOTEENN(random_state=0)),
        ('SMOTE+TOMEK', SMOTETomek(random_state=0)),
    ]

    accuracys = []
    precisions = []
    recalls = []
    rocs = []

    for _, sampler in named_samplers:
        x_model, y_model = sampler.fit_resample(x_train, y_train)

        lr_model = LogisticRegression(C=1.5, max_iter=1000, random_state=0)
        lr_model.fit(x_model, y_model)

        positive_proba = lr_model.predict_proba(x_test)[:, 1]
        prediction = (positive_proba > threshold).astype('int32')

        accuracys.append(accuracy_score(y_test, prediction))
        precisions.append(precision_score(y_test, prediction))
        recalls.append(recall_score(y_test, prediction))
        # AUROC is a ranking metric: it must be fed the continuous scores.
        # Passing thresholded 0/1 labels collapses the ROC curve to a single
        # operating point and yields balanced accuracy instead.
        rocs.append(roc_auc_score(y_test, positive_proba))

    sampling_name = [name for name, _ in named_samplers]
    metric_names = ['Accuracy', 'Precision', 'Recall', 'AUROC']
    metrics = [accuracys, precisions, recalls, rocs]

    _plot_sampling_metrics(sampling_name, metric_names, metrics)

    return np.round(accuracys, 2), np.round(precisions, 2), np.round(recalls, 2), np.round(rocs, 2)