In [23]:
import pandas as pd
import numpy as np

# preprocessing
from sklearn.preprocessing import StandardScaler

# classifiers
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

# scoring
from sklearn.model_selection import train_test_split
from imblearn.metrics import classification_report_imbalanced

# to use in the timing decorator
from functools import wraps
from time import time


In [24]:
df = pd.read_csv('../data/interim/data.csv')
df = df[list(df.columns)[1:]+['class']]

X = df.values[:,:-1]
y = df.values[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [25]:
gbc = GradientBoostingClassifier()
rfc = RandomForestClassifier()

In [36]:
def timing(f):
    # Adapted from: https://codereview.stackexchange.com/questions/169870/decorator-to-measure-execution-time-of-a-function
    @wraps(f)
    def wrapper(*args, **kwargs):
        start = time()
        result = f(*args, **kwargs)
        end = time()
        print('Ran function {}. Elapsed time: {}'.format(f.__name__,end-start))
        return result
    return wrapper


@timing
def get_classifier_performance(clf, X_train, X_test, y_train, y_test):
    title = f'{clf.__class__.__name__} - Results'
    div = '='*len(title)
    print(div+'\n'+title+'\n'+div)
    clf.fit(X_train, y_train)
    y_pred_bal = clf.predict(X_test)
    print(classification_report_imbalanced(y_test, y_pred_bal))


In [39]:
get_classifier_performance(gbc, X_train, X_test, y_train, y_test)

GradientBoostingClassifier - Results
                   pre       rec       spe        f1       geo       iba       sup

          A       0.68      0.43      0.98      0.53      0.65      0.40        30
          B       0.50      0.55      0.88      0.52      0.70      0.47        60
          C       0.70      0.84      0.74      0.76      0.79      0.63       142
          D       0.51      0.37      0.93      0.43      0.59      0.32        57
          E       0.42      0.37      0.93      0.39      0.58      0.32        41
          F       0.20      0.17      0.99      0.18      0.41      0.15         6
          G       0.00      0.00      0.99      0.00      0.00      0.00         3

avg / total       0.58      0.60      0.85      0.58      0.69      0.48       339

Ran function get_classifier_performance. Elapsed time: 3.986984968185425


In [40]:
get_classifier_performance(rfc, X_train, X_test, y_train, y_test)

RandomForestClassifier - Results
                   pre       rec       spe        f1       geo       iba       sup

          A       0.37      0.33      0.94      0.35      0.56      0.30        30
          B       0.46      0.52      0.87      0.48      0.67      0.43        60
          C       0.67      0.88      0.69      0.76      0.78      0.62       142
          D       0.59      0.33      0.95      0.43      0.56      0.30        57
          E       0.36      0.22      0.95      0.27      0.46      0.19        41
          F       0.00      0.00      1.00      0.00      0.00      0.00         6
          G       0.00      0.00      1.00      0.00      0.00      0.00         3

avg / total       0.54      0.57      0.83      0.54      0.64      0.44       339

Ran function get_classifier_performance. Elapsed time: 0.10432267189025879


  'precision', 'predicted', average, warn_for)
