- conda install -c conda-forge lightgbm xgboost catboost

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt

#Load data
dataObj = load_wine()
X = dataObj.data
y = dataObj.target

# Create DataFrame with features
dfori = pd.DataFrame(X)
dfori.columns = dataObj.feature_names

# Add class column
dfori.insert(loc=0, column="Class", value=y)

#Filter class 0 and 1
filt = (dfori['Class'] == 0) | (dfori['Class'] == 1)
df = dfori.loc[filt]

# Extract two features 
X = df[['alcohol','malic_acid']].values
y = df['Class'].values

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1, stratify=y)

In [2]:
from sklearn.metrics import accuracy_score, precision_score ,recall_score, f1_score

def calc_score(y_true, y_pred):
    ACC = accuracy_score(y_true=y_true, y_pred=y_pred)
    PRE = precision_score(y_true=y_true, y_pred=y_pred, zero_division=1)
    REC = recall_score(y_true=y_true, y_pred=y_pred, zero_division=1)
    F1 = f1_score(y_true=y_true, y_pred=y_pred, zero_division=1)
    # print(f"ACC:{ACC:6.3f} PRE:{PRE:6.3f} REC:{REC:6.3f} F1:{REC:6.3f}")
    return (ACC, PRE, REC, F1)

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier


In [9]:
base = DecisionTreeClassifier(criterion='entropy', 
                              max_depth=1,
                              random_state=1)
ada = AdaBoostClassifier(base_estimator=base,
                         n_estimators=100, 
                         learning_rate=0.1,
                         random_state=1)

gbm = GradientBoostingClassifier()
xgb = XGBClassifier(eval_metric="logloss", use_label_encoder=False)
lgb = LGBMClassifier()
cat = CatBoostClassifier(verbose=0, n_estimators=100)

classifiers = [
    {'name':'Adaboost','clf':ada},
    {'name':'Gradient Boost','clf':gbm},
    {'name':'XGBoost','clf':xgb},
    {'name':'Light GBM','clf':lgb},
    {'name':'CatBoost','clf':cat}
    ]

In [17]:
dfTemp = pd.DataFrame()

for classifier in classifiers:
    name = classifier['name']
    clf = classifier['clf']

    scores = cross_validate(estimator=clf,
                            X=X_train,
                            y=y_train,
                            cv=10,
                            scoring=['accuracy','precision','recall','f1'],
                            n_jobs=-1)

    scoreTypes = ['test_accuracy','test_precision','test_recall','test_f1']    
    data = {}
    for scoreType in scoreTypes:
        scoreArray = scores[scoreType]
        scoreMean = np.mean(scoreArray)
        data['clf'] = name
        data[scoreType] = f"{scoreMean:6.3f}"
    dfTemp = dfTemp.append(data, ignore_index=True)

display(dfTemp)

Unnamed: 0,clf,test_accuracy,test_f1,test_precision,test_recall
0,Adaboost,0.861,0.873,0.882,0.885
1,Gradient Boost,0.861,0.873,0.877,0.885
2,XGBoost,0.911,0.913,0.93,0.905
3,Light GBM,0.898,0.904,0.922,0.905
4,CatBoost,0.911,0.913,0.95,0.885


In [None]:
clf.fit(X_train, y_train)
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

In [None]:
df2 = pd.DataFrame()

names = ['ada (train)', 'ada (test)']
y_trues = [y_train, y_test]
y_preds = [y_train_pred, y_test_pred]

for y_true, y_pred, name in zip(y_trues, y_preds, names):

    ACC, PRE, REC, F1 = calc_score(y_true, y_pred)
    data = {'clf': name,
            'ACC': f"{ACC:6.3f}" ,
            'PRE': f"{PRE:6.3f}" ,
            'REC': f"{REC:6.3f}" ,
            'F1': f"{F1:6.3f}"}
    df2 = df2.append(data, ignore_index=True)
    
df2 = df2.set_index(["clf"])
display(df2)