<a href="https://colab.research.google.com/github/alexalexs/Scikit_learn_Classification_exercises/blob/main/Scikit_learn_Classification_exercises.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Interactive plotting of scikit-learn, LightGBM and XGBoost estimator metrics using Plotly and BeakerX in a Jupyter Notebook

#### Only needed when running in Google Colab

In [13]:
# google.colab is only importable inside a Colab runtime. Guard the import so
# that "Restart & Run All" also works in a local Jupyter kernel, where this
# step is simply skipped.
try:
    from google.colab import output
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    # presumably required for the plotly FigureWidget outputs shown later in
    # this notebook to render in Colab -- TODO confirm
    output.enable_custom_widget_manager()

### This notebook solves a classification problem with as many estimators as possible from sklearn, lightgbm and xgboost, using their default hyperparameters

#### Index:
#### 1. [Importing libs](#1)
#### 2. [Setting random seed for repetitions](#2)
#### 3. [Generating classification problem](#4)
#### 4. [Preprocessing data](#5)
#### 5. [Function for calculating metrics](#6)
#### 6. [Going across all estimators](#7)
#### 7. [Leaderboard of estimators](#8)

#### Importing libs 
<a id='1'></a>

In [14]:
%matplotlib inline
from sklearn.ensemble import *
from sklearn.tree import *
from sklearn.linear_model import *
from sklearn.base import *
from sklearn.dummy import *
from sklearn.gaussian_process import *
from sklearn.naive_bayes import *
from sklearn.neighbors import *
from sklearn.svm import *
from sklearn.neural_network import *
from sklearn.discriminant_analysis import *
from sklearn.datasets import *
from sklearn.preprocessing import *
from sklearn.model_selection import *
from sklearn.metrics import *
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
import copy
import time
import random
import pandas as pd
import numpy as np
# from frozendict import frozendict
import matplotlib.pyplot as plt
#!pip install beakerx #not working in colab yet :)
#import beakerx
from IPython.display import clear_output
# clear_output(wait=True)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
!pip install plotly
import plotly
from plotly.offline import iplot
from plotly.graph_objs import graph_objs as go
import ipywidgets as widgets
from IPython.display import display
warnings.filterwarnings("ignore")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


#### Setting random seed for repetitions
<a id='2'></a>

In [15]:
# Seed both global random number generators used by this notebook
# (NumPy's legacy global state and Python's `random` module) so that
# repeated top-to-bottom runs start from the same state.
np.random.seed(342)
random.seed(434)

#### These estimators require some extra conditions unlike the others (or may be time-consuming)

In [16]:
# Names of classifiers that are filtered out when the estimator classes are
# collected from the notebook namespace.
EXLUDE = [
    'VotingClassifier',
    'MultinomialNB',
    'ComplementNB',
    'RadiusNeighborsClassifier',
    'NuSVC',
    'DummyClassifier',
    'StackingClassifier',
    'CategoricalNB',
    'XGBClassifier',
]

#### This is a lazy way to collect the classes of all imported estimators

In [17]:
# Shallow snapshot of the current global namespace (name -> object). A copy is
# taken so the namespace can be iterated later without being affected by
# names that are defined in the meantime.
g_ = dict(globals())

In [18]:
# Build {name: object} for every object in the namespace snapshot `g_` that
# scikit-learn's `is_classifier` recognises, skipping mixin helpers and the
# names listed in EXLUDE.
clf_ = {}
for obj_name, obj in g_.items():
    if 'Mixin' in obj_name or obj_name in EXLUDE:
        continue
    try:
        if is_classifier(obj):
            clf_[obj_name] = obj
    except Exception:
        # The snapshot contains arbitrary objects (modules, functions, ...);
        # anything that is_classifier cannot inspect is just skipped. Catching
        # Exception (instead of a bare except) keeps KeyboardInterrupt and
        # SystemExit working.
        continue

#### Generating classification problem
<a id='4'></a>

In [19]:
# Synthetic problem: 200 samples, 3 features, a single class/label column.
# No random_state is passed here -- presumably the draw then depends on the
# global NumPy seed set earlier in the notebook (verify against sklearn docs).
X, y = make_multilabel_classification(
    n_samples=200,
    n_features=3,
    n_classes=1,
    n_labels=1,
)

Alternatively, you can load local data from `train.csv` by uncommenting the cell below:

In [20]:
# df=pd.read_csv('train.csv')
# y=df.pop('target')
# X=df.drop(['ID_code']+list(df.columns[4::]),axis=1) # only 3 features
# del df

#### Preprocessing data
<a id='5'></a>

In [21]:
# Preprocess dataset: split into training and test halves FIRST, then fit the
# scaler on the training half only and apply it to both halves. Fitting the
# scaler on the full data set would leak test-set statistics into training.
# `X` itself is left untouched, so re-running this cell gives the same result.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.5, random_state=42)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape)

(100, 3)


### Going across all estimators 
<a id='7'></a>

In [22]:
class clf_all:
    """Fit a collection of classifiers and plot their metrics as they finish.

    On construction two plotly ``FigureWidget`` objects are created and
    displayed: a bar chart with one trace per metric and a pie chart of the
    time spent fitting each estimator. ``fit`` refreshes both widgets after
    every estimator.

    Attributes set by ``__init__``:
        clf_list: mapping ``name -> estimator class`` as passed in.
        metrics: mapping ``name -> callable(y_true, y_pred)``.
        fig, fig2: the bar and pie ``FigureWidget`` objects.
        dict_leader: ``{metric name: {estimator name: value}}``.
    Attributes set by ``fit``:
        clf_arr: ``{estimator name: {'metrics': ..., 'obj': ...}}``.
        time_: list of fit durations in seconds, in ``clf_list`` order.
    """

    def __init__(self, clf_list, metrics=None):
        """Create the widgets and an all-zero leaderboard.

        Args:
            clf_list: dict mapping a display name to an estimator class
                (called with no arguments inside ``fit``).
            metrics: optional dict mapping a metric name to a callable taking
                ``(y_true, y_pred)``. Defaults to a fixed set of
                scikit-learn metrics.
        """
        if metrics is None:
            # NOTE(review): every metric is fed the output of clf.predict()
            # (see fit), including log_loss and roc_auc_score -- these are
            # usually computed from probabilities/scores; confirm intent.
            metrics = {'accuracy_score': accuracy_score,
                       'balanced_accuracy_score': balanced_accuracy_score, 'log_loss': log_loss,
                       'roc_auc_score': roc_auc_score,
                       'f1_score': f1_score,
                       'hamming_loss': hamming_loss,
                       'zero_one_loss': zero_one_loss
                       }
        self.clf_list = clf_list
        self.metrics = metrics

        # Bar plot: one (initially empty) trace per metric
        layout = go.Layout(title='Metrics')
        bars = [go.Bar(x=[], y=[], name=metric_name) for metric_name in metrics]
        fig = go.FigureWidget(data=bars, layout=layout)
        display(fig)
        self.fig = fig

        # Pie plot of fit times. Labels come from the clf_list argument (the
        # original read the notebook-global `clf_`, which only worked when the
        # caller happened to pass that very dict).
        trace = go.Pie(labels=list(clf_list), values=[])
        layout = go.Layout(title='Time consuming')
        fig = go.FigureWidget(data=[trace], layout=layout)
        display(fig)
        self.fig2 = fig

        # Leaderboard table: every (metric, estimator) cell starts at 0
        self.dict_leader = {metric_name: dict.fromkeys(clf_list, 0)
                            for metric_name in metrics}

    # Method for calculating metrics
    def metrics_calc(self, y, y_pred):
        """Evaluate every configured metric on ``(y, y_pred)``.

        The result is also stored on ``self.score``.

        Returns:
            dict mapping metric name to the value returned by the metric.
        """
        self.score = {}
        for name, metric in self.metrics.items():
            self.score[name] = metric(y, y_pred)
        return self.score

    def fit(self, X_train, y_train, X_valid, y_valid, X_test=None):
        """Instantiate, fit and score every estimator in ``clf_list``.

        Each estimator class is called with no arguments, fitted on
        ``(X_train, y_train)`` and scored on ``(X_valid, y_valid)`` through
        ``metrics_calc``. The plots are refreshed after each estimator.

        Args:
            X_train, y_train: training data.
            X_valid, y_valid: data the metrics are computed on.
            X_test: accepted but not used by this method.
        """
        self.clf_arr = dict()
        self.time_ = []
        for name, clf_factory in self.clf_list.items():
            print(name)
            clf = clf_factory()
            t1 = time.time()
            clf.fit(X_train, y_train)
            self.time_.append(time.time()-t1)  # fit time only, not predict
            self.clf_arr[name] = {'metrics': self.metrics_calc(
                y_valid, clf.predict(X_valid))}
            self.clf_arr[name]['obj'] = clf
            if hasattr(clf, 'feature_importances_'):
                self.clf_arr[name]['feature_importances_'] = clf.feature_importances_
            # Update plot
            self.update_plot()
        # self.leader_board() # when beakerX will work

    def update_plot(self):
        """Push the results collected so far into both widgets.

        Also rebuilds ``self.dict_leader`` from the bar-chart traces. Does
        nothing when no estimator has been fitted yet.
        """
        results = getattr(self, 'clf_arr', None)
        if not results:
            # Nothing fitted yet: the original raised IndexError here.
            return

        estimator_names = list(results)
        # Metric names are taken from the first fitted estimator's results
        metric_names = list(next(iter(results.values()))['metrics'])
        scores = {metric_name: [results[est]['metrics'][metric_name]
                                for est in estimator_names]
                  for metric_name in metric_names}

        # Pie plot
        for trace in self.fig2.data:
            trace['values'] = self.time_

        dict_leader = {}  # Dict for leaderboard
        for trace in self.fig.data:
            metric_name = trace['name']
            trace['x'] = estimator_names
            trace['y'] = scores[metric_name]
            dict_leader[metric_name] = dict(
                zip(estimator_names, scores[metric_name]))
        self.dict_leader = dict_leader

    def leader_board(self):
        """Display ``dict_leader`` as a beakerx table with per-column highlights.

        Requires the name ``beakerx`` to be available at module level and the
        default metric names to be present as columns; the result is stored on
        ``self.td``.
        """
        df = pd.DataFrame(self.dict_leader)
        td = beakerx.TableDisplay(df)
        td.addCellHighlighter(beakerx.HeatmapHighlighter('accuracy_score', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['accuracy_score'].max()-0.001, df['accuracy_score'].max(), beakerx.Color.white, beakerx.Color.LIGHT_GRAY))
        td.addCellHighlighter(beakerx.HeatmapHighlighter('log_loss', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['log_loss'].min()+0.001, df['log_loss'].min(), beakerx.Color.WHITE, beakerx.Color.GREEN))
        td.addCellHighlighter(beakerx.HeatmapHighlighter('roc_auc_score', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['roc_auc_score'].max()-0.001, df['roc_auc_score'].max(), beakerx.Color.WHITE, beakerx.Color.RED))
        td.addCellHighlighter(beakerx.HeatmapHighlighter('f1_score', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['f1_score'].max()-0.001, df['f1_score'].max(), beakerx.Color.WHITE, beakerx.Color.CYAN))
        td.addCellHighlighter(beakerx.HeatmapHighlighter('balanced_accuracy_score', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['balanced_accuracy_score'].max()-0.001, df['balanced_accuracy_score'].max(), beakerx.Color.WHITE, beakerx.Color.MAGENTA))
        # NOTE(review): hamming_loss is highlighted around its max like the
        # score columns, whereas log_loss and zero_one_loss use min -- confirm
        # whether this should be min as well.
        td.addCellHighlighter(beakerx.HeatmapHighlighter('hamming_loss', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['hamming_loss'].max()-0.001, df['hamming_loss'].max(), beakerx.Color.WHITE, beakerx.Color.ORANGE))
        td.addCellHighlighter(beakerx.HeatmapHighlighter('zero_one_loss', beakerx.HighlightStyle.SINGLE_COLUMN,
                              df['zero_one_loss'].min()+0.001, df['zero_one_loss'].min(), beakerx.Color.WHITE, beakerx.Color.PINK))
        display(td)
        self.td = td


In [23]:
# Build the runner over all collected classifier classes; constructing it
# displays the (still empty) metrics bar chart and timing pie chart.
clf_1 = clf_all(clf_list=clf_)

FigureWidget({
    'data': [{'name': 'accuracy_score', 'type': 'bar', 'uid': '16f4574b-8b71-4b31-808e-356e3f95…

FigureWidget({
    'data': [{'labels': [RandomForestClassifier, ExtraTreesClassifier,
                        …

In [24]:
# Fit every estimator on the training half; the held-out test half is passed
# as the validation data on which the metrics are computed.
clf_1.fit(
    X_train=X_train,
    y_train=y_train,
    X_valid=X_test,
    y_valid=y_test,
)

RandomForestClassifier
ExtraTreesClassifier
BaggingClassifier
GradientBoostingClassifier
AdaBoostClassifier
HistGradientBoostingClassifier
DecisionTreeClassifier
ExtraTreeClassifier
LogisticRegression
LogisticRegressionCV
PassiveAggressiveClassifier
Perceptron
RidgeClassifier
RidgeClassifierCV
SGDClassifier
GaussianProcessClassifier
BernoulliNB
GaussianNB
KNeighborsClassifier
NearestCentroid
LinearSVC
SVC
MLPClassifier
LinearDiscriminantAnalysis
QuadraticDiscriminantAnalysis
LGBMClassifier


### Leaderboard of estimators
<a id='8'></a>