In [37]:
# !pip install xgboost

In [27]:
# !pip install hiclass



In [None]:
import pandas as pd
from sklearn.metrics import log_loss
from sklearn.datasets import make_blobs

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from hiclass import LocalClassifierPerNode


from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier

from sklearn.model_selection import train_test_split, KFold, cross_val_score, cross_validate

from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [None]:
def show_metrics(model, meth, fset, x, y, cv=None):
    """Cross-validate a classifier and print its mean macro-averaged metrics.

    Parameters
    ----------
    model : estimator
        Unfitted classifier handed to ``cross_validate`` (optionally wrapped,
        see ``meth``).
    meth : str
        ``"ovr"`` wraps ``model`` in ``OneVsRestClassifier``, ``"ovo"`` wraps
        it in ``OneVsOneClassifier``; any other value (the calls in this
        notebook pass ``""``) uses ``model`` unchanged.
    fset : str
        Feature-set label; only used in the printed heading.
    x, y :
        Features and targets, forwarded to ``cross_validate`` as ``X`` / ``y``.
    cv : optional
        Cross-validation splitter forwarded to ``cross_validate``. Defaults to
        ``KFold(n_splits=10)``, which is what this function always used.

    Returns
    -------
    dict
        The dictionary returned by ``cross_validate``; the per-fold scores
        live under ``test_accuracy``, ``test_precision``, ``test_recall`` and
        ``test_f1_score``.
    """
    if meth == "ovr":
        clf = OneVsRestClassifier(model)
        print("Classification on {} feature set with OneVsRest\n".format(fset))
    elif meth == "ovo":
        clf = OneVsOneClassifier(model)
        # Heading previously read "Classification Regression on ..." (leftover
        # from a rename); aligned with the other two headings.
        print("Classification on {} feature set with OneVsOne\n".format(fset))
    else:
        clf = model
        print("Classification on {} feature set\n".format(fset))

    # Precision/recall/F1 are macro-averaged; zero_division=0 scores a class
    # with no predicted/true samples as 0 instead of emitting a warning.
    scoring = {
        'accuracy': make_scorer(accuracy_score),
        'precision': make_scorer(precision_score, average='macro', zero_division=0),
        'recall': make_scorer(recall_score, average='macro', zero_division=0),
        'f1_score': make_scorer(f1_score, average='macro', zero_division=0),
    }
    if cv is None:
        cv = KFold(n_splits=10)

    results = cross_validate(estimator=clf, X=x, y=y, cv=cv, scoring=scoring)

    # cross_validate returns one float array per metric; use the array's own
    # .mean() so this function does not depend on a global `np`, which the
    # visible import cell does not provide.
    report = (
        ("Accuracy", "test_accuracy"),
        ("Precision", "test_precision"),
        ("Recall", "test_recall"),
        ("F1 score", "test_f1_score"),
    )
    for label, key in report:
        print(f"{label} :{results[key].mean()}")
        print('-' * 70)
    return results

In [38]:
# Base estimators for the voting ensemble. Everything is left at the library
# defaults except random_state, which is pinned to 0 (as in the other model
# cells of this notebook) on the stochastic estimators so that the
# cross-validated metrics are reproducible from run to run.
model_1 = LogisticRegression()
model_2 = XGBClassifier(random_state=0)
model_3 = RandomForestClassifier(random_state=0)
model_4 = DecisionTreeClassifier(random_state=0)

# Combine the four base estimators with a hard-voting ensemble
# (voting='hard': majority vote over the predicted class labels, per the
# scikit-learn VotingClassifier documentation).
final_model = VotingClassifier(
    estimators=[
        ('lr', model_1),
        ('xgb', model_2),
        ('rf', model_3),
        ('tree', model_4),
    ],
    voting='hard',
)


In [44]:
# Score the hard-voting ensemble as-is: meth="" selects neither the
# OneVsRest nor the OneVsOne wrapper inside show_metrics.
# Note: despite its name, voting_clf receives the cross-validation results
# dict returned by show_metrics, not a fitted classifier.
voting_clf = show_metrics(model=final_model, meth="", fset="first", x=X, y=y)


Logistic Regression on first feature set

Accuracy :0.371
----------------------------------------------------------------------
Precision :0.23811891827396042
----------------------------------------------------------------------
Recall :0.21617401749836346
----------------------------------------------------------------------
F1 score :0.1906089062660763
----------------------------------------------------------------------


In [40]:
# Random forest with 30 trees and a fixed seed.
clf = RandomForestClassifier(
    n_estimators=30,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)

# meth="" -> the forest is scored directly, without an OvR/OvO wrapper.
# random_forest_cls holds the results dict returned by show_metrics.
random_forest_cls = show_metrics(model=clf, meth="", fset="first", x=X, y=y)

Logistic Regression on first feature set

Accuracy :0.39
----------------------------------------------------------------------
Precision :0.2734948547332607
----------------------------------------------------------------------
Recall :0.24211594776468393
----------------------------------------------------------------------
F1 score :0.20876660529587543
----------------------------------------------------------------------


In [41]:
# Extra-trees ensemble with 30 trees and a fixed seed.
clf = ExtraTreesClassifier(
    n_estimators=30,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)

# meth="" -> scored directly, without an OvR/OvO wrapper.
# extra_trees_clf holds the results dict returned by show_metrics.
extra_trees_clf = show_metrics(model=clf, meth="", fset="first", x=X, y=y)

Logistic Regression on first feature set

Accuracy :0.385
----------------------------------------------------------------------
Precision :0.27934733512653687
----------------------------------------------------------------------
Recall :0.24224579225861492
----------------------------------------------------------------------
F1 score :0.20468951912187924
----------------------------------------------------------------------


In [42]:
# AdaBoost with 100 boosting rounds and a fixed seed.
clf = AdaBoostClassifier(
    n_estimators=100,
    random_state=0,
)

# meth="" -> scored directly, without an OvR/OvO wrapper.
# ada_boost_cls holds the results dict returned by show_metrics.
ada_boost_cls = show_metrics(model=clf, meth="", fset="first", x=X, y=y)

Logistic Regression on first feature set

Accuracy :0.29300000000000004
----------------------------------------------------------------------
Precision :0.19420398166284303
----------------------------------------------------------------------
Recall :0.2095582648289625
----------------------------------------------------------------------
F1 score :0.17191564219728256
----------------------------------------------------------------------


In [43]:
# Base learner for the hiclass LocalClassifierPerNode wrapper. The seed is
# pinned (random_state=0, matching the other ensemble cells in this notebook)
# so repeated runs report the same metrics.
rf = RandomForestClassifier(random_state=0)
classifier = LocalClassifierPerNode(local_classifier=rf)

# meth="" -> the hiclass model is scored directly, without an OvR/OvO wrapper.
# hiclass_clf holds the results dict returned by show_metrics.
hiclass_clf = show_metrics(classifier, "", "first", X, y)

Logistic Regression on first feature set

Accuracy :0.406
----------------------------------------------------------------------
Precision :0.3212613198063612
----------------------------------------------------------------------
Recall :0.2934929236695795
----------------------------------------------------------------------
F1 score :0.2828101336583594
----------------------------------------------------------------------
