<a href="https://colab.research.google.com/github/hackmashine/boosting-a-random-forest/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install xgboost lightgbm seaborn joblib

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,f1_score,log_loss,classification_report,confusion_matrix,roc_auc_score
from sklearn.preprocessing import LabelEncoder
import joblib, random, os

def set_seed(seed=42):
    """Seed NumPy's and the standard library's global random generators.

    Args:
        seed: Value handed to ``np.random.seed`` and ``random.seed``.
    """
    # NumPy first, then the stdlib generator.
    for seeder in (np.random.seed, random.seed):
        seeder(seed)

# Fix the global RNG state before any data handling.
set_seed(42)

# Fetch Fashion-MNIST from OpenML as plain arrays (as_frame=False).
d = fetch_openml("Fashion-MNIST", version=1, as_frame=False)
X = d["data"].astype(np.float32)

# Targets are cast to strings and then mapped to integer codes; `le` keeps
# the fitted mapping for the model and metric code further down.
le = LabelEncoder()
y = le.fit_transform(d["target"].astype(str))

# Stratified 80/20 hold-out split with a fixed seed.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Gradient-boosting base learner. XGBoost is optional: when it cannot be
# imported the script falls back to scikit-learn's GradientBoostingClassifier
# so the remaining steps still run.
try:
    import xgboost as xgb
except Exception as exc:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; it stays broad because a broken native
    # library may surface as something other than ImportError.
    print(f"xgboost unavailable ({exc!r}); falling back to GradientBoostingClassifier")
    xgbc = GradientBoostingClassifier(random_state=42)
else:
    xgbc = xgb.XGBClassifier(
        n_estimators=400,
        learning_rate=0.1,
        max_depth=6,
        subsample=0.9,
        colsample_bytree=0.9,
        objective="multi:softprob",
        num_class=len(le.classes_),
        n_jobs=-1,
        tree_method="hist",
        # Explicit seed: row/column subsampling should not depend on global
        # RNG state, which is not shared with parallel worker processes.
        random_state=42,
    )

# Second boosting base learner. LightGBM is optional: when it cannot be
# imported the script falls back to an ExtraTreesClassifier.
try:
    import lightgbm as lgb
except Exception as exc:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit still propagate; it stays broad because a broken native
    # library may surface as something other than ImportError.
    print(f"lightgbm unavailable ({exc!r}); falling back to ExtraTreesClassifier")
    lgbm = ExtraTreesClassifier(n_estimators=400, n_jobs=-1, random_state=42)
else:
    lgbm = lgb.LGBMClassifier(
        n_estimators=400,
        learning_rate=0.1,
        objective="multiclass",
        num_class=len(le.classes_),
        subsample=0.9,
        # LightGBM only applies `subsample` (bagging_fraction) when the
        # bagging frequency is non-zero; without this the 0.9 is ignored.
        subsample_freq=1,
        colsample_bytree=0.9,
        n_jobs=-1,
        random_state=42,
    )

# Bagging base learner. An explicit seed is required for repeatable results:
# the global seed set by set_seed() is not inherited by the worker processes
# that StackingClassifier(n_jobs=-1) may spawn.
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=20,
    n_jobs=-1,
    random_state=42,
)

# Meta-learner. `multi_class` is deprecated since scikit-learn 1.5 and
# scheduled for removal; the default lbfgs solver already fits a multinomial
# model on multiclass targets, so dropping the argument keeps the behaviour.
final = LogisticRegression(max_iter=500)

# Stack the three base learners; the meta-learner is trained on their
# 5-fold cross-validated class probabilities only (passthrough=False).
stack = StackingClassifier(
    estimators=[("rf", rf), ("xgb", xgbc), ("lgbm", lgbm)],
    final_estimator=final,
    stack_method="predict_proba",
    passthrough=False,
    cv=5,
    n_jobs=-1,
)

# Train the stacked ensemble on the training split.
stack.fit(X_tr, y_tr)

# Class-probability matrix for the held-out split, one column per class.
proba = stack.predict_proba(X_te)

# Hard prediction = column index of the largest probability in each row
# (column order is assumed to follow the encoded labels 0..n-1).
pred = np.argmax(proba, axis=1)

# Integer label ids 0..n_classes-1, passed explicitly to every metric that
# accepts `labels`.
class_ids = list(range(len(le.classes_)))

# Metrics computed from the hard predictions.
acc = accuracy_score(y_te, pred)
f1m, f1w = (f1_score(y_te, pred, average=avg) for avg in ("macro", "weighted"))

# Metrics computed from the predicted probabilities.
ll = log_loss(y_te, proba, labels=class_ids)
roc = roc_auc_score(
    y_te,
    proba,
    multi_class="ovo",
    average="macro",
    labels=class_ids,
)

# Per-class text report and confusion matrix.
rep = classification_report(y_te, pred, zero_division=0)
cm = confusion_matrix(y_te, pred, labels=class_ids)

# Print each scalar metric as "<name> <value>", then the per-class report.
for metric_name, metric_value in (
    ("acc", acc),
    ("f1_macro", f1m),
    ("f1_weighted", f1w),
    ("log_loss", ll),
    ("roc_ovo_macro", roc),
):
    print(metric_name, metric_value)
print(rep)

# Confusion matrix as an un-annotated heatmap on its own 8x6 figure.
_cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=False, cmap="Blues", ax=cm_ax)
plt.show()

from sklearn.metrics import RocCurveDisplay
from sklearn.preprocessing import label_binarize

# One-vs-rest ROC curves: binarise the true labels so that column i is the
# 0/1 indicator for class i, matching column i of `proba`.
y_bin = label_binarize(y_te, classes=list(range(len(le.classes_))))

# Draw every class on one shared axes with its own legend entry. Without
# `ax=` each call opens a separate figure, and without `name=` every curve
# is labelled "Classifier", so the plots cannot be told apart.
_roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
for i, cls in enumerate(le.classes_):
    RocCurveDisplay.from_predictions(
        y_bin[:, i],
        proba[:, i],
        name=f"class {cls}",
        ax=roc_ax,
    )
plt.show()


