In [1]:
import tensorflow as tf
from tensorflow import keras 
import numpy as np
from keras.datasets import fashion_mnist
import pandas as pd
import warnings
warnings.simplefilter(action='ignore')
import lightgbm as lgb
import catboost as cb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score,RandomizedSearchCV
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score,accuracy_score


In [2]:
# Load the Fashion-MNIST train and test splits through keras.datasets,
# then unpack each (features, labels) pair into its own names.
train_split, test_split = fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [3]:
def scale_and_flatten(images):
    """Rescale pixel intensities to [-1, 1] and flatten each image to one row.

    Parameters
    ----------
    images : numpy.ndarray
        Stack of images with one image per entry along axis 0.
        Assumed to hold raw 0-255 intensities -- TODO confirm against the loader.

    Returns
    -------
    numpy.ndarray
        2-D float array of shape (n_images, pixels_per_image).
    """
    # 127.5 is the midpoint of 0..255, so 0 -> -1 and 255 -> +1 exactly.
    # Dividing by 127 instead leaves the brightest pixels at ~1.008.
    scaled = images / 127.5 - 1
    return scaled.reshape((len(images), -1))


# NOTE: both names are overwritten, so this cell must run exactly once after the
# data-loading cell; running it again would rescale already-scaled values.
X_train = scale_and_flatten(X_train)
X_test = scale_and_flatten(X_test)

In [4]:
# Sanity check: display the shapes of features and labels for both splits.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((60000, 784), (60000,), (10000, 784), (10000,))

In [5]:
# Shuffled K-fold splitter shared by every hyper-parameter search below.
# The fixed seed keeps the fold assignment identical from run to run.
N_FOLDS = 5
CV_SEED = 100
cv_strategy = KFold(n_splits=N_FOLDS, shuffle=True, random_state=CV_SEED)

In [6]:
# Candidate values that appear in more than one search space are declared once
# (as tuples) and copied into each grid as a fresh list.
_TREE_COUNTS = (50, 100, 200, 300, 400, 500, 700, 800, 600, 1000)
_TREE_DEPTHS = (2, 3, 4, 5, 6, 7, 8, 9, 10, 12)
_MIN_CHILD_SAMPLES = (5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
_LEAF_OR_BIN_COUNTS = (5, 10, 25, 30, 35, 40, 45, 50, 55, 60)
_MIN_SAMPLES = (2, 4, 6, 8, 10, 12, 14, 16, 18, 25)

# Logistic regression: inverse regularisation strength and penalty type.
param_grid_lr = {
    'C': [0.1, 1, 1.5, 2, 2.5, 3, 4, 5, 6, 7],
    'penalty': ['l1', 'l2'],
}

# Random forest: ensemble size and the two minimum-sample constraints.
param_grid_rf = {
    'n_estimators': list(_TREE_COUNTS),
    'min_samples_split': list(_MIN_SAMPLES),
    'min_samples_leaf': list(_MIN_SAMPLES),
}

# CatBoost.
# NOTE(review): CatBoost's parameter reference describes min_data_in_leaf
# (alias min_child_samples) as usable only with the Depthwise / Lossguide grow
# policies -- confirm it is accepted with the estimator's default policy.
param_grid_cb = {
    'n_estimators': list(_TREE_COUNTS),
    'l2_leaf_reg': [0.5, 1, 2, 5, 10, 15, 20, 30, 40, 50],
    'max_depth': list(_TREE_DEPTHS),
    'min_child_samples': list(_MIN_CHILD_SAMPLES),
    'max_bin': list(_LEAF_OR_BIN_COUNTS),
}

# LightGBM: ensemble size, L1/L2 regularisation and tree-shape limits.
param_grid_lgbm = {
    'n_estimators': list(_TREE_COUNTS),
    'reg_alpha': [0.5, 1, 1.5, 2, 3, 4, 5, 8, 10, 20],
    'reg_lambda': [0.5, 1, 20, 50, 100, 150, 300, 500, 550, 600],
    'max_depth': list(_TREE_DEPTHS),
    'min_child_samples': list(_MIN_CHILD_SAMPLES),
    'num_leaves': list(_LEAF_OR_BIN_COUNTS),
}

# Position i of this list is the search space for position i of `estimators`.
params = [
    param_grid_lr,
    param_grid_rf,
    param_grid_cb,
    param_grid_lgbm,
]

In [7]:
# One seed for every stochastic learner so that fits are repeatable across
# kernel restarts; 100 matches the random_state used by the KFold splitter.
MODEL_SEED = 100

# Order matters: the search loop pairs position i of this list with position i
# of `params` (logistic regression, random forest, CatBoost, LightGBM).
estimators = [
    # 'saga' supports both the l1 and l2 penalties present in the search space.
    LogisticRegression(max_iter=1000, solver='saga', random_state=MODEL_SEED),
    RandomForestClassifier(n_jobs=-1, random_state=MODEL_SEED),
    # NOTE(review): no eval_set is passed to fit() in the visible cells, so
    # early_stopping_rounds presumably never triggers -- confirm.
    cb.CatBoostClassifier(thread_count=15, early_stopping_rounds=90,
                          verbose=False, random_seed=MODEL_SEED),
    lgb.LGBMClassifier(n_jobs=-1, random_state=MODEL_SEED),
]

In [8]:
# Empty results table: one row per estimator, filled in by the search loop.
RESULT_COLUMNS = ['model', 'params', 'result']
ml_result = pd.DataFrame(columns=RESULT_COLUMNS)

In [None]:
%%time
# Tune every estimator with a randomized search over its own grid, then score
# the best configuration on the test split and record it in `ml_result`.
assert len(estimators) == len(params), "estimators and params must line up one-to-one"
for i, (estimator, search_space) in enumerate(zip(estimators, params)):
    ml_result.loc[i, 'model'] = str(estimator)
    # random_state fixes which 10 configurations get sampled, so repeated runs
    # of this cell evaluate the same candidates.
    model = RandomizedSearchCV(
        estimator=estimator,
        param_distributions=search_space,
        n_iter=10,
        cv=cv_strategy,
        n_jobs=-1,
        random_state=100,
    ).fit(X_train, y_train)
    best_params = model.best_params_
    # best_estimator_ is the winning configuration refitted by the search.
    valid_predict = model.best_estimator_.predict(X_test)
    max_res = accuracy_score(y_test, valid_predict)
    ml_result.loc[i, 'result'] = max_res
    ml_result.loc[i, 'params'] = str(best_params)
    # Printed after every model so partial results are visible while the
    # remaining searches are still running.
    print(ml_result)

In [8]:
%%time
# Baseline: fit the logistic regression (estimators[0]) with its constructor
# settings on the full training split.
a = estimators[0].fit(X=X_train, y=y_train)

Wall time: 29min 56s


In [9]:
%%time
# Predict test-split labels with the model fitted in the previous cell.
valid_predict = a.predict(X=X_test)

Wall time: 27 ms


In [10]:
%%time
# Test-split accuracy of the predictions from the previous cell.
accuracy_score(y_true=y_test, y_pred=valid_predict)

Wall time: 0 ns


0.8389

In [14]:
%%time
# Baseline: fit the random forest (estimators[1]) with its constructor
# settings on the full training split.
a = estimators[1].fit(X=X_train, y=y_train)

Wall time: 6.6 s


In [15]:
%%time
# Predict test-split labels with the model fitted in the previous cell.
valid_predict = a.predict(X=X_test)

Wall time: 66.1 ms


In [16]:
%%time
# Test-split accuracy of the predictions from the previous cell.
accuracy_score(y_true=y_test, y_pred=valid_predict)

Wall time: 1 ms


0.8777

In [None]:
%%time
# Baseline for CatBoost (estimators[2]) with its constructor settings:
# fit on the training split, predict the test split, display the accuracy.
a = estimators[2].fit(X_train, y=y_train)
valid_predict = a.predict(X_test)
accuracy_score(y_true=y_test, y_pred=valid_predict)

In [None]:
%%time
# Baseline for LightGBM (estimators[3]) with its constructor settings:
# fit on the training split, predict the test split, display the accuracy.
a = estimators[3].fit(X_train, y=y_train)
valid_predict = a.predict(X_test)
accuracy_score(y_true=y_test, y_pred=valid_predict)