# Model Speed

## Benchmarking the Fit and Predict Speed of the Model

1. Benchmarking takes place where each measurement takes 10 rounds.
2. Average and Worst times will be recorded.
3. Model Performance is not the focus here but rather its speed.

In [1]:
import pandas as pd
import json
import time
import lightgbm
import catboost
from sklearn.model_selection import train_test_split

from joblib import load
import warnings
warnings.filterwarnings("ignore")

def parse_tune(tune_file:str):
    f = open(tune_file, "r")
    params = f.readline()
    f.close()
    params = params.replace('\'', '\"')#.replace("False", "\"False\"").replace("True", "\"True\"")
    params = json.loads(str(params))
    print(params)
    return params

def get_indexes():
    indexes = []
    for i in range(100):
        indexes.append(f"t_{i}")
    return indexes

LOOPS = 5 # Each benchmark takes 5 rounds per model/behavior-type

  from pandas.core import (


# 1 Fit Speed

In [2]:
def fit_benchmark(model,x,y,title:str,loops:int):
    times = []
    for i in range(1,loops+1):
        print(f"{title} Round: {i}", end="\r")
        start = time.time()
        model.fit(x,y)
        times.append(time.time()-start)
    print(f"{title}")
    print(f"Best Time: {min(times):.4f}s")
    print(f"Average Time: {sum(times)/len(times):.4f}s")
    print(f"Worst Time: {max(times):.4f}s")

In [3]:
train = pd.read_csv('Dataset/LGBM_TB.csv', low_memory=False) #<== Point these to the proper Test/Holdout datasets.
X_train, X_valid, y_train, y_valid = train_test_split(train.iloc[:,1:101], train.iloc[:,0], test_size=.3, shuffle=True, random_state=1)

#LGBM TB
times = []
lgbm_tb = lightgbm.LGBMClassifier(random_state=1, n_jobs=0, verbose=-1)
fit_benchmark(lgbm_tb, X_train, y_train, "Default LGBM_TB", LOOPS)
print("")

#LGBM TB (Tuned)
tuning = parse_tune('Models/LGBM/Tuning/Manual/RYZEN3b_LGBM_TB.tune')
tuned_lgbm_tb = lightgbm.LGBMClassifier(**tuning, random_state=1, n_jobs=0, verbose=-1,)
fit_benchmark(tuned_lgbm_tb,X_train,y_train, "Tuned LGBM_TB", LOOPS)

Default LGBM_TB Round: 5
Best Time: 1.8340s
Average Time: 2.1216s
Worst Time: 2.3994s

{'boost_from_average': 'True', 'boosting_type': 'gbdt', 'cat_l2': 10, 'class_weight': 'balanced', 'data_sample_strategy': 'goss', 'device': 'gpu', 'enable_bundle': 'True', 'gpu_use_dp': 'False', 'learning_rate': 0.1, 'max_bin': 255, 'max_depth': 5, 'n_estimators': 500, 'num_leaves': 16, 'objective': 'binary', 'tree_learner': 'feature'}
Tuned LGBM_TB Round: 5
Best Time: 18.3425s
Average Time: 20.0647s
Worst Time: 22.8418s


In [4]:
train = pd.read_csv('Dataset/LGBM_IB.csv', low_memory=False) #<== Point these to the proper Test/Holdout datasets.
X_train, X_valid, y_train, y_valid = train_test_split(train.iloc[:,1:101], train.iloc[:,0], test_size=.3, shuffle=True, random_state=1)

#LGBM IB
times = []
lgbm_ib = lightgbm.LGBMClassifier(random_state=1, n_jobs=0, verbose=-1)
fit_benchmark(lgbm_ib, X_train, y_train, "Default LGBM_IB", LOOPS)
print("")

#LGBM IB (Tuned)
tuning = parse_tune('Models/LGBM/Tuning/Manual/RYZEN3b_LGBM_IB.tune')
tuned_lgbm_ib = lightgbm.LGBMClassifier(**tuning, random_state=1, n_jobs=0, verbose=-1,)
fit_benchmark(tuned_lgbm_ib,X_train,y_train, "Tuned LGBM_IB", LOOPS)

Default LGBM_IB Round: 5
Best Time: 0.9284s
Average Time: 1.2992s
Worst Time: 1.8808s

{'boost_from_average': 'True', 'boosting_type': 'gbdt', 'cat_l2': 5, 'class_weight': 'balanced', 'data_sample_strategy': 'goss', 'device': 'gpu', 'enable_bundle': 'True', 'gpu_use_dp': 'False', 'learning_rate': 0.1, 'max_bin': 255, 'max_depth': 5, 'n_estimators': 1000, 'num_leaves': 32, 'objective': 'binary', 'tree_learner': 'feature'}
Tuned LGBM_IB Round: 4

KeyboardInterrupt: 

In [None]:
# train = pd.read_csv('Dataset/CATB_TB.csv', low_memory=False) #<== Point these to the proper Test/Holdout datasets.
# X_train, X_valid, y_train, y_valid = train_test_split(train.iloc[:,1:101], train.iloc[:,0], test_size=.3, shuffle=True, random_state=1)

#CATB TB
times = []
catb_tb = catboost.CatBoostClassifier(random_state=1, thread_count=-1, verbose=0, cat_features=get_indexes(), nan_mode='Min', one_hot_max_size=256)
fit_benchmark(catb_tb, X_train, y_train, "Default CATB_TB", LOOPS)
print("")

# #CATB TB (Tuned)
# tuning = parse_tune('Models/CATB/Tuning/Manual/RYZEN3b_CATB_TB.tune')
# tuned_catb_tb = catboost.CatBoostClassifier(**tuning, random_state=1, thread_count=-1, verbose=0, cat_features=get_indexes(), nan_mode='Min', one_hot_max_size=256)
# fit_benchmark(tuned_catb_tb,X_train,y_train, "Tuned CATB_TB", LOOPS)

Learning rate set to 0.055035
0:	learn: 0.5844191	total: 194ms	remaining: 3m 13s
1:	learn: 0.4753604	total: 221ms	remaining: 1m 50s
2:	learn: 0.3881046	total: 253ms	remaining: 1m 24s
3:	learn: 0.3124715	total: 287ms	remaining: 1m 11s
4:	learn: 0.2604249	total: 315ms	remaining: 1m 2s
5:	learn: 0.2205610	total: 345ms	remaining: 57.1s
6:	learn: 0.1889177	total: 379ms	remaining: 53.8s
7:	learn: 0.1658032	total: 411ms	remaining: 51s
8:	learn: 0.1470864	total: 450ms	remaining: 49.5s
9:	learn: 0.1319520	total: 486ms	remaining: 48.1s
10:	learn: 0.1180435	total: 524ms	remaining: 47.1s
11:	learn: 0.1077427	total: 555ms	remaining: 45.7s
12:	learn: 0.0975256	total: 589ms	remaining: 44.7s
13:	learn: 0.0856545	total: 621ms	remaining: 43.7s
14:	learn: 0.0783755	total: 653ms	remaining: 42.9s
15:	learn: 0.0718489	total: 688ms	remaining: 42.3s
16:	learn: 0.0685743	total: 721ms	remaining: 41.7s
17:	learn: 0.0647847	total: 752ms	remaining: 41s
18:	learn: 0.0608291	total: 784ms	remaining: 40.5s
19:	learn: 

In [None]:
# train = pd.read_csv('Dataset/CATB_IB.csv', low_memory=False) #<== Point these to the proper Test/Holdout datasets.
# X_train, X_valid, y_train, y_valid = train_test_split(train.iloc[:,1:101], train.iloc[:,0], test_size=.3, shuffle=True, random_state=1)

#CATB IB
times = []
catb_ib = catboost.CatBoostClassifier(random_state=1, thread_count=-1, verbose=0, cat_features=get_indexes(), nan_mode='Min', one_hot_max_size=256)
fit_benchmark(catb_ib, X_train, y_train, "Default CATB_IB", LOOPS)
print("")

# #CATB IB (Tuned)
# tuning = parse_tune('Models/CATB/Tuning/Manual/RYZEN3b_CATB_IB.tune')
# tuned_catb_ib = catboost.CatBoostClassifier(**tuning, random_state=1, thread_count=-1, verbose=0, cat_features=get_indexes(), nan_mode='Min', one_hot_max_size=256)
# fit_benchmark(tuned_catb_ib,X_train,y_train, "Tuned CATB_IB", LOOPS)

Learning rate set to 0.055035
0:	learn: 0.5844191	total: 36.1ms	remaining: 36.1s
1:	learn: 0.4753604	total: 67ms	remaining: 33.4s
2:	learn: 0.3881046	total: 99.2ms	remaining: 33s
3:	learn: 0.3124715	total: 127ms	remaining: 31.5s
4:	learn: 0.2604249	total: 156ms	remaining: 31.1s
5:	learn: 0.2205610	total: 185ms	remaining: 30.6s
6:	learn: 0.1889177	total: 213ms	remaining: 30.3s
7:	learn: 0.1658032	total: 245ms	remaining: 30.4s
8:	learn: 0.1470864	total: 279ms	remaining: 30.7s
9:	learn: 0.1319520	total: 310ms	remaining: 30.7s
10:	learn: 0.1180435	total: 341ms	remaining: 30.7s
11:	learn: 0.1077427	total: 372ms	remaining: 30.7s
12:	learn: 0.0975256	total: 402ms	remaining: 30.5s
13:	learn: 0.0856545	total: 447ms	remaining: 31.5s
14:	learn: 0.0783755	total: 476ms	remaining: 31.2s
15:	learn: 0.0718489	total: 510ms	remaining: 31.3s
16:	learn: 0.0685743	total: 539ms	remaining: 31.2s
17:	learn: 0.0647847	total: 568ms	remaining: 31s
18:	learn: 0.0608291	total: 598ms	remaining: 30.9s
19:	learn: 0.0

# 2 Predict Speed

In [None]:
DF_LGBM_TB = pd.read_csv('Dataset/LGBM_TB_Test.csv', low_memory=False) #<== Point these to the proper Test/Holdout datasets.
DF_LGBM_IB = pd.read_csv('Dataset/LGBM_IB_Test.csv', low_memory=False)
DF_CATB_TB = pd.read_csv('Dataset/CATB_TB_Test.csv', low_memory=False) #<== Point these to the proper Test/Holdout datasets.
DF_CATB_IB = pd.read_csv('Dataset/CATB_IB_Test.csv', low_memory=False)
DF_CATB_IB.iloc[:,1:101] = DF_CATB_IB.iloc[:,1:101].astype('str')
DF_CATB_IB.replace("nan", "NaN", inplace=True)

def pred_benchmark(model,x,y,title:str,loops:int):
    times = []
    for i in range(1,loops+1):
        print(f"{title} Round: {i}", end="\r")
        start = time.time()
        model.predict(x)
        times.append(time.time()-start)
    print(f"{title}")
    print(f"Best Time: {min(times):.4f}s")
    print(f"Average Time: {sum(times)/len(times):.4f}s")
    print(f"Worst Time: {max(times):.4f}s")

In [None]:
pred_benchmark(lgbm_tb, DF_LGBM_TB.iloc[:,1:101], DF_LGBM_TB.iloc[:,0], "Default LightGBM TB", LOOPS)
print("")

pred_benchmark(tuned_lgbm_tb, DF_LGBM_TB.iloc[:,1:101], DF_LGBM_TB.iloc[:,0], "Tuned LightGBM TB", LOOPS)

In [None]:
pred_benchmark(lgbm_ib, DF_LGBM_IB.iloc[:,1:101], DF_LGBM_IB.iloc[:,0], "Default LightGBM IB", LOOPS)
print("")

pred_benchmark(tuned_lgbm_ib, DF_LGBM_IB.iloc[:,1:101], DF_LGBM_IB.iloc[:,0], "Tuned LightGBM IB", LOOPS)

In [None]:
pred_benchmark(catb_tb, DF_CATB_TB.iloc[:,1:101], DF_CATB_TB.iloc[:,0], "Default CatBoost TB", LOOPS)
print("")

# pred_benchmark(tuned_catb_tb, DF_CATB_TB.iloc[:,1:101], DF_CATB_TB.iloc[:,0], "Tuned CatBoost TB", LOOPS)

In [None]:
pred_benchmark(catb_ib, DF_CATB_IB.iloc[:,1:101], DF_CATB_IB.iloc[:,0], "Default CatBoost IB", LOOPS)
print("")

# pred_benchmark(tuned_catb_ib, DF_CATB_IB.iloc[:,1:101], DF_CATB_IB.iloc[:,0], "Tuned CatBoost IB", LOOPS)