# Test XGBoost, LGB, blending

In [57]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from collections import Counter
import mlflow
import sklearn
from bayes_opt import BayesianOptimization
# local imports
from prepare import *
from evaluate import *

### Set new experiment to keep track in mlflow

In [29]:
mlflow.set_experiment('Regressors')

INFO: 'Regressors' does not exist. Creating a new experiment


### Read in initial datasets if needed

In [4]:
#raw_train, raw_train_labels, raw_test, specs, sample = read_raw_csvs()
#raw_train_labels = pd.read_csv('data/train_labels.csv')

### Load large train/test features from Josh's work

In [5]:
reduced_train = pd.read_csv('reduce_train.csv')
reduced_test = pd.read_csv('reduce_test.csv')
reduced_train.shape, reduced_test.shape

categoricals = ['session_title']
cols_to_drop = ['game_session', 'installation_id', 'accuracy_group']

features = joblib.load('features.pkl')

((17690, 890), (1000, 890))

In [51]:
def make_submission(preds):
    assert len(preds)==1000
    sample = pd.read_csv('data/sample_submission.csv')
    submission = pd.DataFrame()
    submission['installation_id'] = sample['installation_id']
    submission['accuracy_group'] = preds
    submission.to_csv('preds.csv',index=False)
    return submission

# Regressors 

In [54]:
from sklearn.ensemble import (
    RandomForestRegressor,
    AdaBoostRegressor,
    GradientBoostingRegressor,
    StackingRegressor,
    VotingRegressor,
    BaggingRegressor,
)


from catboost import CatBoostRegressor

In [34]:
rfr=RandomForestRegressor()
abr = AdaBoostRegressor()
lgbm = LGBMRegressor()

cbr = CatBoostRegressor( loss_function='RMSE',
    task_type="CPU",
    learning_rate=0.05,
    iterations=2000,
    od_type="Iter",
    early_stopping_rounds=500,
    random_seed=42)

## Tune LGBM

In [71]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [72]:
#grid of parameters
gridParams = {
    'learning_rate': [0.05],
    'num_leaves': [31,90,200],
    'boosting_type' : ['gbdt','dart','rf'],
    'objective' : ['regression'],
    'max_depth' : [5,6,7,8],
    'random_state' : [42], 
    'colsample_bytree' : [0.3,0.5,0.7],
    'subsample' : [0.3,0.5,0.7],
    'min_split_gain' : [0.01],
    'min_data_in_leaf':[10],
    'metric':['rmse']
    }
#modelling
reg = LGBMRegressor()
grid = RandomizedSearchCV(reg,gridParams,verbose=1,cv=10,n_jobs = -1,n_iter=10)
grid.fit(X_train,y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   24.8s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   59.2s finished


RandomizedSearchCV(cv=10, error_score=nan,
                   estimator=LGBMRegressor(boosting_type='gbdt',
                                           class_weight=None,
                                           colsample_bytree=1.0,
                                           importance_type='split',
                                           learning_rate=0.1, max_depth=-1,
                                           min_child_samples=20,
                                           min_child_weight=0.001,
                                           min_split_gain=0.0, n_estimators=100,
                                           n_jobs=-1, num_leaves=31,
                                           objective=None, random_state=None,
                                           reg_alpha=0.0, reg_lambda=0.0,
                                           sile...
                   param_distributions={'boosting_type': ['gbdt', 'dart', 'rf'],
                                        'colsample_bytr

In [75]:
grid.best_params_

{'subsample': 0.3,
 'random_state': 42,
 'objective': 'regression',
 'num_leaves': 200,
 'min_split_gain': 0.01,
 'min_data_in_leaf': 10,
 'metric': 'rmse',
 'max_depth': 7,
 'learning_rate': 0.05,
 'colsample_bytree': 0.5,
 'boosting_type': 'gbdt'}

In [74]:
y_pred = get_class_pred(
    grid.predict(X_test),
    reduced_train)
accuracy = accuracy_score(y_test, y_pred)
qwk = cohen_kappa_score(y_test, y_pred, weights="quadratic")
mlflow.log_param("features_shape", X.shape)
mlflow.log_param("estimator", 'xgb')
mlflow.log_metric("Accuracy", accuracy)
mlflow.log_metric("QWK", qwk)

In [69]:
mlflow.start_run(run_name='tuning_lgbm', nested=True)
estimator = lgb.LGBMRegressor(num_leaves=31)

param_grid = {
    'learning_rate': [0.01, 0.1, 1],
    'n_estimators': [20, 40]
}

lgbm_gscv = GridSearchCV(estimator, param_grid, cv=3)
lgbm_gscv.fit(X_train, y_train)

print('Best parameters found by grid search are:', lgbm_gscv.best_params_)

mlflow.log_metric('best params', lgbm_gscv.best_params_)
lgbm_best = lgbm_gscv.best_estimator_.fit(X_train, y_train)
y_pred = lgbm_best.predict(X_test)
y_pred = get_class_pred(y_pred,reduced_train)
cohen_kappa_score(y_pred,y_test, weights='quadratic')

Best parameters found by grid search are: {'learning_rate': 0.1, 'n_estimators': 40}


0.5949141335440908

In [38]:
cbr = quick_eval(reduced_train, cbr, pc=True)
lgbm = quick_eval(reduced_train, lgbm, pc=True)
rfr = quick_eval(reduced_train, rfr, pc=True)
abr = quick_eval(reduced_train, abr, pc=True)


estimators = [
    ('lgbm', lgbm),
    ('rfr', rfr),
    ('abr', abr),
    ('cbr', cbr)
]

0:	learn: 1.2409745	total: 30.6ms	remaining: 1m 1s
1:	learn: 1.2244227	total: 58.4ms	remaining: 58.4s
2:	learn: 1.2105847	total: 85.9ms	remaining: 57.2s
3:	learn: 1.1976449	total: 104ms	remaining: 52s
4:	learn: 1.1855048	total: 130ms	remaining: 51.8s
5:	learn: 1.1738067	total: 150ms	remaining: 50s
6:	learn: 1.1633317	total: 175ms	remaining: 49.7s
7:	learn: 1.1531561	total: 193ms	remaining: 48.1s
8:	learn: 1.1445201	total: 212ms	remaining: 46.9s
9:	learn: 1.1357994	total: 240ms	remaining: 47.8s
10:	learn: 1.1280212	total: 259ms	remaining: 46.8s
11:	learn: 1.1211182	total: 281ms	remaining: 46.6s
12:	learn: 1.1142416	total: 300ms	remaining: 45.8s
13:	learn: 1.1085692	total: 317ms	remaining: 45s
14:	learn: 1.1023912	total: 335ms	remaining: 44.4s
15:	learn: 1.0969832	total: 358ms	remaining: 44.4s
16:	learn: 1.0916061	total: 377ms	remaining: 44s
17:	learn: 1.0862135	total: 399ms	remaining: 43.9s
18:	learn: 1.0812548	total: 418ms	remaining: 43.5s
19:	learn: 1.0771770	total: 438ms	remaining: 4

162:	learn: 0.9725699	total: 2.98s	remaining: 33.6s
163:	learn: 0.9723291	total: 3s	remaining: 33.6s
164:	learn: 0.9719423	total: 3.02s	remaining: 33.6s
165:	learn: 0.9717371	total: 3.04s	remaining: 33.6s
166:	learn: 0.9715928	total: 3.06s	remaining: 33.6s
167:	learn: 0.9713523	total: 3.08s	remaining: 33.5s
168:	learn: 0.9712552	total: 3.09s	remaining: 33.5s
169:	learn: 0.9709455	total: 3.11s	remaining: 33.5s
170:	learn: 0.9705171	total: 3.13s	remaining: 33.4s
171:	learn: 0.9701358	total: 3.14s	remaining: 33.4s
172:	learn: 0.9698044	total: 3.16s	remaining: 33.4s
173:	learn: 0.9695861	total: 3.18s	remaining: 33.4s
174:	learn: 0.9692245	total: 3.2s	remaining: 33.3s
175:	learn: 0.9690968	total: 3.21s	remaining: 33.3s
176:	learn: 0.9687468	total: 3.23s	remaining: 33.3s
177:	learn: 0.9683884	total: 3.25s	remaining: 33.2s
178:	learn: 0.9680509	total: 3.27s	remaining: 33.2s
179:	learn: 0.9678707	total: 3.28s	remaining: 33.2s
180:	learn: 0.9676152	total: 3.3s	remaining: 33.2s
181:	learn: 0.967

328:	learn: 0.9249198	total: 5.73s	remaining: 29.1s
329:	learn: 0.9246501	total: 5.75s	remaining: 29.1s
330:	learn: 0.9243599	total: 5.76s	remaining: 29.1s
331:	learn: 0.9242866	total: 5.78s	remaining: 29s
332:	learn: 0.9239692	total: 5.79s	remaining: 29s
333:	learn: 0.9236472	total: 5.81s	remaining: 29s
334:	learn: 0.9234279	total: 5.83s	remaining: 29s
335:	learn: 0.9230562	total: 5.84s	remaining: 28.9s
336:	learn: 0.9227527	total: 5.86s	remaining: 28.9s
337:	learn: 0.9224297	total: 5.88s	remaining: 28.9s
338:	learn: 0.9222085	total: 5.89s	remaining: 28.9s
339:	learn: 0.9219891	total: 5.91s	remaining: 28.8s
340:	learn: 0.9217424	total: 5.93s	remaining: 28.8s
341:	learn: 0.9214182	total: 5.94s	remaining: 28.8s
342:	learn: 0.9213141	total: 5.96s	remaining: 28.8s
343:	learn: 0.9209522	total: 5.97s	remaining: 28.8s
344:	learn: 0.9207412	total: 5.99s	remaining: 28.7s
345:	learn: 0.9204933	total: 6s	remaining: 28.7s
346:	learn: 0.9202314	total: 6.02s	remaining: 28.7s
347:	learn: 0.9201936	t

495:	learn: 0.8866936	total: 8.48s	remaining: 25.7s
496:	learn: 0.8864602	total: 8.51s	remaining: 25.7s
497:	learn: 0.8863878	total: 8.52s	remaining: 25.7s
498:	learn: 0.8861670	total: 8.54s	remaining: 25.7s
499:	learn: 0.8859282	total: 8.55s	remaining: 25.7s
500:	learn: 0.8856149	total: 8.57s	remaining: 25.7s
501:	learn: 0.8854321	total: 8.59s	remaining: 25.6s
502:	learn: 0.8852126	total: 8.61s	remaining: 25.6s
503:	learn: 0.8849781	total: 8.62s	remaining: 25.6s
504:	learn: 0.8846622	total: 8.64s	remaining: 25.6s
505:	learn: 0.8843301	total: 8.66s	remaining: 25.6s
506:	learn: 0.8841316	total: 8.68s	remaining: 25.6s
507:	learn: 0.8839638	total: 8.7s	remaining: 25.5s
508:	learn: 0.8837445	total: 8.71s	remaining: 25.5s
509:	learn: 0.8835241	total: 8.73s	remaining: 25.5s
510:	learn: 0.8832872	total: 8.75s	remaining: 25.5s
511:	learn: 0.8832750	total: 8.76s	remaining: 25.5s
512:	learn: 0.8830315	total: 8.78s	remaining: 25.4s
513:	learn: 0.8828368	total: 8.79s	remaining: 25.4s
514:	learn: 0

659:	learn: 0.8568698	total: 11.2s	remaining: 22.8s
660:	learn: 0.8567095	total: 11.2s	remaining: 22.8s
661:	learn: 0.8565343	total: 11.3s	remaining: 22.8s
662:	learn: 0.8563873	total: 11.3s	remaining: 22.7s
663:	learn: 0.8561581	total: 11.3s	remaining: 22.7s
664:	learn: 0.8559635	total: 11.3s	remaining: 22.7s
665:	learn: 0.8557901	total: 11.3s	remaining: 22.7s
666:	learn: 0.8556045	total: 11.3s	remaining: 22.7s
667:	learn: 0.8554531	total: 11.4s	remaining: 22.7s
668:	learn: 0.8552714	total: 11.4s	remaining: 22.6s
669:	learn: 0.8550855	total: 11.4s	remaining: 22.6s
670:	learn: 0.8550783	total: 11.4s	remaining: 22.6s
671:	learn: 0.8549189	total: 11.4s	remaining: 22.6s
672:	learn: 0.8547539	total: 11.5s	remaining: 22.6s
673:	learn: 0.8545991	total: 11.5s	remaining: 22.6s
674:	learn: 0.8544289	total: 11.5s	remaining: 22.6s
675:	learn: 0.8542715	total: 11.5s	remaining: 22.5s
676:	learn: 0.8540275	total: 11.5s	remaining: 22.5s
677:	learn: 0.8539674	total: 11.5s	remaining: 22.5s
678:	learn: 

825:	learn: 0.8297281	total: 14.2s	remaining: 20.2s
826:	learn: 0.8295938	total: 14.2s	remaining: 20.2s
827:	learn: 0.8293855	total: 14.2s	remaining: 20.2s
828:	learn: 0.8293015	total: 14.3s	remaining: 20.1s
829:	learn: 0.8291000	total: 14.3s	remaining: 20.1s
830:	learn: 0.8290936	total: 14.3s	remaining: 20.1s
831:	learn: 0.8289149	total: 14.3s	remaining: 20.1s
832:	learn: 0.8287379	total: 14.3s	remaining: 20.1s
833:	learn: 0.8286284	total: 14.3s	remaining: 20.1s
834:	learn: 0.8285157	total: 14.4s	remaining: 20s
835:	learn: 0.8285037	total: 14.4s	remaining: 20s
836:	learn: 0.8282457	total: 14.4s	remaining: 20s
837:	learn: 0.8280349	total: 14.4s	remaining: 20s
838:	learn: 0.8278954	total: 14.4s	remaining: 20s
839:	learn: 0.8278677	total: 14.5s	remaining: 20s
840:	learn: 0.8276425	total: 14.5s	remaining: 20s
841:	learn: 0.8274515	total: 14.5s	remaining: 20s
842:	learn: 0.8271271	total: 14.5s	remaining: 19.9s
843:	learn: 0.8270609	total: 14.5s	remaining: 19.9s
844:	learn: 0.8269104	total:

989:	learn: 0.8054013	total: 17.4s	remaining: 17.7s
990:	learn: 0.8052558	total: 17.4s	remaining: 17.7s
991:	learn: 0.8050642	total: 17.4s	remaining: 17.7s
992:	learn: 0.8048665	total: 17.4s	remaining: 17.7s
993:	learn: 0.8046673	total: 17.4s	remaining: 17.7s
994:	learn: 0.8046256	total: 17.5s	remaining: 17.6s
995:	learn: 0.8044936	total: 17.5s	remaining: 17.6s
996:	learn: 0.8042972	total: 17.5s	remaining: 17.6s
997:	learn: 0.8042059	total: 17.5s	remaining: 17.6s
998:	learn: 0.8040755	total: 17.5s	remaining: 17.6s
999:	learn: 0.8039335	total: 17.6s	remaining: 17.6s
1000:	learn: 0.8038773	total: 17.6s	remaining: 17.5s
1001:	learn: 0.8037716	total: 17.6s	remaining: 17.5s
1002:	learn: 0.8035700	total: 17.6s	remaining: 17.5s
1003:	learn: 0.8035331	total: 17.6s	remaining: 17.5s
1004:	learn: 0.8033470	total: 17.7s	remaining: 17.5s
1005:	learn: 0.8033389	total: 17.7s	remaining: 17.5s
1006:	learn: 0.8032093	total: 17.7s	remaining: 17.5s
1007:	learn: 0.8030136	total: 17.7s	remaining: 17.5s
1008

1147:	learn: 0.7841118	total: 20.5s	remaining: 15.2s
1148:	learn: 0.7839667	total: 20.5s	remaining: 15.2s
1149:	learn: 0.7837714	total: 20.6s	remaining: 15.2s
1150:	learn: 0.7837339	total: 20.6s	remaining: 15.2s
1151:	learn: 0.7835567	total: 20.6s	remaining: 15.2s
1152:	learn: 0.7833351	total: 20.6s	remaining: 15.1s
1153:	learn: 0.7831784	total: 20.6s	remaining: 15.1s
1154:	learn: 0.7829691	total: 20.7s	remaining: 15.1s
1155:	learn: 0.7827908	total: 20.7s	remaining: 15.1s
1156:	learn: 0.7827049	total: 20.7s	remaining: 15.1s
1157:	learn: 0.7825476	total: 20.7s	remaining: 15.1s
1158:	learn: 0.7824436	total: 20.7s	remaining: 15.1s
1159:	learn: 0.7823016	total: 20.8s	remaining: 15s
1160:	learn: 0.7821799	total: 20.8s	remaining: 15s
1161:	learn: 0.7821749	total: 20.8s	remaining: 15s
1162:	learn: 0.7819855	total: 20.8s	remaining: 15s
1163:	learn: 0.7818499	total: 20.8s	remaining: 15s
1164:	learn: 0.7817262	total: 20.9s	remaining: 15s
1165:	learn: 0.7816656	total: 20.9s	remaining: 14.9s
1166:

1303:	learn: 0.7641265	total: 23.5s	remaining: 12.5s
1304:	learn: 0.7640002	total: 23.5s	remaining: 12.5s
1305:	learn: 0.7638827	total: 23.5s	remaining: 12.5s
1306:	learn: 0.7637919	total: 23.6s	remaining: 12.5s
1307:	learn: 0.7636132	total: 23.6s	remaining: 12.5s
1308:	learn: 0.7634745	total: 23.6s	remaining: 12.5s
1309:	learn: 0.7633571	total: 23.6s	remaining: 12.4s
1310:	learn: 0.7633348	total: 23.6s	remaining: 12.4s
1311:	learn: 0.7633315	total: 23.6s	remaining: 12.4s
1312:	learn: 0.7632903	total: 23.7s	remaining: 12.4s
1313:	learn: 0.7631163	total: 23.7s	remaining: 12.4s
1314:	learn: 0.7629838	total: 23.7s	remaining: 12.3s
1315:	learn: 0.7628606	total: 23.7s	remaining: 12.3s
1316:	learn: 0.7627401	total: 23.7s	remaining: 12.3s
1317:	learn: 0.7626810	total: 23.7s	remaining: 12.3s
1318:	learn: 0.7625922	total: 23.8s	remaining: 12.3s
1319:	learn: 0.7625506	total: 23.8s	remaining: 12.2s
1320:	learn: 0.7624644	total: 23.8s	remaining: 12.2s
1321:	learn: 0.7624617	total: 23.8s	remaining:

1461:	learn: 0.7451496	total: 26.3s	remaining: 9.68s
1462:	learn: 0.7450615	total: 26.3s	remaining: 9.66s
1463:	learn: 0.7449750	total: 26.3s	remaining: 9.65s
1464:	learn: 0.7448601	total: 26.4s	remaining: 9.63s
1465:	learn: 0.7446619	total: 26.4s	remaining: 9.61s
1466:	learn: 0.7445683	total: 26.4s	remaining: 9.6s
1467:	learn: 0.7445651	total: 26.4s	remaining: 9.58s
1468:	learn: 0.7444586	total: 26.4s	remaining: 9.56s
1469:	learn: 0.7443522	total: 26.5s	remaining: 9.54s
1470:	learn: 0.7442425	total: 26.5s	remaining: 9.52s
1471:	learn: 0.7441098	total: 26.5s	remaining: 9.51s
1472:	learn: 0.7439278	total: 26.5s	remaining: 9.49s
1473:	learn: 0.7438162	total: 26.5s	remaining: 9.47s
1474:	learn: 0.7437034	total: 26.6s	remaining: 9.45s
1475:	learn: 0.7435795	total: 26.6s	remaining: 9.43s
1476:	learn: 0.7434826	total: 26.6s	remaining: 9.42s
1477:	learn: 0.7433119	total: 26.6s	remaining: 9.4s
1478:	learn: 0.7432241	total: 26.6s	remaining: 9.38s
1479:	learn: 0.7431679	total: 26.6s	remaining: 9

1623:	learn: 0.7277331	total: 29.3s	remaining: 6.79s
1624:	learn: 0.7276337	total: 29.3s	remaining: 6.77s
1625:	learn: 0.7276216	total: 29.3s	remaining: 6.75s
1626:	learn: 0.7274873	total: 29.4s	remaining: 6.73s
1627:	learn: 0.7273456	total: 29.4s	remaining: 6.71s
1628:	learn: 0.7272158	total: 29.4s	remaining: 6.7s
1629:	learn: 0.7271097	total: 29.4s	remaining: 6.68s
1630:	learn: 0.7269380	total: 29.4s	remaining: 6.66s
1631:	learn: 0.7269112	total: 29.5s	remaining: 6.64s
1632:	learn: 0.7267966	total: 29.5s	remaining: 6.62s
1633:	learn: 0.7266835	total: 29.5s	remaining: 6.61s
1634:	learn: 0.7266224	total: 29.5s	remaining: 6.59s
1635:	learn: 0.7264774	total: 29.5s	remaining: 6.57s
1636:	learn: 0.7264509	total: 29.5s	remaining: 6.55s
1637:	learn: 0.7262877	total: 29.6s	remaining: 6.53s
1638:	learn: 0.7262515	total: 29.6s	remaining: 6.51s
1639:	learn: 0.7262397	total: 29.6s	remaining: 6.5s
1640:	learn: 0.7261062	total: 29.6s	remaining: 6.48s
1641:	learn: 0.7260693	total: 29.6s	remaining: 6

1781:	learn: 0.7109790	total: 32.1s	remaining: 3.93s
1782:	learn: 0.7109481	total: 32.1s	remaining: 3.91s
1783:	learn: 0.7107724	total: 32.2s	remaining: 3.89s
1784:	learn: 0.7106953	total: 32.2s	remaining: 3.88s
1785:	learn: 0.7106292	total: 32.2s	remaining: 3.86s
1786:	learn: 0.7104930	total: 32.2s	remaining: 3.84s
1787:	learn: 0.7103843	total: 32.2s	remaining: 3.82s
1788:	learn: 0.7102187	total: 32.3s	remaining: 3.8s
1789:	learn: 0.7101352	total: 32.3s	remaining: 3.79s
1790:	learn: 0.7100616	total: 32.3s	remaining: 3.77s
1791:	learn: 0.7099699	total: 32.3s	remaining: 3.75s
1792:	learn: 0.7098446	total: 32.3s	remaining: 3.73s
1793:	learn: 0.7096888	total: 32.4s	remaining: 3.71s
1794:	learn: 0.7095461	total: 32.4s	remaining: 3.7s
1795:	learn: 0.7093859	total: 32.4s	remaining: 3.68s
1796:	learn: 0.7092780	total: 32.4s	remaining: 3.66s
1797:	learn: 0.7092327	total: 32.4s	remaining: 3.64s
1798:	learn: 0.7091622	total: 32.4s	remaining: 3.62s
1799:	learn: 0.7090560	total: 32.5s	remaining: 3

1939:	learn: 0.6949048	total: 34.9s	remaining: 1.08s
1940:	learn: 0.6948974	total: 34.9s	remaining: 1.06s
1941:	learn: 0.6948465	total: 34.9s	remaining: 1.04s
1942:	learn: 0.6947998	total: 34.9s	remaining: 1.02s
1943:	learn: 0.6947274	total: 34.9s	remaining: 1.01s
1944:	learn: 0.6946915	total: 35s	remaining: 988ms
1945:	learn: 0.6945203	total: 35s	remaining: 970ms
1946:	learn: 0.6944020	total: 35s	remaining: 952ms
1947:	learn: 0.6942694	total: 35s	remaining: 934ms
1948:	learn: 0.6941727	total: 35s	remaining: 916ms
1949:	learn: 0.6940105	total: 35s	remaining: 898ms
1950:	learn: 0.6939791	total: 35.1s	remaining: 880ms
1951:	learn: 0.6938927	total: 35.1s	remaining: 862ms
1952:	learn: 0.6937605	total: 35.1s	remaining: 844ms
1953:	learn: 0.6937052	total: 35.1s	remaining: 826ms
1954:	learn: 0.6935857	total: 35.1s	remaining: 809ms
1955:	learn: 0.6934570	total: 35.1s	remaining: 791ms
1956:	learn: 0.6933411	total: 35.2s	remaining: 773ms
1957:	learn: 0.6931843	total: 35.2s	remaining: 755ms
1958:

## Vote, stacked, bagged regressors

In [40]:
vote_regressor = VotingRegressor(estimators=estimators,
                                n_jobs=-1)
vote_regressor = quick_eval(reduced_train, vote_regressor, pc=True)

The accuracy of VotingRegressor is 0.5712266817410967
The QWK of VotingRegressor is 0.5915220967303223


In [53]:
stacked_regressor = StackingRegressor(estimators=estimators,
                                n_jobs=-1)
stacked_regressor = quick_eval(reduced_train, stacked_regressor, pc=True)

The accuracy of StackingRegressor is 0.5672696438665913
The QWK of StackingRegressor is 0.5979707068817519


In [55]:
bagged_regressor = BaggingRegressor(base_estimator=lgbm,
                               n_jobs=-1,
                                   warm_start=True)
quick_eval(reduced_train, bagged_regressor, pc=True)

The accuracy of BaggingRegressor is 0.5746184284906727
The QWK of BaggingRegressor is 0.5958131257022264


BaggingRegressor(base_estimator=LGBMRegressor(boosting_type='gbdt',
                                              class_weight=None,
                                              colsample_bytree=1.0,
                                              importance_type='split',
                                              learning_rate=0.1, max_depth=-1,
                                              min_child_samples=20,
                                              min_child_weight=0.001,
                                              min_split_gain=0.0,
                                              n_estimators=100, n_jobs=-1,
                                              num_leaves=31, objective=None,
                                              random_state=None, reg_alpha=0.0,
                                              reg_lambda=0.0, silent=True,
                                              subsample=1.0,
                                              subsample_for_bin=200000,
     

## XGBoost

In [63]:
mlflow.start_run(run_name='xgb-tuned-params', nested=True)


import xgboost as xgb
from sklearn import datasets
from sklearn.model_selection import train_test_split


X = reduced_train.drop(cols_to_drop, axis=1)[features]
y = reduced_train.accuracy_group

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

dtrain = xgb.DMatrix(X_train, label=y_train)
params = {
            'colsample_bytree': 0.2,                 
            'learning_rate': 0.01,
            'objective':'reg:squarederror',
            'max_depth': 6,
            'subsample': 1,
            'min_child_weight': 3,
            'gamma': 0.25,
            'n_estimators': 1400
         }
xgb_model = xgb.train(params=params, dtrain=dtrain, num_boost_round=10)

xgb_model_path = "xgb_model.pth"
xgb_model.save_model(xgb_model_path)

dtest = xgb.DMatrix(X_test, label=y_test)

print('through the test')
y_pred = xgb_model.predict(dtest)

y_pred = get_class_pred(y_pred,reduced_train)

print('through the test')

accuracy = accuracy_score(y_test, y_pred)
qwk = cohen_kappa_score(y_test, y_pred, weights="quadratic")
mlflow.log_param("features_shape", X.shape)
mlflow.log_param("estimator", 'xgb')
mlflow.log_metric("Accuracy", accuracy)
mlflow.log_metric("QWK", qwk)
mlflow.sklearn.log_model(xgb_model, "model")
mlflow.end_run()
print('complete')

through the test
through the test
complete
