## Continue ensembles from v1 on full dataset format commonly used on Kaggle

### Integrated mlflow lets this notebook rinse-repeat

In [1]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from collections import Counter
import mlflow

# local imports
from prepare import *
from evaluate import *

### Set new experiment to keep track in mlflow

In [2]:
mlflow.set_experiment('high_counts_dropped')

INFO: 'high_counts_dropped' does not exist. Creating a new experiment


### Read in initial datasets if needed

In [3]:
#raw_train, raw_train_labels, raw_test, specs, sample = read_raw_csvs()
#raw_train_labels = pd.read_csv('data/train_labels.csv')

### Load large train/test features from Josh's work

In [4]:
reduced_train = pd.read_csv('reduce_train_cc.csv')
reduced_test = pd.read_csv('reduce_test_cc.csv')
reduced_train.shape, reduced_test.shape

((10491, 881), (957, 881))

### Start throwing model mud at the wall

In [5]:
from catboost import CatBoostClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    AdaBoostClassifier,
    ExtraTreesClassifier,
    BaggingClassifier,
    GradientBoostingClassifier,
    VotingClassifier,
    StackingClassifier,
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, SGDClassifier, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
import warnings

warnings.filterwarnings(
    "ignore"
)  # Ridge classifier throws some warnings about ill-conditioned matrix

  return _load(spec)


### Baseline accuracy of 50% (or 25% if balanced)

In [6]:
reduced_train.accuracy_group.value_counts(normalize=True)

3    0.527404
0    0.217520
1    0.128110
2    0.126966
Name: accuracy_group, dtype: float64

# Initialize Models and start testing accuracy

In [7]:
rf = RandomForestClassifier()
rc = RidgeClassifier()
ac = AdaBoostClassifier()
et = ExtraTreesClassifier()
bc = BaggingClassifier()
gbc = GradientBoostingClassifier()

clf = CatBoostClassifier(
    loss_function='MultiClass',
    task_type="CPU",
    learning_rate=0.01,
    iterations=2000,
    od_type="Iter",
    early_stopping_rounds=500,
    random_seed=42
    )

# tried with minimal results
#nb = GaussianNB()
#lr = LogisticRegression()               ## if they are commented out, they weren't performing well (or operator error...)
#sgd = SGDClassifier()

In [8]:
knn = KNeighborsClassifier()
svc = SVC(probability=True, verbose=1)

## Evaluate model performance

In [9]:
# Catboost - accuracy 56%a
clf = quick_eval(reduced_train, clf)#, cv=True)

0:	learn: 1.3819055	total: 127ms	remaining: 4m 13s
1:	learn: 1.3773734	total: 186ms	remaining: 3m 6s
2:	learn: 1.3725318	total: 243ms	remaining: 2m 41s
3:	learn: 1.3683463	total: 297ms	remaining: 2m 28s
4:	learn: 1.3642037	total: 352ms	remaining: 2m 20s
5:	learn: 1.3602467	total: 405ms	remaining: 2m 14s
6:	learn: 1.3562560	total: 461ms	remaining: 2m 11s
7:	learn: 1.3516902	total: 520ms	remaining: 2m 9s
8:	learn: 1.3473216	total: 584ms	remaining: 2m 9s
9:	learn: 1.3435753	total: 646ms	remaining: 2m 8s
10:	learn: 1.3401197	total: 705ms	remaining: 2m 7s
11:	learn: 1.3364536	total: 766ms	remaining: 2m 6s
12:	learn: 1.3324811	total: 832ms	remaining: 2m 7s
13:	learn: 1.3288252	total: 893ms	remaining: 2m 6s
14:	learn: 1.3255355	total: 948ms	remaining: 2m 5s
15:	learn: 1.3219867	total: 1.02s	remaining: 2m 6s
16:	learn: 1.3187081	total: 1.09s	remaining: 2m 7s
17:	learn: 1.3154213	total: 1.17s	remaining: 2m 8s
18:	learn: 1.3107393	total: 1.23s	remaining: 2m 8s
19:	learn: 1.3077396	total: 1.29s	r

159:	learn: 1.0918542	total: 9.57s	remaining: 1m 50s
160:	learn: 1.0913104	total: 9.64s	remaining: 1m 50s
161:	learn: 1.0903757	total: 9.71s	remaining: 1m 50s
162:	learn: 1.0900222	total: 9.76s	remaining: 1m 50s
163:	learn: 1.0895727	total: 9.82s	remaining: 1m 49s
164:	learn: 1.0891647	total: 9.88s	remaining: 1m 49s
165:	learn: 1.0887391	total: 9.94s	remaining: 1m 49s
166:	learn: 1.0883533	total: 10s	remaining: 1m 49s
167:	learn: 1.0879264	total: 10.1s	remaining: 1m 49s
168:	learn: 1.0875390	total: 10.1s	remaining: 1m 49s
169:	learn: 1.0868865	total: 10.2s	remaining: 1m 49s
170:	learn: 1.0860569	total: 10.2s	remaining: 1m 49s
171:	learn: 1.0856369	total: 10.3s	remaining: 1m 49s
172:	learn: 1.0851273	total: 10.3s	remaining: 1m 49s
173:	learn: 1.0848679	total: 10.4s	remaining: 1m 48s
174:	learn: 1.0840175	total: 10.4s	remaining: 1m 48s
175:	learn: 1.0836034	total: 10.5s	remaining: 1m 48s
176:	learn: 1.0827414	total: 10.6s	remaining: 1m 48s
177:	learn: 1.0823718	total: 10.6s	remaining: 1m

315:	learn: 1.0324473	total: 18.7s	remaining: 1m 39s
316:	learn: 1.0322552	total: 18.7s	remaining: 1m 39s
317:	learn: 1.0320580	total: 18.8s	remaining: 1m 39s
318:	learn: 1.0316949	total: 18.8s	remaining: 1m 39s
319:	learn: 1.0313288	total: 18.9s	remaining: 1m 39s
320:	learn: 1.0312023	total: 18.9s	remaining: 1m 39s
321:	learn: 1.0308567	total: 19s	remaining: 1m 39s
322:	learn: 1.0306390	total: 19.1s	remaining: 1m 38s
323:	learn: 1.0305052	total: 19.1s	remaining: 1m 38s
324:	learn: 1.0300721	total: 19.2s	remaining: 1m 38s
325:	learn: 1.0298718	total: 19.2s	remaining: 1m 38s
326:	learn: 1.0294390	total: 19.3s	remaining: 1m 38s
327:	learn: 1.0292580	total: 19.4s	remaining: 1m 38s
328:	learn: 1.0291467	total: 19.4s	remaining: 1m 38s
329:	learn: 1.0289490	total: 19.5s	remaining: 1m 38s
330:	learn: 1.0285167	total: 19.5s	remaining: 1m 38s
331:	learn: 1.0284619	total: 19.6s	remaining: 1m 38s
332:	learn: 1.0282724	total: 19.7s	remaining: 1m 38s
333:	learn: 1.0280456	total: 19.7s	remaining: 1m

474:	learn: 1.0041369	total: 27.5s	remaining: 1m 28s
475:	learn: 1.0040864	total: 27.5s	remaining: 1m 28s
476:	learn: 1.0040003	total: 27.6s	remaining: 1m 27s
477:	learn: 1.0038626	total: 27.6s	remaining: 1m 27s
478:	learn: 1.0037709	total: 27.7s	remaining: 1m 27s
479:	learn: 1.0034791	total: 27.7s	remaining: 1m 27s
480:	learn: 1.0033313	total: 27.8s	remaining: 1m 27s
481:	learn: 1.0032299	total: 27.8s	remaining: 1m 27s
482:	learn: 1.0030648	total: 27.9s	remaining: 1m 27s
483:	learn: 1.0029477	total: 27.9s	remaining: 1m 27s
484:	learn: 1.0028050	total: 28s	remaining: 1m 27s
485:	learn: 1.0026378	total: 28s	remaining: 1m 27s
486:	learn: 1.0024516	total: 28.1s	remaining: 1m 27s
487:	learn: 1.0022713	total: 28.1s	remaining: 1m 27s
488:	learn: 1.0019824	total: 28.2s	remaining: 1m 27s
489:	learn: 1.0018595	total: 28.3s	remaining: 1m 27s
490:	learn: 1.0016881	total: 28.3s	remaining: 1m 27s
491:	learn: 1.0016074	total: 28.4s	remaining: 1m 26s
492:	learn: 1.0015568	total: 28.4s	remaining: 1m 2

631:	learn: 0.9845726	total: 35.9s	remaining: 1m 17s
632:	learn: 0.9845365	total: 36s	remaining: 1m 17s
633:	learn: 0.9844452	total: 36s	remaining: 1m 17s
634:	learn: 0.9843590	total: 36.1s	remaining: 1m 17s
635:	learn: 0.9842883	total: 36.2s	remaining: 1m 17s
636:	learn: 0.9842234	total: 36.2s	remaining: 1m 17s
637:	learn: 0.9841037	total: 36.3s	remaining: 1m 17s
638:	learn: 0.9839664	total: 36.3s	remaining: 1m 17s
639:	learn: 0.9838916	total: 36.4s	remaining: 1m 17s
640:	learn: 0.9837765	total: 36.5s	remaining: 1m 17s
641:	learn: 0.9836882	total: 36.5s	remaining: 1m 17s
642:	learn: 0.9836246	total: 36.6s	remaining: 1m 17s
643:	learn: 0.9835109	total: 36.6s	remaining: 1m 17s
644:	learn: 0.9834129	total: 36.7s	remaining: 1m 17s
645:	learn: 0.9833383	total: 36.7s	remaining: 1m 17s
646:	learn: 0.9832710	total: 36.8s	remaining: 1m 16s
647:	learn: 0.9831948	total: 36.8s	remaining: 1m 16s
648:	learn: 0.9829536	total: 36.9s	remaining: 1m 16s
649:	learn: 0.9828810	total: 37s	remaining: 1m 16s

789:	learn: 0.9670862	total: 45s	remaining: 1m 8s
790:	learn: 0.9670457	total: 45.1s	remaining: 1m 8s
791:	learn: 0.9669346	total: 45.2s	remaining: 1m 8s
792:	learn: 0.9668486	total: 45.3s	remaining: 1m 8s
793:	learn: 0.9667731	total: 45.4s	remaining: 1m 8s
794:	learn: 0.9665926	total: 45.5s	remaining: 1m 8s
795:	learn: 0.9665479	total: 45.6s	remaining: 1m 8s
796:	learn: 0.9664726	total: 45.7s	remaining: 1m 8s
797:	learn: 0.9664030	total: 45.8s	remaining: 1m 9s
798:	learn: 0.9663116	total: 45.9s	remaining: 1m 9s
799:	learn: 0.9661805	total: 46.1s	remaining: 1m 9s
800:	learn: 0.9661112	total: 46.2s	remaining: 1m 9s
801:	learn: 0.9659979	total: 46.3s	remaining: 1m 9s
802:	learn: 0.9657621	total: 46.5s	remaining: 1m 9s
803:	learn: 0.9655547	total: 46.6s	remaining: 1m 9s
804:	learn: 0.9654358	total: 46.7s	remaining: 1m 9s
805:	learn: 0.9652445	total: 46.8s	remaining: 1m 9s
806:	learn: 0.9652017	total: 46.9s	remaining: 1m 9s
807:	learn: 0.9651160	total: 47s	remaining: 1m 9s
808:	learn: 0.96

948:	learn: 0.9488248	total: 56s	remaining: 1m 2s
949:	learn: 0.9487019	total: 56.1s	remaining: 1m 1s
950:	learn: 0.9486456	total: 56.1s	remaining: 1m 1s
951:	learn: 0.9485155	total: 56.2s	remaining: 1m 1s
952:	learn: 0.9484861	total: 56.3s	remaining: 1m 1s
953:	learn: 0.9483342	total: 56.3s	remaining: 1m 1s
954:	learn: 0.9482936	total: 56.4s	remaining: 1m 1s
955:	learn: 0.9481684	total: 56.4s	remaining: 1m 1s
956:	learn: 0.9479934	total: 56.5s	remaining: 1m 1s
957:	learn: 0.9478391	total: 56.6s	remaining: 1m 1s
958:	learn: 0.9477752	total: 56.6s	remaining: 1m 1s
959:	learn: 0.9477213	total: 56.7s	remaining: 1m 1s
960:	learn: 0.9475305	total: 56.7s	remaining: 1m 1s
961:	learn: 0.9473741	total: 56.8s	remaining: 1m 1s
962:	learn: 0.9472906	total: 56.8s	remaining: 1m 1s
963:	learn: 0.9471469	total: 56.9s	remaining: 1m 1s
964:	learn: 0.9469855	total: 57s	remaining: 1m 1s
965:	learn: 0.9469289	total: 57s	remaining: 1m 1s
966:	learn: 0.9467766	total: 57.1s	remaining: 1m
967:	learn: 0.9467043

1109:	learn: 0.9292472	total: 1m 5s	remaining: 52.8s
1110:	learn: 0.9289878	total: 1m 5s	remaining: 52.8s
1111:	learn: 0.9288483	total: 1m 6s	remaining: 52.7s
1112:	learn: 0.9287637	total: 1m 6s	remaining: 52.7s
1113:	learn: 0.9287007	total: 1m 6s	remaining: 52.6s
1114:	learn: 0.9286485	total: 1m 6s	remaining: 52.6s
1115:	learn: 0.9285891	total: 1m 6s	remaining: 52.5s
1116:	learn: 0.9284630	total: 1m 6s	remaining: 52.4s
1117:	learn: 0.9283132	total: 1m 6s	remaining: 52.4s
1118:	learn: 0.9282395	total: 1m 6s	remaining: 52.3s
1119:	learn: 0.9281634	total: 1m 6s	remaining: 52.3s
1120:	learn: 0.9280688	total: 1m 6s	remaining: 52.2s
1121:	learn: 0.9280206	total: 1m 6s	remaining: 52.2s
1122:	learn: 0.9278876	total: 1m 6s	remaining: 52.1s
1123:	learn: 0.9277376	total: 1m 6s	remaining: 52s
1124:	learn: 0.9276338	total: 1m 6s	remaining: 52s
1125:	learn: 0.9275535	total: 1m 6s	remaining: 51.9s
1126:	learn: 0.9274886	total: 1m 6s	remaining: 51.9s
1127:	learn: 0.9273664	total: 1m 7s	remaining: 51.

1264:	learn: 0.9131293	total: 1m 14s	remaining: 43.4s
1265:	learn: 0.9130359	total: 1m 14s	remaining: 43.4s
1266:	learn: 0.9129372	total: 1m 14s	remaining: 43.3s
1267:	learn: 0.9128287	total: 1m 14s	remaining: 43.2s
1268:	learn: 0.9127694	total: 1m 14s	remaining: 43.2s
1269:	learn: 0.9126319	total: 1m 15s	remaining: 43.1s
1270:	learn: 0.9125351	total: 1m 15s	remaining: 43.1s
1271:	learn: 0.9125119	total: 1m 15s	remaining: 43s
1272:	learn: 0.9123799	total: 1m 15s	remaining: 43s
1273:	learn: 0.9122697	total: 1m 15s	remaining: 42.9s
1274:	learn: 0.9121235	total: 1m 15s	remaining: 42.8s
1275:	learn: 0.9120311	total: 1m 15s	remaining: 42.8s
1276:	learn: 0.9119572	total: 1m 15s	remaining: 42.7s
1277:	learn: 0.9118572	total: 1m 15s	remaining: 42.7s
1278:	learn: 0.9117154	total: 1m 15s	remaining: 42.6s
1279:	learn: 0.9116529	total: 1m 15s	remaining: 42.5s
1280:	learn: 0.9116127	total: 1m 15s	remaining: 42.5s
1281:	learn: 0.9115122	total: 1m 15s	remaining: 42.4s
1282:	learn: 0.9114021	total: 1m

1419:	learn: 0.8994490	total: 1m 24s	remaining: 34.7s
1420:	learn: 0.8993405	total: 1m 24s	remaining: 34.6s
1421:	learn: 0.8992638	total: 1m 25s	remaining: 34.6s
1422:	learn: 0.8991611	total: 1m 25s	remaining: 34.5s
1423:	learn: 0.8990801	total: 1m 25s	remaining: 34.4s
1424:	learn: 0.8989801	total: 1m 25s	remaining: 34.4s
1425:	learn: 0.8989597	total: 1m 25s	remaining: 34.3s
1426:	learn: 0.8988992	total: 1m 25s	remaining: 34.3s
1427:	learn: 0.8987748	total: 1m 25s	remaining: 34.2s
1428:	learn: 0.8986444	total: 1m 25s	remaining: 34.1s
1429:	learn: 0.8985831	total: 1m 25s	remaining: 34.1s
1430:	learn: 0.8984408	total: 1m 25s	remaining: 34s
1431:	learn: 0.8983883	total: 1m 25s	remaining: 34s
1432:	learn: 0.8983274	total: 1m 25s	remaining: 33.9s
1433:	learn: 0.8982618	total: 1m 25s	remaining: 33.8s
1434:	learn: 0.8982100	total: 1m 25s	remaining: 33.8s
1435:	learn: 0.8981100	total: 1m 25s	remaining: 33.7s
1436:	learn: 0.8979862	total: 1m 25s	remaining: 33.6s
1437:	learn: 0.8978955	total: 1m

1573:	learn: 0.8875204	total: 1m 33s	remaining: 25.3s
1574:	learn: 0.8874103	total: 1m 33s	remaining: 25.3s
1575:	learn: 0.8872909	total: 1m 33s	remaining: 25.2s
1576:	learn: 0.8872222	total: 1m 33s	remaining: 25.2s
1577:	learn: 0.8871652	total: 1m 33s	remaining: 25.1s
1578:	learn: 0.8871053	total: 1m 33s	remaining: 25s
1579:	learn: 0.8869954	total: 1m 33s	remaining: 25s
1580:	learn: 0.8869746	total: 1m 34s	remaining: 24.9s
1581:	learn: 0.8868373	total: 1m 34s	remaining: 24.9s
1582:	learn: 0.8867325	total: 1m 34s	remaining: 24.8s
1583:	learn: 0.8866241	total: 1m 34s	remaining: 24.7s
1584:	learn: 0.8865468	total: 1m 34s	remaining: 24.7s
1585:	learn: 0.8864706	total: 1m 34s	remaining: 24.6s
1586:	learn: 0.8863313	total: 1m 34s	remaining: 24.6s
1587:	learn: 0.8862583	total: 1m 34s	remaining: 24.5s
1588:	learn: 0.8861693	total: 1m 34s	remaining: 24.4s
1589:	learn: 0.8860849	total: 1m 34s	remaining: 24.4s
1590:	learn: 0.8860331	total: 1m 34s	remaining: 24.3s
1591:	learn: 0.8860083	total: 1m

1726:	learn: 0.8766528	total: 1m 41s	remaining: 16.1s
1727:	learn: 0.8765959	total: 1m 41s	remaining: 16.1s
1728:	learn: 0.8764931	total: 1m 42s	remaining: 16s
1729:	learn: 0.8764284	total: 1m 42s	remaining: 15.9s
1730:	learn: 0.8763875	total: 1m 42s	remaining: 15.9s
1731:	learn: 0.8763473	total: 1m 42s	remaining: 15.8s
1732:	learn: 0.8762900	total: 1m 42s	remaining: 15.8s
1733:	learn: 0.8762380	total: 1m 42s	remaining: 15.7s
1734:	learn: 0.8761639	total: 1m 42s	remaining: 15.6s
1735:	learn: 0.8761458	total: 1m 42s	remaining: 15.6s
1736:	learn: 0.8760682	total: 1m 42s	remaining: 15.5s
1737:	learn: 0.8760243	total: 1m 42s	remaining: 15.5s
1738:	learn: 0.8760052	total: 1m 42s	remaining: 15.4s
1739:	learn: 0.8758790	total: 1m 42s	remaining: 15.3s
1740:	learn: 0.8757954	total: 1m 42s	remaining: 15.3s
1741:	learn: 0.8757288	total: 1m 42s	remaining: 15.2s
1742:	learn: 0.8756550	total: 1m 42s	remaining: 15.2s
1743:	learn: 0.8755693	total: 1m 42s	remaining: 15.1s
1744:	learn: 0.8755288	total: 

1881:	learn: 0.8665273	total: 1m 50s	remaining: 6.92s
1882:	learn: 0.8664725	total: 1m 50s	remaining: 6.86s
1883:	learn: 0.8664131	total: 1m 50s	remaining: 6.8s
1884:	learn: 0.8663805	total: 1m 50s	remaining: 6.75s
1885:	learn: 0.8663340	total: 1m 50s	remaining: 6.69s
1886:	learn: 0.8662907	total: 1m 50s	remaining: 6.63s
1887:	learn: 0.8662291	total: 1m 50s	remaining: 6.57s
1888:	learn: 0.8661897	total: 1m 50s	remaining: 6.51s
1889:	learn: 0.8661542	total: 1m 50s	remaining: 6.45s
1890:	learn: 0.8661189	total: 1m 50s	remaining: 6.39s
1891:	learn: 0.8660053	total: 1m 50s	remaining: 6.33s
1892:	learn: 0.8659244	total: 1m 51s	remaining: 6.28s
1893:	learn: 0.8658404	total: 1m 51s	remaining: 6.22s
1894:	learn: 0.8657576	total: 1m 51s	remaining: 6.16s
1895:	learn: 0.8657272	total: 1m 51s	remaining: 6.1s
1896:	learn: 0.8655996	total: 1m 51s	remaining: 6.04s
1897:	learn: 0.8655223	total: 1m 51s	remaining: 5.98s
1898:	learn: 0.8654279	total: 1m 51s	remaining: 5.92s
1899:	learn: 0.8653250	total: 

In [10]:
rf = quick_eval(reduced_train, rf, cv=True)  #.563cv acc
rc = quick_eval(reduced_train, rc, cv=True)  #.528cv acc
ac = quick_eval(reduced_train, ac, cv=True)  #.529cv acc
et = quick_eval(reduced_train, et, cv=True)  #.5399cv acc
bc = quick_eval(reduced_train, bc, cv=True)  #.511cv acc
gbc = quick_eval(reduced_train, gbc, cv=True)  #.435 cv acc


#clf = quick_eval(reduced_train, clf, cv=True) #

# These were not impressive, dropping from future
#quick_eval(reduced_train, lr, cv=True)
#quick_eval(reduced_train, sgd, cv=True)
#quick_eval(reduced_train, nb, cv=True)
#quick_eval(reduced_train, knn, scale=True, cv=True)
#quick_eval(reduced_train, svc, scale=True, cv=True)

The CV qwk score of RandomForestClassifier is 0.31559657617418424
The CV qwk score of RidgeClassifier is 0.245585757125027
The CV qwk score of AdaBoostClassifier is 0.40474254791500375
The CV qwk score of ExtraTreesClassifier is 0.2894314096385525
The CV qwk score of BaggingClassifier is 0.44350649215216204
The CV qwk score of GradientBoostingClassifier is 0.3544783875481562


In [11]:
#quick_eval(reduced_train, mlp, cv=True)   # 0.295
#quick_eval(reduced_train, gpc) #   .498
#quick_eval(reduced_train, rbf, cv=True)   poor
#quick_eval(reduced_train, dt, cv=True)     #.411

In [12]:
# bc_rf_10 = BaggingClassifier(
#     base_estimator=RandomForestClassifier(max_depth=10),
#     n_estimators=20)#The accuracy of BaggingClassifier is 0.5387224420576597
#                     #The QWK of BaggingClassifier is 0.18304098870845487

# bc_rf_50 = BaggingClassifier(
#     base_estimator=RandomForestClassifier(max_depth=50),
#     n_estimators=20)#The accuracy of BaggingClassifier is 0.5729225551158846
#                     #The QWK of BaggingClassifier is 0.3474125031192433

bc_rf_5 = BaggingClassifier(
    base_estimator=RandomForestClassifier(max_depth=5),
    n_estimators=20)#The accuracy of BaggingClassifier is 0.5740531373657434
                    #The QWK of BaggingClassifier is 0.40494994168142406

bc_gbc = BaggingClassifier(
    base_estimator=GradientBoostingClassifier(),
    n_estimators=20)
# The accuracy of BaggingClassifier is 0.6237987563595252
# The QWK of BaggingClassifier is 0.5252575839341429

bc_abc = BaggingClassifier(
    base_estimator=AdaBoostClassifier(),
    n_estimators=20)
# The accuracy of BaggingClassifier is 0.610231769361221
# The QWK of BaggingClassifier is 0.4936903908658856

bc_rf_5 = quick_eval(reduced_train, bc_rf_5)
# quick_eval(reduced_train, bc_rf_10)    # 0.549 with 20 estimators max depth of 10  (.377 with balanced)    0.551 with 200 
# quick_eval(reduced_train, bc_rf_50) 
bc_gbc = quick_eval(reduced_train, bc_gbc)   # 0.576 with 20 estimators (.389 with balanced)                     0.578 with 200
bc_abc = quick_eval(reduced_train, bc_abc)   # 0.563 with 20 estimators  (.374 with balanced)                    0.561 with 200

The accuracy of BaggingClassifier is 0.5483563601715102
The QWK of BaggingClassifier is 0.10505023150576664
The accuracy of BaggingClassifier is 0.6322058122915674
The QWK of BaggingClassifier is 0.5057386002016913
The accuracy of BaggingClassifier is 0.6174368747022392
The QWK of BaggingClassifier is 0.4492911425958489


## Model Ensembling

In [13]:
estimators = [
    ('Adaboost',ac),
    ('rf', rf), 
    ('gbc', gbc),
    ('et', et),

    ('rc', rc),

    ('bc_rf_5', bc_rf_5),

    ('bc_gbc', bc_gbc),
    ('bc_abc', bc_abc),
]

stacking_clf = StackingClassifier(estimators=estimators, 
                final_estimator=RandomForestClassifier(), n_jobs=-1)        # created, load from pickle
quick_eval(reduced_train, stacking_clf)

The accuracy of StackingClassifier is 0.631252977608385
The QWK of StackingClassifier is 0.5359839503974366


StackingClassifier(cv=None,
                   estimators=[('Adaboost',
                                AdaBoostClassifier(algorithm='SAMME.R',
                                                   base_estimator=None,
                                                   learning_rate=1.0,
                                                   n_estimators=50,
                                                   random_state=None)),
                               ('rf',
                                RandomForestClassifier(bootstrap=True,
                                                       ccp_alpha=0.0,
                                                       class_weight=None,
                                                       criterion='gini',
                                                       max_depth=None,
                                                       max_features='auto',
                                                       max_leaf_nodes=None,
                         

In [14]:
vc = VotingClassifier(estimators=estimators,
     n_jobs=-1,
     voting='hard')

quick_eval(reduced_train, vc)#, cv=True)     #initial w/o catboost .564

The accuracy of VotingClassifier is 0.6226774654597428
The QWK of VotingClassifier is 0.4641093973000979


VotingClassifier(estimators=[('Adaboost',
                              AdaBoostClassifier(algorithm='SAMME.R',
                                                 base_estimator=None,
                                                 learning_rate=1.0,
                                                 n_estimators=50,
                                                 random_state=None)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
          