In [1]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from collections import Counter

# local imports
from prepare import *
from evaluate import *

### Read in initial datasets if needed

In [2]:
#raw_train, raw_train_labels, raw_test, specs, sample = read_raw_csvs()
raw_train_labels = pd.read_csv('data/train_labels.csv')

### Load compiled train/test datasets

In [22]:
train, test = load_and_prep(raw_train_labels)

### Start throwing model mud at the wall

In [4]:
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, \
                                ExtraTreesClassifier, BaggingClassifier, \
                                GradientBoostingClassifier, VotingClassifier, StackingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, SGDClassifier, RidgeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
import warnings
warnings.filterwarnings('ignore')  #Ridge classifier throws some warnings about ill-conditioned matrix

### Baseline accuracy of 50%

In [5]:
train.accuracy_group.value_counts(normalize=True)

3    0.500000
0    0.239062
1    0.136292
2    0.124647
Name: accuracy_group, dtype: float64

# Initialize Models and start testing accuracy

#### KNN and SVC will require scaling, others shouldn't

In [6]:
rf = RandomForestClassifier(verbose=1)
#lr = LogisticRegression()               ## if they are commented out, they weren't performing well (or operator error...)
#sgd = SGDClassifier()
rc = RidgeClassifier()
#nb = GaussianNB()
ac = AdaBoostClassifier()
et = ExtraTreesClassifier()
bc = BaggingClassifier()
gbc = GradientBoostingClassifier(verbose=1)
clf = CatBoostClassifier(
    loss_function='MultiClass',
    task_type="CPU",
    learning_rate=0.01,
    iterations=2000,
    od_type="Iter",
    early_stopping_rounds=500,
    random_seed=42
    )

In [7]:
knn = KNeighborsClassifier()
svc = SVC(probability=True, verbose=1)

## Evaluate model performance

In [8]:
# Catboost - accuracy 56%a
quick_eval(train, clf, cv=True)

0:	learn: 1.3823172	total: 72ms	remaining: 2m 23s
1:	learn: 1.3783212	total: 91.2ms	remaining: 1m 31s
2:	learn: 1.3745091	total: 105ms	remaining: 1m 10s
3:	learn: 1.3708089	total: 120ms	remaining: 59.7s
4:	learn: 1.3667784	total: 136ms	remaining: 54.2s
5:	learn: 1.3631899	total: 151ms	remaining: 50.3s
6:	learn: 1.3598716	total: 166ms	remaining: 47.1s
7:	learn: 1.3553795	total: 181ms	remaining: 45.1s
8:	learn: 1.3520789	total: 197ms	remaining: 43.6s
9:	learn: 1.3487954	total: 212ms	remaining: 42.3s
10:	learn: 1.3456351	total: 230ms	remaining: 41.6s
11:	learn: 1.3426928	total: 245ms	remaining: 40.7s
12:	learn: 1.3385257	total: 262ms	remaining: 40s
13:	learn: 1.3345459	total: 279ms	remaining: 39.6s
14:	learn: 1.3316149	total: 296ms	remaining: 39.1s
15:	learn: 1.3288025	total: 313ms	remaining: 38.9s
16:	learn: 1.3261157	total: 332ms	remaining: 38.7s
17:	learn: 1.3224595	total: 348ms	remaining: 38.3s
18:	learn: 1.3197844	total: 367ms	remaining: 38.3s
19:	learn: 1.3172817	total: 383ms	remain

170:	learn: 1.1506898	total: 3.12s	remaining: 33.4s
171:	learn: 1.1501672	total: 3.14s	remaining: 33.4s
172:	learn: 1.1496049	total: 3.16s	remaining: 33.4s
173:	learn: 1.1490507	total: 3.17s	remaining: 33.3s
174:	learn: 1.1482934	total: 3.19s	remaining: 33.3s
175:	learn: 1.1476593	total: 3.21s	remaining: 33.3s
176:	learn: 1.1472006	total: 3.23s	remaining: 33.2s
177:	learn: 1.1466613	total: 3.24s	remaining: 33.2s
178:	learn: 1.1462334	total: 3.27s	remaining: 33.2s
179:	learn: 1.1456792	total: 3.28s	remaining: 33.2s
180:	learn: 1.1451866	total: 3.3s	remaining: 33.2s
181:	learn: 1.1449950	total: 3.32s	remaining: 33.2s
182:	learn: 1.1447269	total: 3.34s	remaining: 33.2s
183:	learn: 1.1444470	total: 3.36s	remaining: 33.1s
184:	learn: 1.1439624	total: 3.38s	remaining: 33.1s
185:	learn: 1.1435474	total: 3.39s	remaining: 33.1s
186:	learn: 1.1431318	total: 3.42s	remaining: 33.1s
187:	learn: 1.1427546	total: 3.43s	remaining: 33.1s
188:	learn: 1.1425388	total: 3.45s	remaining: 33.1s
189:	learn: 1

330:	learn: 1.1085578	total: 5.91s	remaining: 29.8s
331:	learn: 1.1083047	total: 5.92s	remaining: 29.8s
332:	learn: 1.1080155	total: 5.94s	remaining: 29.7s
333:	learn: 1.1077228	total: 5.96s	remaining: 29.7s
334:	learn: 1.1075248	total: 5.98s	remaining: 29.7s
335:	learn: 1.1073940	total: 6s	remaining: 29.7s
336:	learn: 1.1072711	total: 6.01s	remaining: 29.7s
337:	learn: 1.1069595	total: 6.03s	remaining: 29.6s
338:	learn: 1.1066931	total: 6.04s	remaining: 29.6s
339:	learn: 1.1066014	total: 6.06s	remaining: 29.6s
340:	learn: 1.1064315	total: 6.08s	remaining: 29.6s
341:	learn: 1.1061634	total: 6.09s	remaining: 29.5s
342:	learn: 1.1060678	total: 6.11s	remaining: 29.5s
343:	learn: 1.1058736	total: 6.13s	remaining: 29.5s
344:	learn: 1.1057481	total: 6.14s	remaining: 29.5s
345:	learn: 1.1055788	total: 6.16s	remaining: 29.5s
346:	learn: 1.1053572	total: 6.18s	remaining: 29.5s
347:	learn: 1.1051812	total: 6.2s	remaining: 29.4s
348:	learn: 1.1050134	total: 6.22s	remaining: 29.4s
349:	learn: 1.10

494:	learn: 1.0851713	total: 8.68s	remaining: 26.4s
495:	learn: 1.0850618	total: 8.7s	remaining: 26.4s
496:	learn: 1.0848257	total: 8.72s	remaining: 26.4s
497:	learn: 1.0846863	total: 8.73s	remaining: 26.3s
498:	learn: 1.0845523	total: 8.75s	remaining: 26.3s
499:	learn: 1.0844488	total: 8.77s	remaining: 26.3s
500:	learn: 1.0843041	total: 8.79s	remaining: 26.3s
501:	learn: 1.0842458	total: 8.8s	remaining: 26.3s
502:	learn: 1.0841855	total: 8.82s	remaining: 26.2s
503:	learn: 1.0841072	total: 8.83s	remaining: 26.2s
504:	learn: 1.0840143	total: 8.85s	remaining: 26.2s
505:	learn: 1.0838784	total: 8.87s	remaining: 26.2s
506:	learn: 1.0836983	total: 8.89s	remaining: 26.2s
507:	learn: 1.0835831	total: 8.91s	remaining: 26.2s
508:	learn: 1.0835016	total: 8.93s	remaining: 26.2s
509:	learn: 1.0834440	total: 8.95s	remaining: 26.1s
510:	learn: 1.0832825	total: 8.96s	remaining: 26.1s
511:	learn: 1.0831426	total: 8.98s	remaining: 26.1s
512:	learn: 1.0829617	total: 9s	remaining: 26.1s
513:	learn: 1.082

656:	learn: 1.0684300	total: 11.4s	remaining: 23.4s
657:	learn: 1.0683744	total: 11.5s	remaining: 23.4s
658:	learn: 1.0682957	total: 11.5s	remaining: 23.4s
659:	learn: 1.0681732	total: 11.5s	remaining: 23.3s
660:	learn: 1.0681032	total: 11.5s	remaining: 23.3s
661:	learn: 1.0680108	total: 11.5s	remaining: 23.3s
662:	learn: 1.0678614	total: 11.5s	remaining: 23.3s
663:	learn: 1.0676974	total: 11.6s	remaining: 23.3s
664:	learn: 1.0675772	total: 11.6s	remaining: 23.3s
665:	learn: 1.0675058	total: 11.6s	remaining: 23.2s
666:	learn: 1.0673913	total: 11.6s	remaining: 23.2s
667:	learn: 1.0673366	total: 11.6s	remaining: 23.2s
668:	learn: 1.0672859	total: 11.6s	remaining: 23.2s
669:	learn: 1.0671869	total: 11.7s	remaining: 23.2s
670:	learn: 1.0671438	total: 11.7s	remaining: 23.1s
671:	learn: 1.0669916	total: 11.7s	remaining: 23.1s
672:	learn: 1.0669378	total: 11.7s	remaining: 23.1s
673:	learn: 1.0668546	total: 11.7s	remaining: 23.1s
674:	learn: 1.0667927	total: 11.8s	remaining: 23.1s
675:	learn: 

822:	learn: 1.0534221	total: 14.2s	remaining: 20.4s
823:	learn: 1.0533181	total: 14.3s	remaining: 20.4s
824:	learn: 1.0532670	total: 14.3s	remaining: 20.3s
825:	learn: 1.0531788	total: 14.3s	remaining: 20.3s
826:	learn: 1.0530847	total: 14.3s	remaining: 20.3s
827:	learn: 1.0529351	total: 14.3s	remaining: 20.3s
828:	learn: 1.0528525	total: 14.4s	remaining: 20.3s
829:	learn: 1.0527838	total: 14.4s	remaining: 20.3s
830:	learn: 1.0526911	total: 14.4s	remaining: 20.2s
831:	learn: 1.0526257	total: 14.4s	remaining: 20.2s
832:	learn: 1.0525912	total: 14.4s	remaining: 20.2s
833:	learn: 1.0524720	total: 14.4s	remaining: 20.2s
834:	learn: 1.0523939	total: 14.5s	remaining: 20.2s
835:	learn: 1.0522672	total: 14.5s	remaining: 20.2s
836:	learn: 1.0522018	total: 14.5s	remaining: 20.1s
837:	learn: 1.0521650	total: 14.5s	remaining: 20.1s
838:	learn: 1.0521393	total: 14.5s	remaining: 20.1s
839:	learn: 1.0521156	total: 14.5s	remaining: 20.1s
840:	learn: 1.0520713	total: 14.6s	remaining: 20.1s
841:	learn: 

984:	learn: 1.0394981	total: 17s	remaining: 17.6s
985:	learn: 1.0394065	total: 17.1s	remaining: 17.5s
986:	learn: 1.0393092	total: 17.1s	remaining: 17.5s
987:	learn: 1.0392428	total: 17.1s	remaining: 17.5s
988:	learn: 1.0390946	total: 17.1s	remaining: 17.5s
989:	learn: 1.0390048	total: 17.1s	remaining: 17.5s
990:	learn: 1.0389252	total: 17.1s	remaining: 17.5s
991:	learn: 1.0388627	total: 17.2s	remaining: 17.4s
992:	learn: 1.0387703	total: 17.2s	remaining: 17.4s
993:	learn: 1.0386684	total: 17.2s	remaining: 17.4s
994:	learn: 1.0385566	total: 17.2s	remaining: 17.4s
995:	learn: 1.0385083	total: 17.2s	remaining: 17.4s
996:	learn: 1.0384297	total: 17.3s	remaining: 17.4s
997:	learn: 1.0383458	total: 17.3s	remaining: 17.4s
998:	learn: 1.0382203	total: 17.3s	remaining: 17.3s
999:	learn: 1.0380933	total: 17.3s	remaining: 17.3s
1000:	learn: 1.0379835	total: 17.3s	remaining: 17.3s
1001:	learn: 1.0379216	total: 17.4s	remaining: 17.3s
1002:	learn: 1.0378690	total: 17.4s	remaining: 17.3s
1003:	learn

1141:	learn: 1.0253034	total: 19.8s	remaining: 14.9s
1142:	learn: 1.0252099	total: 19.8s	remaining: 14.9s
1143:	learn: 1.0250810	total: 19.9s	remaining: 14.9s
1144:	learn: 1.0249463	total: 19.9s	remaining: 14.8s
1145:	learn: 1.0247972	total: 19.9s	remaining: 14.8s
1146:	learn: 1.0247399	total: 19.9s	remaining: 14.8s
1147:	learn: 1.0246223	total: 19.9s	remaining: 14.8s
1148:	learn: 1.0245158	total: 19.9s	remaining: 14.8s
1149:	learn: 1.0244053	total: 20s	remaining: 14.8s
1150:	learn: 1.0242984	total: 20s	remaining: 14.7s
1151:	learn: 1.0242373	total: 20s	remaining: 14.7s
1152:	learn: 1.0241576	total: 20s	remaining: 14.7s
1153:	learn: 1.0240937	total: 20s	remaining: 14.7s
1154:	learn: 1.0239794	total: 20.1s	remaining: 14.7s
1155:	learn: 1.0238763	total: 20.1s	remaining: 14.7s
1156:	learn: 1.0238023	total: 20.1s	remaining: 14.6s
1157:	learn: 1.0236997	total: 20.1s	remaining: 14.6s
1158:	learn: 1.0236059	total: 20.1s	remaining: 14.6s
1159:	learn: 1.0234768	total: 20.2s	remaining: 14.6s
116

1297:	learn: 1.0120121	total: 22.6s	remaining: 12.2s
1298:	learn: 1.0119137	total: 22.6s	remaining: 12.2s
1299:	learn: 1.0118827	total: 22.6s	remaining: 12.2s
1300:	learn: 1.0118126	total: 22.7s	remaining: 12.2s
1301:	learn: 1.0117211	total: 22.7s	remaining: 12.2s
1302:	learn: 1.0116049	total: 22.7s	remaining: 12.1s
1303:	learn: 1.0114896	total: 22.7s	remaining: 12.1s
1304:	learn: 1.0114331	total: 22.7s	remaining: 12.1s
1305:	learn: 1.0113685	total: 22.8s	remaining: 12.1s
1306:	learn: 1.0113229	total: 22.8s	remaining: 12.1s
1307:	learn: 1.0112184	total: 22.8s	remaining: 12.1s
1308:	learn: 1.0111223	total: 22.8s	remaining: 12s
1309:	learn: 1.0110555	total: 22.8s	remaining: 12s
1310:	learn: 1.0109233	total: 22.9s	remaining: 12s
1311:	learn: 1.0108139	total: 22.9s	remaining: 12s
1312:	learn: 1.0107252	total: 22.9s	remaining: 12s
1313:	learn: 1.0106409	total: 22.9s	remaining: 12s
1314:	learn: 1.0105524	total: 22.9s	remaining: 11.9s
1315:	learn: 1.0104482	total: 23s	remaining: 11.9s
1316:	l

1455:	learn: 1.0001823	total: 25.6s	remaining: 9.57s
1456:	learn: 1.0000877	total: 25.6s	remaining: 9.55s
1457:	learn: 1.0000381	total: 25.6s	remaining: 9.53s
1458:	learn: 0.9999233	total: 25.7s	remaining: 9.51s
1459:	learn: 0.9998310	total: 25.7s	remaining: 9.5s
1460:	learn: 0.9997392	total: 25.7s	remaining: 9.48s
1461:	learn: 0.9996758	total: 25.7s	remaining: 9.46s
1462:	learn: 0.9995868	total: 25.7s	remaining: 9.45s
1463:	learn: 0.9995310	total: 25.8s	remaining: 9.43s
1464:	learn: 0.9994161	total: 25.8s	remaining: 9.41s
1465:	learn: 0.9993349	total: 25.8s	remaining: 9.39s
1466:	learn: 0.9992622	total: 25.8s	remaining: 9.38s
1467:	learn: 0.9991442	total: 25.8s	remaining: 9.36s
1468:	learn: 0.9990550	total: 25.8s	remaining: 9.34s
1469:	learn: 0.9990276	total: 25.9s	remaining: 9.32s
1470:	learn: 0.9989191	total: 25.9s	remaining: 9.31s
1471:	learn: 0.9988448	total: 25.9s	remaining: 9.29s
1472:	learn: 0.9987541	total: 25.9s	remaining: 9.27s
1473:	learn: 0.9986937	total: 25.9s	remaining: 

1620:	learn: 0.9888580	total: 28.4s	remaining: 6.63s
1621:	learn: 0.9888145	total: 28.4s	remaining: 6.62s
1622:	learn: 0.9887642	total: 28.4s	remaining: 6.6s
1623:	learn: 0.9886597	total: 28.4s	remaining: 6.58s
1624:	learn: 0.9886265	total: 28.4s	remaining: 6.57s
1625:	learn: 0.9885453	total: 28.5s	remaining: 6.55s
1626:	learn: 0.9884404	total: 28.5s	remaining: 6.53s
1627:	learn: 0.9883779	total: 28.5s	remaining: 6.51s
1628:	learn: 0.9883001	total: 28.5s	remaining: 6.5s
1629:	learn: 0.9882327	total: 28.5s	remaining: 6.48s
1630:	learn: 0.9881723	total: 28.6s	remaining: 6.46s
1631:	learn: 0.9880975	total: 28.6s	remaining: 6.44s
1632:	learn: 0.9880363	total: 28.6s	remaining: 6.43s
1633:	learn: 0.9879725	total: 28.6s	remaining: 6.41s
1634:	learn: 0.9879317	total: 28.6s	remaining: 6.39s
1635:	learn: 0.9878879	total: 28.6s	remaining: 6.37s
1636:	learn: 0.9878234	total: 28.7s	remaining: 6.36s
1637:	learn: 0.9877524	total: 28.7s	remaining: 6.34s
1638:	learn: 0.9877242	total: 28.7s	remaining: 6

1782:	learn: 0.9791180	total: 31.2s	remaining: 3.8s
1783:	learn: 0.9790198	total: 31.2s	remaining: 3.78s
1784:	learn: 0.9789876	total: 31.3s	remaining: 3.77s
1785:	learn: 0.9789215	total: 31.3s	remaining: 3.75s
1786:	learn: 0.9788670	total: 31.3s	remaining: 3.73s
1787:	learn: 0.9788062	total: 31.3s	remaining: 3.71s
1788:	learn: 0.9787401	total: 31.3s	remaining: 3.7s
1789:	learn: 0.9786692	total: 31.4s	remaining: 3.68s
1790:	learn: 0.9786225	total: 31.4s	remaining: 3.66s
1791:	learn: 0.9785371	total: 31.4s	remaining: 3.65s
1792:	learn: 0.9784820	total: 31.4s	remaining: 3.63s
1793:	learn: 0.9784144	total: 31.4s	remaining: 3.61s
1794:	learn: 0.9783517	total: 31.5s	remaining: 3.59s
1795:	learn: 0.9783253	total: 31.5s	remaining: 3.57s
1796:	learn: 0.9782195	total: 31.5s	remaining: 3.56s
1797:	learn: 0.9781373	total: 31.5s	remaining: 3.54s
1798:	learn: 0.9780604	total: 31.5s	remaining: 3.52s
1799:	learn: 0.9779768	total: 31.5s	remaining: 3.5s
1800:	learn: 0.9779036	total: 31.6s	remaining: 3.

1944:	learn: 0.9691281	total: 34s	remaining: 961ms
1945:	learn: 0.9690653	total: 34s	remaining: 943ms
1946:	learn: 0.9690130	total: 34s	remaining: 926ms
1947:	learn: 0.9689540	total: 34s	remaining: 908ms
1948:	learn: 0.9688918	total: 34s	remaining: 891ms
1949:	learn: 0.9688632	total: 34s	remaining: 873ms
1950:	learn: 0.9687998	total: 34.1s	remaining: 856ms
1951:	learn: 0.9687621	total: 34.1s	remaining: 838ms
1952:	learn: 0.9687267	total: 34.1s	remaining: 821ms
1953:	learn: 0.9686865	total: 34.1s	remaining: 803ms
1954:	learn: 0.9686050	total: 34.1s	remaining: 786ms
1955:	learn: 0.9685606	total: 34.1s	remaining: 768ms
1956:	learn: 0.9684914	total: 34.2s	remaining: 751ms
1957:	learn: 0.9684069	total: 34.2s	remaining: 733ms
1958:	learn: 0.9683395	total: 34.2s	remaining: 716ms
1959:	learn: 0.9682783	total: 34.2s	remaining: 698ms
1960:	learn: 0.9682314	total: 34.2s	remaining: 681ms
1961:	learn: 0.9681948	total: 34.3s	remaining: 663ms
1962:	learn: 0.9681694	total: 34.3s	remaining: 646ms
1963:

108:	learn: 1.1880790	total: 1.9s	remaining: 33s
109:	learn: 1.1874620	total: 1.92s	remaining: 33.1s
110:	learn: 1.1867979	total: 1.94s	remaining: 33s
111:	learn: 1.1862781	total: 1.96s	remaining: 33.1s
112:	learn: 1.1855759	total: 1.98s	remaining: 33.1s
113:	learn: 1.1851491	total: 2s	remaining: 33s
114:	learn: 1.1842245	total: 2.01s	remaining: 33s
115:	learn: 1.1833544	total: 2.03s	remaining: 33s
116:	learn: 1.1827389	total: 2.05s	remaining: 33s
117:	learn: 1.1817801	total: 2.06s	remaining: 32.9s
118:	learn: 1.1813648	total: 2.08s	remaining: 32.9s
119:	learn: 1.1804528	total: 2.1s	remaining: 32.9s
120:	learn: 1.1796265	total: 2.12s	remaining: 32.9s
121:	learn: 1.1787386	total: 2.13s	remaining: 32.9s
122:	learn: 1.1780514	total: 2.15s	remaining: 32.8s
123:	learn: 1.1772982	total: 2.17s	remaining: 32.8s
124:	learn: 1.1764738	total: 2.19s	remaining: 32.8s
125:	learn: 1.1756852	total: 2.21s	remaining: 32.8s
126:	learn: 1.1749083	total: 2.22s	remaining: 32.8s
127:	learn: 1.1743000	total: 

275:	learn: 1.1222568	total: 5.02s	remaining: 31.4s
276:	learn: 1.1220802	total: 5.04s	remaining: 31.3s
277:	learn: 1.1217909	total: 5.06s	remaining: 31.3s
278:	learn: 1.1215152	total: 5.07s	remaining: 31.3s
279:	learn: 1.1213384	total: 5.09s	remaining: 31.3s
280:	learn: 1.1211662	total: 5.11s	remaining: 31.2s
281:	learn: 1.1210480	total: 5.12s	remaining: 31.2s
282:	learn: 1.1208362	total: 5.14s	remaining: 31.2s
283:	learn: 1.1205946	total: 5.15s	remaining: 31.1s
284:	learn: 1.1204523	total: 5.17s	remaining: 31.1s
285:	learn: 1.1201465	total: 5.19s	remaining: 31.1s
286:	learn: 1.1199781	total: 5.21s	remaining: 31.1s
287:	learn: 1.1198302	total: 5.22s	remaining: 31s
288:	learn: 1.1196247	total: 5.24s	remaining: 31s
289:	learn: 1.1195155	total: 5.25s	remaining: 31s
290:	learn: 1.1193510	total: 5.27s	remaining: 31s
291:	learn: 1.1190633	total: 5.29s	remaining: 30.9s
292:	learn: 1.1187868	total: 5.3s	remaining: 30.9s
293:	learn: 1.1185794	total: 5.32s	remaining: 30.9s
294:	learn: 1.1184258

444:	learn: 1.0942404	total: 7.96s	remaining: 27.8s
445:	learn: 1.0941837	total: 7.97s	remaining: 27.8s
446:	learn: 1.0940613	total: 7.99s	remaining: 27.8s
447:	learn: 1.0939817	total: 8.01s	remaining: 27.7s
448:	learn: 1.0938993	total: 8.03s	remaining: 27.7s
449:	learn: 1.0937269	total: 8.04s	remaining: 27.7s
450:	learn: 1.0936161	total: 8.06s	remaining: 27.7s
451:	learn: 1.0933831	total: 8.08s	remaining: 27.7s
452:	learn: 1.0932364	total: 8.09s	remaining: 27.6s
453:	learn: 1.0930782	total: 8.11s	remaining: 27.6s
454:	learn: 1.0929619	total: 8.13s	remaining: 27.6s
455:	learn: 1.0928598	total: 8.14s	remaining: 27.6s
456:	learn: 1.0927369	total: 8.16s	remaining: 27.6s
457:	learn: 1.0926782	total: 8.18s	remaining: 27.5s
458:	learn: 1.0924773	total: 8.19s	remaining: 27.5s
459:	learn: 1.0923556	total: 8.21s	remaining: 27.5s
460:	learn: 1.0922465	total: 8.22s	remaining: 27.5s
461:	learn: 1.0920473	total: 8.24s	remaining: 27.4s
462:	learn: 1.0918770	total: 8.26s	remaining: 27.4s
463:	learn: 

603:	learn: 1.0761605	total: 10.7s	remaining: 24.7s
604:	learn: 1.0761170	total: 10.7s	remaining: 24.7s
605:	learn: 1.0760147	total: 10.7s	remaining: 24.7s
606:	learn: 1.0758869	total: 10.7s	remaining: 24.7s
607:	learn: 1.0758303	total: 10.8s	remaining: 24.6s
608:	learn: 1.0757259	total: 10.8s	remaining: 24.6s
609:	learn: 1.0756556	total: 10.8s	remaining: 24.6s
610:	learn: 1.0755536	total: 10.8s	remaining: 24.6s
611:	learn: 1.0754447	total: 10.8s	remaining: 24.6s
612:	learn: 1.0753832	total: 10.8s	remaining: 24.5s
613:	learn: 1.0753524	total: 10.9s	remaining: 24.5s
614:	learn: 1.0752321	total: 10.9s	remaining: 24.5s
615:	learn: 1.0751057	total: 10.9s	remaining: 24.5s
616:	learn: 1.0750386	total: 10.9s	remaining: 24.5s
617:	learn: 1.0749397	total: 10.9s	remaining: 24.4s
618:	learn: 1.0748770	total: 10.9s	remaining: 24.4s
619:	learn: 1.0747510	total: 11s	remaining: 24.4s
620:	learn: 1.0745841	total: 11s	remaining: 24.4s
621:	learn: 1.0745115	total: 11s	remaining: 24.4s
622:	learn: 1.0744

766:	learn: 1.0611426	total: 13.4s	remaining: 21.6s
767:	learn: 1.0610049	total: 13.5s	remaining: 21.6s
768:	learn: 1.0609486	total: 13.5s	remaining: 21.6s
769:	learn: 1.0607994	total: 13.5s	remaining: 21.6s
770:	learn: 1.0606858	total: 13.5s	remaining: 21.6s
771:	learn: 1.0606048	total: 13.5s	remaining: 21.5s
772:	learn: 1.0604767	total: 13.6s	remaining: 21.5s
773:	learn: 1.0604084	total: 13.6s	remaining: 21.5s
774:	learn: 1.0603667	total: 13.6s	remaining: 21.5s
775:	learn: 1.0602991	total: 13.6s	remaining: 21.5s
776:	learn: 1.0602453	total: 13.6s	remaining: 21.4s
777:	learn: 1.0601180	total: 13.6s	remaining: 21.4s
778:	learn: 1.0600436	total: 13.7s	remaining: 21.4s
779:	learn: 1.0600255	total: 13.7s	remaining: 21.4s
780:	learn: 1.0599425	total: 13.7s	remaining: 21.4s
781:	learn: 1.0598514	total: 13.7s	remaining: 21.4s
782:	learn: 1.0597754	total: 13.7s	remaining: 21.3s
783:	learn: 1.0597615	total: 13.7s	remaining: 21.3s
784:	learn: 1.0596191	total: 13.8s	remaining: 21.3s
785:	learn: 

927:	learn: 1.0464544	total: 16.2s	remaining: 18.7s
928:	learn: 1.0464041	total: 16.2s	remaining: 18.7s
929:	learn: 1.0462998	total: 16.2s	remaining: 18.7s
930:	learn: 1.0461703	total: 16.3s	remaining: 18.7s
931:	learn: 1.0460624	total: 16.3s	remaining: 18.7s
932:	learn: 1.0459823	total: 16.3s	remaining: 18.6s
933:	learn: 1.0458859	total: 16.3s	remaining: 18.6s
934:	learn: 1.0458391	total: 16.3s	remaining: 18.6s
935:	learn: 1.0458084	total: 16.4s	remaining: 18.6s
936:	learn: 1.0456864	total: 16.4s	remaining: 18.6s
937:	learn: 1.0456193	total: 16.4s	remaining: 18.6s
938:	learn: 1.0455621	total: 16.4s	remaining: 18.6s
939:	learn: 1.0454470	total: 16.4s	remaining: 18.5s
940:	learn: 1.0453870	total: 16.5s	remaining: 18.5s
941:	learn: 1.0452714	total: 16.5s	remaining: 18.5s
942:	learn: 1.0451786	total: 16.5s	remaining: 18.5s
943:	learn: 1.0450866	total: 16.5s	remaining: 18.5s
944:	learn: 1.0450418	total: 16.5s	remaining: 18.4s
945:	learn: 1.0449296	total: 16.5s	remaining: 18.4s
946:	learn: 

1091:	learn: 1.0314915	total: 19.2s	remaining: 15.9s
1092:	learn: 1.0314638	total: 19.2s	remaining: 15.9s
1093:	learn: 1.0313591	total: 19.2s	remaining: 15.9s
1094:	learn: 1.0312476	total: 19.2s	remaining: 15.9s
1095:	learn: 1.0311989	total: 19.2s	remaining: 15.9s
1096:	learn: 1.0310979	total: 19.3s	remaining: 15.8s
1097:	learn: 1.0309786	total: 19.3s	remaining: 15.8s
1098:	learn: 1.0308974	total: 19.3s	remaining: 15.8s
1099:	learn: 1.0307839	total: 19.3s	remaining: 15.8s
1100:	learn: 1.0306805	total: 19.3s	remaining: 15.8s
1101:	learn: 1.0306212	total: 19.4s	remaining: 15.8s
1102:	learn: 1.0305174	total: 19.4s	remaining: 15.8s
1103:	learn: 1.0304218	total: 19.4s	remaining: 15.7s
1104:	learn: 1.0303526	total: 19.4s	remaining: 15.7s
1105:	learn: 1.0302533	total: 19.4s	remaining: 15.7s
1106:	learn: 1.0301520	total: 19.5s	remaining: 15.7s
1107:	learn: 1.0300826	total: 19.5s	remaining: 15.7s
1108:	learn: 1.0299775	total: 19.5s	remaining: 15.7s
1109:	learn: 1.0298997	total: 19.5s	remaining:

1254:	learn: 1.0172458	total: 22.3s	remaining: 13.2s
1255:	learn: 1.0171569	total: 22.3s	remaining: 13.2s
1256:	learn: 1.0170918	total: 22.4s	remaining: 13.2s
1257:	learn: 1.0170551	total: 22.4s	remaining: 13.2s
1258:	learn: 1.0169960	total: 22.4s	remaining: 13.2s
1259:	learn: 1.0169151	total: 22.4s	remaining: 13.2s
1260:	learn: 1.0168664	total: 22.4s	remaining: 13.1s
1261:	learn: 1.0167562	total: 22.4s	remaining: 13.1s
1262:	learn: 1.0166553	total: 22.5s	remaining: 13.1s
1263:	learn: 1.0165720	total: 22.5s	remaining: 13.1s
1264:	learn: 1.0165029	total: 22.5s	remaining: 13.1s
1265:	learn: 1.0163995	total: 22.5s	remaining: 13.1s
1266:	learn: 1.0162899	total: 22.5s	remaining: 13s
1267:	learn: 1.0162555	total: 22.5s	remaining: 13s
1268:	learn: 1.0162107	total: 22.6s	remaining: 13s
1269:	learn: 1.0161287	total: 22.6s	remaining: 13s
1270:	learn: 1.0160454	total: 22.6s	remaining: 13s
1271:	learn: 1.0159141	total: 22.6s	remaining: 12.9s
1272:	learn: 1.0158550	total: 22.6s	remaining: 12.9s
127

1415:	learn: 1.0050215	total: 25.1s	remaining: 10.4s
1416:	learn: 1.0049222	total: 25.1s	remaining: 10.3s
1417:	learn: 1.0048027	total: 25.1s	remaining: 10.3s
1418:	learn: 1.0047324	total: 25.2s	remaining: 10.3s
1419:	learn: 1.0046399	total: 25.2s	remaining: 10.3s
1420:	learn: 1.0045747	total: 25.2s	remaining: 10.3s
1421:	learn: 1.0044448	total: 25.2s	remaining: 10.3s
1422:	learn: 1.0043967	total: 25.2s	remaining: 10.2s
1423:	learn: 1.0042923	total: 25.3s	remaining: 10.2s
1424:	learn: 1.0042575	total: 25.3s	remaining: 10.2s
1425:	learn: 1.0041841	total: 25.3s	remaining: 10.2s
1426:	learn: 1.0041156	total: 25.3s	remaining: 10.2s
1427:	learn: 1.0040645	total: 25.3s	remaining: 10.2s
1428:	learn: 1.0039532	total: 25.4s	remaining: 10.1s
1429:	learn: 1.0039187	total: 25.4s	remaining: 10.1s
1430:	learn: 1.0038410	total: 25.4s	remaining: 10.1s
1431:	learn: 1.0037828	total: 25.4s	remaining: 10.1s
1432:	learn: 1.0037122	total: 25.4s	remaining: 10.1s
1433:	learn: 1.0036381	total: 25.5s	remaining:

KeyboardInterrupt: 

In [None]:
quick_eval(train, rf, cv=True)
#quick_eval(train, lr, cv=True)
#quick_eval(train, sgd, cv=True)
quick_eval(train, rc, cv=True)
#quick_eval(train, nb, cv=True)
quick_eval(train, ac, cv=True)
quick_eval(train, et, cv=True)
quick_eval(train, bc, cv=True)
quick_eval(train, gbc, cv=True)
quick_eval(train, clf, cv=True)


#quick_eval(train, knn, scale=True, cv=True)
quick_eval(train, svc, scale=True, cv=True)

## more models, to include sklearn neural net

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier

In [10]:
mlp = MLPClassifier(verbose=1)
#gpc = GaussianProcessClassifier(verbose=1) #takes a long time
#rbf = RBF()
dt = DecisionTreeClassifier()

In [None]:
quick_eval(train, mlp, cv=True)
#quick_eval(train, gpc, cv=True)    TRY LATER, THIS WAS VERY SLOW
#quick_eval(train, rbf, cv=True)   poor
quick_eval(train, dt, cv=True)    

In [11]:
bc_rf = BaggingClassifier(
    base_estimator=RandomForestClassifier(max_depth=10),
    n_estimators=20)

bc_gbc = BaggingClassifier(
    base_estimator=GradientBoostingClassifier(),
    n_estimators=20)

bc_abc = BaggingClassifier(
    base_estimator=AdaBoostClassifier(),
    n_estimators=20)

#quick_eval(train, bc_rf)    # 0.549
#quick_eval(train, bc_gbc)   # 0.576
#quick_eval(train, bc_abc)   # 0.563

## Model Ensembling

In [12]:
vc = VotingClassifier(estimators=[
    ('Adaboost',ac),
    ('rf', rf), 
    ('gbc', gbc),
    ('et', et),
    ('svc', svc),
    ('rc', rc),
    ('mlp', mlp),
    ('dt', dt),
    ('bc_rf', bc_rf),
    ('bc_gbc', bc_gbc),
    ('bc_abc', bc_abc),
    #('catboost', clf)
    ],
                      n_jobs=-1,
                     voting='hard')

print('start eval')
quick_eval(train, vc)#, cv=True)     #initial w/o catboost .564

start eval


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


The accuracy of VotingClassifier is 0.5604861503674392


('VotingClassifier', 0.5604861503674392)

In [13]:
estimators = [
    ('Adaboost',ac),
    ('rf', rf), 
    ('gbc', gbc),
    ('et', et),
    ('svc', svc),
    ('rc', rc),
    ('mlp', mlp),
    ('dt', dt),
    ('bc_rf', bc_rf),
    ('bc_gbc', bc_gbc),
    ('bc_abc', bc_abc),
]
stacking_clf = StackingClassifier(estimators=estimators, final_estimator=RandomForestClassifier())
quick_eval(train, stacking_clf)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    4.1s finished


      Iter       Train Loss   Remaining Time 
         1       16978.4861           31.08s
         2       16817.0666           30.77s
         3       16642.2490           30.01s
         4       16510.3172           29.50s
         5       16422.2883           29.07s
         6       16340.6270           28.70s
         7       16255.6865           28.36s
         8       16194.3187           28.07s
         9       16142.4173           27.71s
        10       16087.0065           27.36s
        20       15636.0775           24.21s
        30       15311.6369           21.14s
        40       15069.8481           18.11s
        50       14879.6563           15.08s
        60       14696.4078           12.05s
        70       14549.9058            9.03s
        80       14406.0025            6.02s
        90       14273.0728            3.02s
       100       14143.0882            0.00s
[LibSVM]Iteration 1, loss = inf
Iteration 2, loss = inf
Iteration 3, loss = inf
Iteration 4, loss =

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_j

      Iter       Train Loss   Remaining Time 
         1       13575.6663           27.13s
         2       13442.6125           26.26s
         3       13301.6421           25.61s
         4       13215.3168           25.46s
         5       13147.9893           25.69s
         6       13057.3801           26.13s
         7       13004.1543           27.13s
         8       12934.7473           27.15s
         9       12886.4462           26.93s
        10       12830.3645           26.93s
        20       12422.6806           22.46s
        30       12150.8851           19.05s
        40       11940.6606           16.14s
        50       11755.5531           13.30s
        60       11597.0140           10.61s
        70       11447.3265            8.02s
        80       11314.9436            5.38s
        90       11194.3068            2.69s
       100       11083.9757            0.00s
      Iter       Train Loss   Remaining Time 
         1       13649.4483           25.65s
        

Iteration 146, loss = inf
Iteration 147, loss = inf
Iteration 148, loss = inf
Iteration 149, loss = inf
Iteration 150, loss = inf
Iteration 151, loss = inf
Iteration 152, loss = inf
Iteration 153, loss = inf
Iteration 154, loss = inf
Iteration 155, loss = inf
Iteration 156, loss = inf
Iteration 157, loss = inf
Iteration 158, loss = inf
Iteration 159, loss = inf
Iteration 160, loss = inf
Iteration 161, loss = inf
Iteration 162, loss = inf
Iteration 163, loss = inf
Iteration 164, loss = inf
Iteration 165, loss = inf
Iteration 166, loss = inf
Iteration 167, loss = inf
Iteration 168, loss = inf
Iteration 169, loss = inf
Iteration 170, loss = inf
Iteration 171, loss = inf
Iteration 172, loss = inf
Iteration 173, loss = inf
Iteration 174, loss = inf
Iteration 175, loss = inf
Iteration 176, loss = inf
Iteration 177, loss = inf
Iteration 178, loss = inf
Iteration 179, loss = inf
Iteration 180, loss = inf
Iteration 181, loss = inf
Iteration 182, loss = inf
Iteration 183, loss = inf
Iteration 18

Iteration 69, loss = inf
Iteration 70, loss = inf
Iteration 71, loss = inf
Iteration 72, loss = inf
Iteration 73, loss = inf
Iteration 74, loss = inf
Iteration 75, loss = inf
Iteration 76, loss = inf
Iteration 77, loss = inf
Iteration 78, loss = inf
Iteration 79, loss = inf
Iteration 80, loss = inf
Iteration 81, loss = inf
Iteration 82, loss = inf
Iteration 83, loss = inf
Iteration 84, loss = inf
Iteration 85, loss = inf
Iteration 86, loss = inf
Iteration 87, loss = inf
Iteration 88, loss = inf
Iteration 89, loss = inf
Iteration 90, loss = inf
Iteration 91, loss = inf
Iteration 92, loss = inf
Iteration 93, loss = inf
Iteration 94, loss = inf
Iteration 95, loss = inf
Iteration 96, loss = inf
Iteration 97, loss = inf
Iteration 98, loss = inf
Iteration 99, loss = inf
Iteration 100, loss = inf
Iteration 101, loss = inf
Iteration 102, loss = inf
Iteration 103, loss = inf
Iteration 104, loss = inf
Iteration 105, loss = inf
Iteration 106, loss = inf
Iteration 107, loss = inf
Iteration 108, lo

Iteration 190, loss = inf
Iteration 191, loss = inf
Iteration 192, loss = inf
Iteration 193, loss = inf
Iteration 194, loss = inf
Iteration 195, loss = inf
Iteration 196, loss = inf
Iteration 197, loss = inf
Iteration 198, loss = inf
Iteration 199, loss = inf
Iteration 200, loss = inf
Iteration 1, loss = inf
Iteration 2, loss = inf
Iteration 3, loss = inf
Iteration 4, loss = inf
Iteration 5, loss = inf
Iteration 6, loss = inf
Iteration 7, loss = inf
Iteration 8, loss = inf
Iteration 9, loss = inf
Iteration 10, loss = inf
Iteration 11, loss = inf
Iteration 12, loss = inf
Iteration 13, loss = inf
Iteration 14, loss = inf
Iteration 15, loss = inf
Iteration 16, loss = inf
Iteration 17, loss = inf
Iteration 18, loss = inf
Iteration 19, loss = inf
Iteration 20, loss = inf
Iteration 21, loss = inf
Iteration 22, loss = inf
Iteration 23, loss = inf
Iteration 24, loss = inf
Iteration 25, loss = inf
Iteration 26, loss = inf
Iteration 27, loss = inf
Iteration 28, loss = inf
Iteration 29, loss = in

KeyboardInterrupt: 

In [29]:
joblib.dump(stacking_clf, 'fitted_stacked_classifier.pkl')

['fitted_stacked_classifier.pkl']

In [32]:
test = test._get_numeric_data()
#stacking_clf.predict()

test_prediction = stacking_clf.predict(test)

# To create a submission:
submission = pd.DataFrame()
submission['installation_id'] = test.installation_id
submission['accuracy_group'] = test_prediction
#submission.head()
submission.to_csv('preds01032020.csv')
submission.accuracy_group.value_counts()

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


NotFittedError: This RandomForestClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.