# Instant Gratification Solution (118th place, top 7%, bronze medal)

![](https://storage.googleapis.com/kaggle-media/competitions/general/Kerneler-white-desc2_transparent.png)

## 1. Dependencies and utility functions

In [1]:
# Dependencies
import subprocess
import re
import sys
import os
import glob
import warnings
import ctypes
import time
from tqdm import tqdm

import numpy as np
import pandas as pd

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.mixture import GaussianMixture
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.covariance import GraphicalLasso
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Optimization code to make BLAS single-threaded

_MKL_ = 'mkl'
_OPENBLAS_ = 'openblas'


class BLAS:
    def __init__(self, cdll, kind):
        if kind not in (_MKL_, _OPENBLAS_):
            raise ValueError(f'kind must be {MKL} or {OPENBLAS}, got {kind} instead.')
        
        self.kind = kind
        self.cdll = cdll
        
        if kind == _MKL_:
            self.get_n_threads = cdll.MKL_Get_Max_Threads
            self.set_n_threads = cdll.MKL_Set_Num_Threads
        else:
            self.get_n_threads = cdll.openblas_get_num_threads
            self.set_n_threads = cdll.openblas_set_num_threads
            

def get_blas(numpy_module):
    LDD = 'ldd'
    LDD_PATTERN = r'^\t(?P<lib>.*{}.*) => (?P<path>.*) \(0x.*$'

    NUMPY_PATH = os.path.join(numpy_module.__path__[0], 'core')
    MULTIARRAY_PATH = glob.glob(os.path.join(NUMPY_PATH, '_multiarray_umath.*so'))[0]
    ldd_result = subprocess.run(
        args=[LDD, MULTIARRAY_PATH], 
        check=True,
        stdout=subprocess.PIPE, 
        universal_newlines=True
    )

    output = ldd_result.stdout

    if _MKL_ in output:
        kind = _MKL_
    elif _OPENBLAS_ in output:
        kind = _OPENBLAS_
    else:
        return

    pattern = LDD_PATTERN.format(kind)
    match = re.search(pattern, output, flags=re.MULTILINE)

    if match:
        lib = ctypes.CDLL(match.groupdict()['path'])
        return BLAS(lib, kind)
    

class single_threaded:
    def __init__(self, numpy_module=None):
        if numpy_module is not None:
            self.blas = get_blas(numpy_module)
        else:
            import numpy
            self.blas = get_blas(numpy)

    def __enter__(self):
        if self.blas is not None:
            self.old_n_threads = self.blas.get_n_threads()
            self.blas.set_n_threads(1)
        else:
            warnings.warn(
                'No MKL/OpenBLAS found, assuming NumPy is single-threaded.'
            )

    def __exit__(self, *args):
        if self.blas is not None:
            self.blas.set_n_threads(self.old_n_threads)
            if self.blas.get_n_threads() != self.old_n_threads:
                message = (
                    f'Failed to reset {self.blas.kind} '
                    f'to {self.old_n_threads} threads (previous value).'
                )
                raise RuntimeError(message)
    
    def __call__(self, func):
        def _func(*args, **kwargs):
            self.__enter__()
            func_result = func(*args, **kwargs)
            self.__exit__()
            return func_result
        return _func

In [3]:
# Load the data
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')

train.head()

Unnamed: 0,id,muggy-smalt-axolotl-pembus,dorky-peach-sheepdog-ordinal,slimy-seashell-cassowary-goose,snazzy-harlequin-chicken-distraction,frumpy-smalt-mau-ordinal,stealthy-beige-pinscher-golden,chummy-cream-tarantula-entropy,hazy-emerald-cuttlefish-unsorted,nerdy-indigo-wolfhound-sorted,leaky-amaranth-lizard-sorted,ugly-tangerine-chihuahua-important,shaggy-silver-indri-fimbus,flaky-chocolate-beetle-grandmaster,squirrely-harlequin-sheep-sumble,freaky-tan-angelfish-noise,lousy-plum-penguin-sumble,bluesy-rose-wallaby-discard,baggy-copper-oriole-dummy,stealthy-scarlet-hound-fepid,greasy-cinnamon-bonobo-contributor,cranky-cardinal-dogfish-ordinal,snippy-auburn-vole-learn,greasy-sepia-coral-dataset,flabby-tangerine-fowl-entropy,lousy-smalt-pinscher-dummy,bluesy-brass-chihuahua-distraction,goopy-eggplant-indri-entropy,homey-sepia-bombay-sorted,homely-ruby-bulldog-entropy,hasty-blue-sheep-contributor,blurry-wisteria-oyster-master,snoopy-auburn-dogfish-expert,stinky-maroon-blue-kernel,bumpy-amaranth-armadillo-important,slaphappy-peach-oyster-master,dorky-tomato-ragdoll-dataset,messy-mauve-wolverine-ordinal,geeky-pumpkin-moorhen-important,crabby-teal-otter-unsorted,...,beady-mauve-frog-distraction,surly-brass-maltese-ordinal,beady-asparagus-opossum-expert,beady-rust-impala-dummy,droopy-amethyst-dachshund-hint,homey-crimson-budgerigar-grandmaster,droopy-cardinal-impala-important,woozy-apricot-moose-hint,paltry-sapphire-labradoodle-dummy,crappy-carmine-eagle-entropy,greasy-magnolia-spider-grandmaster,crabby-carmine-flounder-sorted,skimpy-copper-fowl-grandmaster,hasty-seashell-woodpecker-hint,snappy-purple-bobcat-important,thirsty-carmine-corgi-ordinal,homely-auburn-reindeer-unsorted,crappy-beige-tiger-fepid,cranky-auburn-swan-novice,chewy-bistre-buzzard-expert,skinny-cyan-macaque-pembus,slimy-periwinkle-otter-expert,snazzy-burgundy-clam-novice,cozy-ochre-gorilla-gaussian,homey-sangria-wolfhound-dummy,snazzy-asparagus-hippopotamus-contributor,paltry-red-hamster-sorted,zippy-dandelion-insect-golden,baggy-coral-bandicoot-unsorted,goopy-lavender-wolverine-fimbus,wheezy-myrtle-mandrill-entropy,wiggy-lilac-lemming-sorted,gloppy-cerise-snail-contributor,woozy-silver-havanese-gaussian,jumpy-thistle-discus-sorted,muggy-turquoise-donkey-important,blurry-buff-hyena-entropy,bluesy-chocolate-kudu-fepid,gamy-white-monster-expert,target
0,707b395ecdcbb4dc2eabea00e4d1b179,-2.070654,1.01816,0.228643,0.857221,0.052271,0.230303,-6.38509,0.439369,-0.721946,-0.227027,0.575964,1.541908,1.745286,-0.624271,3.600958,1.176489,-0.182776,-0.228391,1.682263,-0.833236,-4.377688,-5.37241,-0.477742,-0.179005,-0.516475,0.127391,-0.857591,-0.4615,2.160303,-2.118371,0.515493,-1.201493,-0.027377,-1.154024,0.753204,-0.179651,-0.807341,-1.663626,0.893806,...,-1.829848,2.347131,0.082462,-1.012654,0.593752,2.904654,-0.428974,-0.919979,2.849575,-0.906744,0.729459,0.38614,0.319814,-0.407682,-0.170667,-1.242919,-1.719046,-0.132395,-0.368991,-5.112553,-2.085988,-0.897257,1.080671,-0.273262,0.342824,0.640177,-0.415298,-0.483126,-0.080799,2.416224,0.351895,0.618824,-1.542423,0.598175,0.611757,0.678772,0.247059,-0.806677,-0.193649,0
1,5880c03c6582a7b42248668e56b4bdec,-0.491702,0.082645,-0.011193,1.071266,-0.346347,-0.082209,0.110579,-0.382374,-0.22962,0.78398,-1.280579,-1.00348,-7.753201,-1.320547,0.919078,-1.036068,0.030213,0.910172,-0.905345,0.646641,-0.465291,-0.531735,-0.756781,0.193724,0.224277,-0.474412,1.885805,0.205439,-6.481422,1.03562,-0.453623,0.375936,-0.32067,-0.144646,-0.220129,0.577826,-0.360512,-0.600107,0.008111,...,0.982205,-1.161978,0.532269,1.133215,0.003503,-1.390962,0.158572,0.143794,-0.317185,1.017192,-0.395342,-0.642357,-0.627209,0.257271,-1.461564,0.325613,1.628369,0.64004,0.750735,1.164573,0.900373,0.063489,0.948158,0.273014,-1.269147,-0.251101,-2.271731,-0.044167,-0.443766,-1.144794,-0.645115,-1.24609,2.613357,-0.479664,1.581289,0.931258,0.151937,-0.766595,0.474351,0
2,4ccbcb3d13e5072ff1d9c61afe2c4f77,-1.680473,0.860529,-1.076195,0.740124,3.678445,0.288558,0.515875,0.92059,-1.223277,-1.02978,-2.203397,-7.088717,0.438218,-0.848173,1.542666,-2.166858,-0.86767,-0.980947,0.567793,1.32343,-2.0767,-0.291598,-1.564816,-8.718695,0.340144,-0.566402,0.844324,0.816421,-1.019114,-0.881431,-2.28571,-0.090958,-0.89844,-0.584417,-0.14366,-0.182084,0.798516,0.010756,-0.347155,...,0.829467,0.588236,0.427946,-0.563037,-0.10399,-0.817698,1.251046,-0.977157,2.7326,1.997984,-0.214285,-0.389428,-1.007633,0.336435,-0.851292,-0.024184,0.455908,0.458753,-0.26723,-2.032402,0.203082,0.654107,-3.512338,-0.840937,0.519407,-0.028053,-1.621083,0.142132,1.514664,0.828815,0.516422,0.130521,-0.45921,2.028205,-0.093968,-0.218274,-0.163136,-0.870289,0.064038,1
3,e350f17a357f12a1941f0837afb7eb8d,0.183774,0.919134,-0.946958,0.918492,0.862278,1.155287,0.911106,0.562598,-1.349685,-1.182729,0.003159,-0.626847,0.36898,1.560784,0.502851,-0.10805,0.633208,-0.411502,-3.201592,-0.710612,0.786816,0.500979,-1.040048,-1.36917,0.987666,-0.681838,-0.331372,2.254289,-0.00933,2.007067,1.20375,-2.003928,-0.566088,0.223452,0.434202,-1.203766,-0.10349,0.441111,1.818458,...,-2.231836,0.833236,-0.454226,-1.614694,0.159948,-0.150059,-1.570599,0.960839,0.102214,0.077236,0.852834,-1.265608,-3.21919,0.251194,0.215861,-0.00952,1.611203,1.679806,-0.008419,0.658384,-0.132437,-1.466823,-1.57708,-0.800346,1.960795,-4.0429,1.722143,-0.261888,-1.145005,-1.864582,-1.168967,1.385089,-0.353028,3.31615,-0.524087,-0.794327,3.936365,0.682989,-2.521211,0
4,a8f910ea6075b6376af079055965ff68,-0.203933,-0.177252,0.368074,-0.70132,-1.104391,0.73576,0.894273,-1.375826,-5.144946,-2.048711,0.629773,-4.252669,-0.08742,-0.794367,-1.063963,0.115997,0.89518,3.184848,2.05784,-0.950821,0.961059,-1.837828,-0.437156,-0.828433,0.373747,-0.099787,-0.97628,-0.165921,3.297221,3.914132,-4.971376,-0.28652,-0.160133,-3.301453,-1.021032,-0.562744,0.574065,-0.368194,-0.507458,...,0.178099,-0.410396,-1.184236,1.681727,0.589606,0.064222,0.258885,0.560241,-1.545597,0.822283,1.518209,0.460143,0.822488,1.362718,0.21856,-1.038514,1.000763,-0.975878,-0.551268,-0.133044,-0.393092,1.236473,1.6571,0.83302,0.665379,-0.900025,0.291908,0.482727,0.552399,0.970496,-0.279168,1.544356,2.959727,1.641201,-0.130818,-0.264292,-0.748668,0.964218,0.087079,0


## 2. Models

### 2.1. QDA with PL from GL+GMM

In [4]:
# Function that estimates mean and covariance using Graphical Lasso model
def get_mean_cov(x,y):
    model = GraphicalLasso(alpha=0.05)
    ones = (y==1).astype(bool)
    x2 = x[ones]
    model.fit(x2)
    p1 = model.precision_
    m1 = model.location_
    
    onesb = (y==0).astype(bool)
    x2b = x[onesb]
    model.fit(x2b)
    p2 = model.precision_
    m2 = model.location_
    
    ms = np.stack([m1, m2])
    ps = np.stack([p1, p2])
    return ms, ps

In [5]:
# Train GMM model

cols = [c for c in train.columns if c not in ['id', 'target']]
cols.remove('wheezy-copper-turtle-magic')
oof = np.zeros(len(train))
preds = np.zeros(len(test))

with single_threaded(np):
    for i in tqdm(range(512)):
        train2 = train[train['wheezy-copper-turtle-magic']==i]
        test2 = test[test['wheezy-copper-turtle-magic']==i]
        idx1 = train2.index; idx2 = test2.index
        train2.reset_index(drop=True,inplace=True)

        sel = VarianceThreshold(threshold=1.5).fit(train2[cols])
        train3 = sel.transform(train2[cols])
        test3 = sel.transform(test2[cols])

        skf = StratifiedKFold(n_splits=11, random_state=42, shuffle=True)
        for train_index, test_index in skf.split(train3, train2['target']):

            ms, ps = get_mean_cov(train3[train_index, :],train2.loc[train_index]['target'].values)

            gm = GaussianMixture(n_components=2, init_params='kmeans', covariance_type='full',
                                 tol=0.001,reg_covar=0.001, max_iter=100, n_init=1,
                                 means_init=ms, precisions_init=ps, random_state=1)
            gm.fit(np.concatenate([train3, test3], axis = 0))
            oof[idx1[test_index]] = gm.predict_proba(train3[test_index, :])[:, 0]
            preds[idx2] += gm.predict_proba(test3)[:, 0] / skf.n_splits

auc = roc_auc_score(train['target'], oof)
print('GMM CV: ',round(auc, 5))

100%|██████████| 512/512 [05:47<00:00,  1.58it/s]

GMM CV:  0.9675





In [6]:
# Collect numbers of useful features using Variance Threshold
# to use them in PCA as n_components

cat_dict = dict()

cols = [c for c in train.columns if c not in ['id', 'target']]
cols.remove('wheezy-copper-turtle-magic')

for i in range(512):
    train2 = train[train['wheezy-copper-turtle-magic']==i]
    test2 = test[test['wheezy-copper-turtle-magic']==i]
    idx1 = train2.index; idx2 = test2.index
    train2.reset_index(drop=True,inplace=True)
    
    sel = VarianceThreshold(threshold=1.5).fit(train2[cols])
    train3 = sel.transform(train2[cols])
    test3 = sel.transform(test2[cols])
        
    cat_dict[i] = train3.shape[1]

In [7]:
# Train QDA model with PL from GMM

test['target'] = preds
oof1 = np.zeros(len(train))
preds1 = np.zeros(len(test))

with single_threaded(np):
    for k in tqdm(range(512)):
        train2 = train[train['wheezy-copper-turtle-magic']==k] 
        train2p = train2.copy(); idx1 = train2.index 
        test2 = test[test['wheezy-copper-turtle-magic']==k]
        
        # Using pseudolabels with confidence <= 0.15 or >= 0.85
        test2p = test2[(test2['target']<=0.15) | (test2['target']>=0.85)].copy()
        test2p.loc[test2p['target']>=0.5, 'target'] = 1
        test2p.loc[test2p['target']<0.5, 'target'] = 0 
        train2p = pd.concat([train2p, test2p], axis=0)
        train2p.reset_index(drop=True, inplace=True)

        sel = VarianceThreshold(threshold=1.5)
        sel.fit(train2p[cols])
        train3p = sel.transform(train2p[cols])
        train3 = sel.transform(train2[cols])
        test3 = sel.transform(test2[cols])

        skf = StratifiedKFold(n_splits=11, random_state=42, shuffle=True)
        for train_index, test_index in skf.split(train3p, train2p['target']):
            test_index3 = test_index[test_index < len(train3)]
            clf = QuadraticDiscriminantAnalysis(reg_param=0.4)
            clf.fit(train3p[train_index, :],train2p.loc[train_index]['target'])
            oof1[idx1[test_index3]] += clf.predict_proba(train3[test_index3, :])[:, 1]

            preds1[test2.index] += clf.predict_proba(test3)[:, 1] / skf.n_splits
        
auc = roc_auc_score(train['target'], oof1)
print('Model 1 CV: ', round(auc, 5))

100%|██████████| 512/512 [00:45<00:00, 11.28it/s]

Model 1 CV:  0.9695





### 2.2. PCA+QDA with PL from GL+GMM

In [8]:
# Train PCA+QDA using previously computed predictions from GMM 
# and PCA n_components
test['target'] = preds
oof2 = np.zeros(len(train))
preds2 = np.zeros(len(test))

with single_threaded(np):
    for k in tqdm(range(512)):
        train2 = train[train['wheezy-copper-turtle-magic']==k] 
        train2p = train2.copy(); idx1 = train2.index 
        test2 = test[test['wheezy-copper-turtle-magic']==k]

        # Using pseudolabels with confidence <= 0.2 or >= 0.8
        test2p = test2[(test2['target']<=0.2) | (test2['target']>=0.8)].copy()
        test2p.loc[test2p['target']>=0.5, 'target'] = 1
        test2p.loc[test2p['target']<0.5, 'target'] = 0 
        train2p = pd.concat([train2p, test2p], axis=0)
        train2p.reset_index(drop=True, inplace=True)

        pca = PCA(n_components=cat_dict[k], random_state=1234)
        pca.fit(train2p[cols])
        train3p = pca.transform(train2p[cols])
        train3 = pca.transform(train2[cols])
        test3 = pca.transform(test2[cols])

        skf = StratifiedKFold(n_splits=11, random_state=42, shuffle=True)
        for train_index, test_index in skf.split(train3p, train2p['target']):
            test_index3 = test_index[test_index<len(train3)]
            clf = QuadraticDiscriminantAnalysis(reg_param=0.4)
            clf.fit(train3p[train_index, :],train2p.loc[train_index]['target'])
            oof2[idx1[test_index3]] += clf.predict_proba(train3[test_index3, :])[:, 1]
            preds2[test2.index] += clf.predict_proba(test3)[:, 1] / skf.n_splits

auc = roc_auc_score(train['target'], oof2)
print('Model 2 CV: ', round(auc, 5))

100%|██████████| 512/512 [00:56<00:00,  9.13it/s]

Model 2 CV:  0.96931





### 2.3. Bagging QDA with PL from Model 1

In [9]:
# Train QDA with bagging and PL from Model 1

test['target'] = preds1 
oof3 = np.zeros(len(train))
preds3 = np.zeros(len(test))

with single_threaded(np):
    for k in tqdm(range(512)):
        train2 = train[train['wheezy-copper-turtle-magic']==k] 
        train2p = train2.copy(); idx1 = train2.index 
        test2 = test[test['wheezy-copper-turtle-magic']==k]

        # Using all test data as pseudolabels
        test2p = test2.copy()
        test2p.loc[test2p['target']>=0.5, 'target'] = 1
        test2p.loc[test2p['target']<0.5, 'target'] = 0 
        train2p = pd.concat([train2p, test2p], axis=0)
        train2p.reset_index(drop=True, inplace=True)

        sel = VarianceThreshold(threshold=1.5).fit(train2p[cols])     
        train3p = sel.transform(train2p[cols])
        train3 = sel.transform(train2[cols])
        test3 = sel.transform(test2[cols])

        skf = StratifiedKFold(n_splits=11, random_state=42, shuffle=True)
        for train_index, test_index in skf.split(train3p, train2p['target']):
            test_index3 = test_index[test_index<len(train3)]
            clf = QuadraticDiscriminantAnalysis(reg_param=0.3)
            clf = BaggingClassifier(clf, n_estimators=200, random_state=333)
            clf.fit(train3p[train_index, :],train2p.loc[train_index]['target'])
            oof3[idx1[test_index3]] += clf.predict_proba(train3[test_index3, :])[:, 1]
            preds3[test2.index] += clf.predict_proba(test3)[:, 1] / skf.n_splits
        
auc = roc_auc_score(train['target'], oof3)
print('Model 3 CV: ', round(auc, 5))

100%|██████████| 512/512 [1:14:44<00:00,  8.67s/it]

Model 3 CV:  0.96985





### 2.4. QDA with iterative PL

In [10]:
# Train QDA with Grid Search on reg_param and iterative PL for 4 loops

oof4 = np.zeros(len(train))
preds4 = np.zeros(len(test))
params = [{'reg_param': [0.1, 0.2, 0.3, 0.4, 0.5]}]
reg_params = np.zeros(512)

with single_threaded(np):
    for i in tqdm(range(512)):
        train2 = train[train['wheezy-copper-turtle-magic']==i]
        test2 = test[test['wheezy-copper-turtle-magic']==i]
        idx1 = train2.index; idx2 = test2.index
        train2.reset_index(drop=True, inplace=True)

        data = pd.concat([pd.DataFrame(train2[cols]), pd.DataFrame(test2[cols])])
        pipe = Pipeline([('vt', VarianceThreshold(threshold=2)), ('scaler', StandardScaler())])
        data2 = pipe.fit_transform(data[cols])
        train3 = data2[:train2.shape[0]]; test3 = data2[train2.shape[0]:]

        skf = StratifiedKFold(n_splits=11, random_state=42)
        for train_index, test_index in skf.split(train2, train2['target']):
            qda = QuadraticDiscriminantAnalysis()
            clf = GridSearchCV(qda, params, cv=4)
            clf.fit(train3[train_index, :],train2.loc[train_index]['target'])
            reg_params[i] = clf.best_params_['reg_param']
            oof4[idx1[test_index]] = clf.predict_proba(train3[test_index,:])[:,1]
            preds4[idx2] += clf.predict_proba(test3)[:,1] / skf.n_splits

    for itr in range(4):
        test['target'] = preds4
        # Using pseudolabels with confidence < 0.045 or > 0.955
        test.loc[test['target'] > 0.955, 'target'] = 1
        test.loc[test['target'] < 0.045, 'target'] = 0
        usefull_test = test[(test['target'] == 1) | (test['target'] == 0)]
        new_train = pd.concat([train, usefull_test]).reset_index(drop=True)
        # Assign 0 or 1 to highly confident predictions
        new_train.loc[oof > 0.995, 'target'] = 1
        new_train.loc[oof < 0.005, 'target'] = 0
        oof4 = np.zeros(len(train))
        preds4 = np.zeros(len(test))
        for i in tqdm(range(512)):
            train2 = new_train[new_train['wheezy-copper-turtle-magic']==i]
            test2 = test[test['wheezy-copper-turtle-magic']==i]
            idx1 = train[train['wheezy-copper-turtle-magic']==i].index
            idx2 = test2.index
            train2.reset_index(drop=True,inplace=True)

            data = pd.concat([pd.DataFrame(train2[cols]), pd.DataFrame(test2[cols])])
            pipe = Pipeline([('vt', VarianceThreshold(threshold=2)), ('scaler', StandardScaler())])
            data2 = pipe.fit_transform(data[cols])
            train3 = data2[:train2.shape[0]]
            test3 = data2[train2.shape[0]:]

            skf = StratifiedKFold(n_splits=11, random_state=42)
            for train_index, test_index in skf.split(train2, train2['target']):
                oof_test_index = [t for t in test_index if t < len(idx1)]
                clf = QuadraticDiscriminantAnalysis(reg_params[i])
                clf.fit(train3[train_index,:],train2.loc[train_index]['target'])
                if len(oof_test_index) > 0:
                    oof4[idx1[oof_test_index]] = clf.predict_proba(train3[oof_test_index,:])[:,1]
                preds4[idx2] += clf.predict_proba(test3)[:,1] / skf.n_splits

auc = roc_auc_score(train['target'], oof4)
print('Model 4 CV: ', round(auc, 5))

100%|██████████| 512/512 [07:20<00:00,  1.15it/s]
100%|██████████| 512/512 [00:44<00:00, 11.39it/s]
100%|██████████| 512/512 [00:46<00:00, 11.05it/s]
100%|██████████| 512/512 [00:45<00:00, 10.97it/s]
100%|██████████| 512/512 [00:45<00:00, 11.63it/s]


Model 4 CV:  0.96949


## 3. Final submission

In [11]:
# A weighted average of all the models
oof = 0.65*(0.25*oof1 + 0.15*oof2 + 0.6*oof3) + 0.35*oof4
preds = 0.65*(0.25*preds1 + 0.15*preds2 + 0.6*preds3) + 0.35*preds4
auc = roc_auc_score(train['target'], oof)
print('Final submission CV: ', round(auc, 6))

Final submission CV:  0.970324
