In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.
from time import time
from tqdm import tqdm_notebook as tqdm
from collections import Counter
from scipy import stats
import lightgbm as lgb
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import KFold, StratifiedKFold
import gc
import json
pd.set_option('display.max_columns', 1000)

from pathlib import Path

In [3]:
path=Path('/kaggle/data_science_bowl')
path

PosixPath('/kaggle/data_science_bowl')

In [4]:
def read_data():
    train_df = pd.read_csv(path/'train.csv')
    test_df = pd.read_csv(path/'test.csv')
    train_labels_df = pd.read_csv(path/'train_labels.csv')
    specs_df = pd.read_csv(path/'specs.csv')
    return train_df, test_df, train_labels_df, specs_df

In [5]:
train_df, test_df, train_labels_df, specs_df = read_data()

In [6]:
train_df.columns

Index(['event_id', 'game_session', 'timestamp', 'event_data',
       'installation_id', 'event_count', 'event_code', 'game_time', 'title',
       'type', 'world'],
      dtype='object')

In [7]:
train_df.shape

(11341042, 11)

## Feature Engineering

### Cleanup

In [8]:
def remove_wrong_event_codes(df):
    return df[((df['title'] == 'Bird Measurer (Assessment)') & (df['event_code'] == 4100)) == False]

train_df = remove_wrong_event_codes(train_df)
test_df = remove_wrong_event_codes(test_df)

In [9]:
train_df.shape

(11338690, 11)

In [10]:
from functools import partial

def basic_feature_engineering(df, is_test=False):
    def process_event_code(is_correct, code, x):
            count = 0
            for xi in x:
                fields = json.loads(xi)
                if ('event_code' in fields) and (fields['event_code'] == code) and ('correct' in fields) \
    and (fields['correct'] == is_correct):
                    count += 1
            return count

    process_event_code_4100_true = partial(process_event_code, True, 4100)
    process_event_code_4100_true.__name__ = 'process_event_code_4100_true'
    process_event_code_4100_false = partial(process_event_code, False, 4100)
    process_event_code_4100_false.__name__ = 'process_event_code_4100_false'
    process_event_code_4110_true = partial(process_event_code, True, 4110)
    process_event_code_4110_true.__name__ = 'process_event_code_4110_true'
    process_event_code_4110_false = partial(process_event_code, False, 4110)
    process_event_code_4110_false.__name__ = 'process_event_code_4110_false'
    agg_ops = {'event_data': [process_event_code_4100_true, process_event_code_4100_false, \
                              process_event_code_4110_true, process_event_code_4110_false],\
              'type': [lambda types : sum([1 for t in types if t == 'Assessment'])],
              'game_time': ['mean', 'median', 'std'],
              'game_session': [lambda sessions : len(sessions)]}

    df_aggregated = df[:].groupby(['installation_id', 'game_session']).agg(agg_ops)

    df_aggregated = df_aggregated.reset_index()
    df_aggregated.columns = ['installation_id', 'game_session', 'process_event_code_4100_true', \
                                'process_event_code_4100_false', 'process_event_code_4110_true', 'process_event_code_4110_false',\
                               'type_assessment', 'game_time_mean', 'game_time_median', 'game_time_std', 'session_count']

    def accuracy_series(x):
        true_attempts = x['process_event_code_4100_true'] + x['process_event_code_4110_true']
        false_attempts = x['process_event_code_4100_false'] + x['process_event_code_4110_false']
        accuracy = true_attempts / (true_attempts + false_attempts) if (true_attempts + false_attempts) > 0 else 0
        accuracy_group = 1
        if accuracy == 0:
            accuracy_group = 0
        elif accuracy == 1:
            accuracy_group = 3
        elif accuracy == 0.5:
            accuracy_group = 2
        return pd.Series([true_attempts, false_attempts, accuracy, accuracy_group])

    df_aggregated[['true_attempts', 'false_attempts', 'accuracy', 'accuracy_group']] = df_aggregated\
    .apply(accuracy_series, axis=1)
    df_aggregated = df_aggregated.drop(['process_event_code_4100_true', 'process_event_code_4110_true', \
                           'process_event_code_4100_false', 'process_event_code_4110_false'], axis=1)
    if is_test == False:
        df_aggregated = df_aggregated[(df_aggregated['type_assessment'] > 0) & (df_aggregated['session_count'] > 1) & \
                     (df_aggregated['true_attempts'] + df_aggregated['false_attempts']> 0)]
    else:
        df_aggregated = df_aggregated[(df_aggregated['type_assessment'] > 0)]
    return df_aggregated

In [11]:
import re

def count_pivot(df, comp_df, item='title'):
    title_count_df = df.groupby(['installation_id', item])['event_id'].agg(['count'])\
    .reset_index()\
    .pivot_table(index = ['installation_id'], columns = [item], values = ['count'])\
    .fillna(0)
    title_count_df.columns = [f'{item}_{x[0]}_{x[1]}' for x in title_count_df.columns]
    title_count_df = title_count_df.reset_index()
    comp_df = comp_df.merge(title_count_df, on=['installation_id'], how='left')
    comp_df.columns = [re.sub(r'\W', '_', col,) for col in comp_df.columns]
    return comp_df

In [12]:
%%time
comp_train_df = basic_feature_engineering(train_df)

CPU times: user 3min 49s, sys: 976 ms, total: 3min 50s
Wall time: 3min 41s


In [18]:
comp_train_df

Unnamed: 0,installation_id,game_session,type_assessment,game_time_mean,game_time_median,game_time_std,session_count,true_attempts,false_attempts,accuracy,accuracy_group,title_count_12_Monkeys,title_count_Air_Show,title_count_All_Star_Sorting,title_count_Balancing_Act,title_count_Bird_Measurer__Assessment_,title_count_Bottle_Filler__Activity_,title_count_Bubble_Bath,title_count_Bug_Measurer__Activity_,title_count_Cart_Balancer__Assessment_,title_count_Cauldron_Filler__Assessment_,title_count_Chest_Sorter__Assessment_,title_count_Chicken_Balancer__Activity_,title_count_Chow_Time,title_count_Costume_Box,title_count_Crystal_Caves___Level_1,title_count_Crystal_Caves___Level_2,title_count_Crystal_Caves___Level_3,title_count_Crystals_Rule,title_count_Dino_Dive,title_count_Dino_Drink,title_count_Egg_Dropper__Activity_,title_count_Fireworks__Activity_,title_count_Flower_Waterer__Activity_,title_count_Happy_Camel,title_count_Heavy__Heavier__Heaviest,title_count_Honey_Cake,title_count_Leaf_Leader,title_count_Lifting_Heavy_Things,title_count_Magma_Peak___Level_1,title_count_Magma_Peak___Level_2,title_count_Mushroom_Sorter__Assessment_,title_count_Ordering_Spheres,title_count_Pan_Balance,title_count_Pirate_s_Tale,title_count_Rulers,title_count_Sandcastle_Builder__Activity_,title_count_Scrub_A_Dub,title_count_Slop_Problem,title_count_Treasure_Map,title_count_Tree_Top_City___Level_1,title_count_Tree_Top_City___Level_2,title_count_Tree_Top_City___Level_3,title_count_Watering_Hole__Activity_,title_count_Welcome_to_Lost_Lagoon_,type_count_Activity,type_count_Assessment,type_count_Clip,type_count_Game,world_count_CRYSTALCAVES,world_count_MAGMAPEAK,world_count_NONE,world_count_TREETOPCITY,event_code_count_2000,event_code_count_2010,event_code_count_2020,event_code_count_2025,event_code_count_2030,event_code_count_2035,event_code_count_2040,event_code_count_2050,event_code_count_2060,event_code_count_2070,event_code_count_2075,event_code_count_2080,event_code_count_2081,event_code_count_2083,event_code_count_3010,event_code_count_3020,event_code_count_3021,event_code_count_3110,event_code_count_3120,event_code_count_3121,event_code_count_4010,event_code_count_4020,event_code_count_4021,event_code_count_4022,event_code_count_4025,event_code_count_4030,event_code_count_4031,event_code_count_4035,event_code_count_4040,event_code_count_4045,event_code_count_4050,event_code_count_4070,event_code_count_4080,event_code_count_4090,event_code_count_4095,event_code_count_4100,event_code_count_4110,event_code_count_4220,event_code_count_4230,event_code_count_4235,event_code_count_5000,event_code_count_5010
0,0006a69f,6bdf9623adc94d89,35,10131.657143,8876.0,6939.796746,35,1.0,0.0,1.000000,3.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
1,0006a69f,77b8ee947eb84b4e,87,47771.655172,50155.0,26583.253504,87,0.0,11.0,0.000000,0.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
2,0006a69f,901acc108f55a5a1,48,17534.645833,17551.5,10403.406718,48,1.0,0.0,1.000000,3.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
3,0006a69f,9501794defd84e4d,42,12633.857143,12554.5,8721.338263,42,1.0,1.0,0.500000,2.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
4,0006a69f,a9ef3ecb3d1acc6a,31,16773.741935,16467.0,11514.032389,31,1.0,0.0,1.000000,3.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17685,ffc90c32,c996482b11d149dd,47,28709.617021,30747.0,16730.114588,47,1.0,0.0,1.000000,3.0,2.0,87.0,56.0,1.0,47.0,106.0,107.0,0.0,35.0,24.0,38.0,83.0,61.0,1.0,1.0,1.0,1.0,237.0,129.0,103.0,40.0,85.0,60.0,60.0,1.0,1.0,39.0,1.0,1.0,2.0,97.0,6.0,126.0,1.0,1.0,73.0,249.0,1.0,1.0,2.0,1.0,1.0,32.0,2.0,479.0,241.0,29.0,1254.0,489.0,827.0,2.0,685.0,53.0,5.0,63.0,5.0,56.0,3.0,4.0,3.0,4.0,1.0,3.0,6.0,2.0,4.0,269.0,16.0,68.0,266.0,16.0,66.0,12.0,195.0,13.0,11.0,37.0,216.0,11.0,21.0,20.0,2.0,0.0,517.0,0.0,0.0,2.0,18.0,1.0,5.0,2.0,2.0,3.0,2.0
17686,ffd2871d,b05a02b52d5c1f4c,40,27651.950000,28553.0,16658.486692,40,1.0,0.0,1.000000,3.0,0.0,0.0,0.0,2.0,0.0,638.0,200.0,0.0,0.0,40.0,0.0,206.0,167.0,0.0,1.0,1.0,1.0,0.0,109.0,223.0,128.0,0.0,0.0,169.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,84.0,0.0,0.0,410.0,63.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1382.0,40.0,13.0,1015.0,760.0,1687.0,1.0,2.0,32.0,1.0,83.0,4.0,73.0,2.0,4.0,3.0,3.0,3.0,0.0,8.0,1.0,7.0,261.0,32.0,61.0,258.0,32.0,59.0,11.0,272.0,30.0,1.0,29.0,373.0,24.0,79.0,32.0,7.0,0.0,627.0,0.0,3.0,5.0,2.0,0.0,4.0,12.0,12.0,0.0,0.0
17687,ffeb0b1b,5448d652309a6324,61,37381.295082,39276.0,20608.114221,61,1.0,2.0,0.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,941.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,97.0,13.0,0.0,0.0,0.0,381.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,381.0,247.0,24.0,941.0,944.0,534.0,3.0,112.0,37.0,2.0,31.0,1.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,159.0,155.0,39.0,157.0,154.0,39.0,8.0,147.0,31.0,0.0,7.0,278.0,0.0,84.0,15.0,0.0,0.0,208.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0
17688,ffeb0b1b,a6885ab824fbc32c,97,55649.969072,56499.0,35193.296374,97,0.0,1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,941.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,97.0,13.0,0.0,0.0,0.0,381.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,381.0,247.0,24.0,941.0,944.0,534.0,3.0,112.0,37.0,2.0,31.0,1.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,159.0,155.0,39.0,157.0,154.0,39.0,8.0,147.0,31.0,0.0,7.0,278.0,0.0,84.0,15.0,0.0,0.0,208.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
%%time
counter_fields = ['title', 'type', 'world', 'event_code']
for cf in counter_fields:
    comp_train_df = count_pivot(train_df, comp_train_df, cf)

CPU times: user 9.57 s, sys: 508 ms, total: 10.1 s
Wall time: 4.69 s


In [20]:
comp_train_df

Unnamed: 0,installation_id,game_session,type_assessment,game_time_mean,game_time_median,game_time_std,session_count,true_attempts,false_attempts,accuracy,accuracy_group,title_count_12_Monkeys,title_count_Air_Show,title_count_All_Star_Sorting,title_count_Balancing_Act,title_count_Bird_Measurer__Assessment_,title_count_Bottle_Filler__Activity_,title_count_Bubble_Bath,title_count_Bug_Measurer__Activity_,title_count_Cart_Balancer__Assessment_,title_count_Cauldron_Filler__Assessment_,title_count_Chest_Sorter__Assessment_,title_count_Chicken_Balancer__Activity_,title_count_Chow_Time,title_count_Costume_Box,title_count_Crystal_Caves___Level_1,title_count_Crystal_Caves___Level_2,title_count_Crystal_Caves___Level_3,title_count_Crystals_Rule,title_count_Dino_Dive,title_count_Dino_Drink,title_count_Egg_Dropper__Activity_,title_count_Fireworks__Activity_,title_count_Flower_Waterer__Activity_,title_count_Happy_Camel,title_count_Heavy__Heavier__Heaviest,title_count_Honey_Cake,title_count_Leaf_Leader,title_count_Lifting_Heavy_Things,title_count_Magma_Peak___Level_1,title_count_Magma_Peak___Level_2,title_count_Mushroom_Sorter__Assessment_,title_count_Ordering_Spheres,title_count_Pan_Balance,title_count_Pirate_s_Tale,title_count_Rulers_x,title_count_Sandcastle_Builder__Activity_,title_count_Scrub_A_Dub,title_count_Slop_Problem,title_count_Treasure_Map,title_count_Tree_Top_City___Level_1,title_count_Tree_Top_City___Level_2,title_count_Tree_Top_City___Level_3,title_count_Watering_Hole__Activity_,title_count_Welcome_to_Lost_Lagoon_,type_count_Activity_x,type_count_Assessment_x,type_count_Clip_x,type_count_Game_x,world_count_CRYSTALCAVES_x,world_count_MAGMAPEAK_x,world_count_NONE_x,world_count_TREETOPCITY_x,event_code_count_2000_x,event_code_count_2010_x,event_code_count_2020_x,event_code_count_2025_x,event_code_count_2030_x,event_code_count_2035_x,event_code_count_2040_x,event_code_count_2050_x,event_code_count_2060_x,event_code_count_2070_x,event_code_count_2075_x,event_code_count_2080_x,event_code_count_2081_x,event_code_count_2083_x,event_code_count_3010_x,event_code_count_3020_x,event_code_count_3021_x,event_code_count_3110_x,event_code_count_3120_x,event_code_count_3121_x,event_code_count_4010_x,event_code_count_4020_x,event_code_count_4021_x,event_code_count_4022_x,event_code_count_4025_x,event_code_count_4030_x,event_code_count_4031_x,event_code_count_4035_x,event_code_count_4040_x,event_code_count_4045_x,event_code_count_4050_x,event_code_count_4070_x,event_code_count_4080_x,event_code_count_4090_x,event_code_count_4095_x,event_code_count_4100_x,event_code_count_4110_x,event_code_count_4220_x,event_code_count_4230_x,event_code_count_4235_x,event_code_count_5000_x,event_code_count_5010_x,title_count_12_Monkeys.1,title_count_Air_Show.1,title_count_All_Star_Sorting.1,title_count_Balancing_Act.1,title_count_Bird_Measurer__Assessment_.1,title_count_Bottle_Filler__Activity_.1,title_count_Bubble_Bath.1,title_count_Bug_Measurer__Activity_.1,title_count_Cart_Balancer__Assessment_.1,title_count_Cauldron_Filler__Assessment_.1,title_count_Chest_Sorter__Assessment_.1,title_count_Chicken_Balancer__Activity_.1,title_count_Chow_Time.1,title_count_Costume_Box.1,title_count_Crystal_Caves___Level_1.1,title_count_Crystal_Caves___Level_2.1,title_count_Crystal_Caves___Level_3.1,title_count_Crystals_Rule.1,title_count_Dino_Dive.1,title_count_Dino_Drink.1,title_count_Egg_Dropper__Activity_.1,title_count_Fireworks__Activity_.1,title_count_Flower_Waterer__Activity_.1,title_count_Happy_Camel.1,title_count_Heavy__Heavier__Heaviest.1,title_count_Honey_Cake.1,title_count_Leaf_Leader.1,title_count_Lifting_Heavy_Things.1,title_count_Magma_Peak___Level_1.1,title_count_Magma_Peak___Level_2.1,title_count_Mushroom_Sorter__Assessment_.1,title_count_Ordering_Spheres.1,title_count_Pan_Balance.1,title_count_Pirate_s_Tale.1,title_count_Rulers_y,title_count_Sandcastle_Builder__Activity_.1,title_count_Scrub_A_Dub.1,title_count_Slop_Problem.1,title_count_Treasure_Map.1,title_count_Tree_Top_City___Level_1.1,title_count_Tree_Top_City___Level_2.1,title_count_Tree_Top_City___Level_3.1,title_count_Watering_Hole__Activity_.1,title_count_Welcome_to_Lost_Lagoon_.1,type_count_Activity_y,type_count_Assessment_y,type_count_Clip_y,type_count_Game_y,world_count_CRYSTALCAVES_y,world_count_MAGMAPEAK_y,world_count_NONE_y,world_count_TREETOPCITY_y,event_code_count_2000_y,event_code_count_2010_y,event_code_count_2020_y,event_code_count_2025_y,event_code_count_2030_y,event_code_count_2035_y,event_code_count_2040_y,event_code_count_2050_y,event_code_count_2060_y,event_code_count_2070_y,event_code_count_2075_y,event_code_count_2080_y,event_code_count_2081_y,event_code_count_2083_y,event_code_count_3010_y,event_code_count_3020_y,event_code_count_3021_y,event_code_count_3110_y,event_code_count_3120_y,event_code_count_3121_y,event_code_count_4010_y,event_code_count_4020_y,event_code_count_4021_y,event_code_count_4022_y,event_code_count_4025_y,event_code_count_4030_y,event_code_count_4031_y,event_code_count_4035_y,event_code_count_4040_y,event_code_count_4045_y,event_code_count_4050_y,event_code_count_4070_y,event_code_count_4080_y,event_code_count_4090_y,event_code_count_4095_y,event_code_count_4100_y,event_code_count_4110_y,event_code_count_4220_y,event_code_count_4230_y,event_code_count_4235_y,event_code_count_5000_y,event_code_count_5010_y
0,0006a69f,6bdf9623adc94d89,35,10131.657143,8876.0,6939.796746,35,1.0,0.0,1.000000,3.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
1,0006a69f,77b8ee947eb84b4e,87,47771.655172,50155.0,26583.253504,87,0.0,11.0,0.000000,0.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
2,0006a69f,901acc108f55a5a1,48,17534.645833,17551.5,10403.406718,48,1.0,0.0,1.000000,3.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
3,0006a69f,9501794defd84e4d,42,12633.857143,12554.5,8721.338263,42,1.0,1.0,0.500000,2.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
4,0006a69f,a9ef3ecb3d1acc6a,31,16773.741935,16467.0,11514.032389,31,1.0,0.0,1.000000,3.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0,2.0,295.0,203.0,0.0,118.0,293.0,248.0,319.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,212.0,64.0,286.0,0.0,299.0,278.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,142.0,2.0,0.0,2.0,2.0,434.0,424.0,6.0,3.0,2.0,2.0,2.0,148.0,4.0,1771.0,260.0,37.0,1732.0,0.0,1910.0,4.0,1886.0,80.0,4.0,112.0,12.0,97.0,8.0,21.0,18.0,7.0,2.0,3.0,17.0,6.0,10.0,620.0,34.0,89.0,610.0,34.0,89.0,19.0,404.0,67.0,45.0,128.0,471.0,25.0,35.0,18.0,4.0,0.0,592.0,0.0,4.0,2.0,12.0,14.0,13.0,25.0,25.0,12.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17685,ffc90c32,c996482b11d149dd,47,28709.617021,30747.0,16730.114588,47,1.0,0.0,1.000000,3.0,2.0,87.0,56.0,1.0,47.0,106.0,107.0,0.0,35.0,24.0,38.0,83.0,61.0,1.0,1.0,1.0,1.0,237.0,129.0,103.0,40.0,85.0,60.0,60.0,1.0,1.0,39.0,1.0,1.0,2.0,97.0,6.0,126.0,1.0,1.0,73.0,249.0,1.0,1.0,2.0,1.0,1.0,32.0,2.0,479.0,241.0,29.0,1254.0,489.0,827.0,2.0,685.0,53.0,5.0,63.0,5.0,56.0,3.0,4.0,3.0,4.0,1.0,3.0,6.0,2.0,4.0,269.0,16.0,68.0,266.0,16.0,66.0,12.0,195.0,13.0,11.0,37.0,216.0,11.0,21.0,20.0,2.0,0.0,517.0,0.0,0.0,2.0,18.0,1.0,5.0,2.0,2.0,3.0,2.0,2.0,87.0,56.0,1.0,47.0,106.0,107.0,0.0,35.0,24.0,38.0,83.0,61.0,1.0,1.0,1.0,1.0,237.0,129.0,103.0,40.0,85.0,60.0,60.0,1.0,1.0,39.0,1.0,1.0,2.0,97.0,6.0,126.0,1.0,1.0,73.0,249.0,1.0,1.0,2.0,1.0,1.0,32.0,2.0,479.0,241.0,29.0,1254.0,489.0,827.0,2.0,685.0,53.0,5.0,63.0,5.0,56.0,3.0,4.0,3.0,4.0,1.0,3.0,6.0,2.0,4.0,269.0,16.0,68.0,266.0,16.0,66.0,12.0,195.0,13.0,11.0,37.0,216.0,11.0,21.0,20.0,2.0,0.0,517.0,0.0,0.0,2.0,18.0,1.0,5.0,2.0,2.0,3.0,2.0
17686,ffd2871d,b05a02b52d5c1f4c,40,27651.950000,28553.0,16658.486692,40,1.0,0.0,1.000000,3.0,0.0,0.0,0.0,2.0,0.0,638.0,200.0,0.0,0.0,40.0,0.0,206.0,167.0,0.0,1.0,1.0,1.0,0.0,109.0,223.0,128.0,0.0,0.0,169.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,84.0,0.0,0.0,410.0,63.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1382.0,40.0,13.0,1015.0,760.0,1687.0,1.0,2.0,32.0,1.0,83.0,4.0,73.0,2.0,4.0,3.0,3.0,3.0,0.0,8.0,1.0,7.0,261.0,32.0,61.0,258.0,32.0,59.0,11.0,272.0,30.0,1.0,29.0,373.0,24.0,79.0,32.0,7.0,0.0,627.0,0.0,3.0,5.0,2.0,0.0,4.0,12.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,638.0,200.0,0.0,0.0,40.0,0.0,206.0,167.0,0.0,1.0,1.0,1.0,0.0,109.0,223.0,128.0,0.0,0.0,169.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,84.0,0.0,0.0,410.0,63.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1382.0,40.0,13.0,1015.0,760.0,1687.0,1.0,2.0,32.0,1.0,83.0,4.0,73.0,2.0,4.0,3.0,3.0,3.0,0.0,8.0,1.0,7.0,261.0,32.0,61.0,258.0,32.0,59.0,11.0,272.0,30.0,1.0,29.0,373.0,24.0,79.0,32.0,7.0,0.0,627.0,0.0,3.0,5.0,2.0,0.0,4.0,12.0,12.0,0.0,0.0
17687,ffeb0b1b,5448d652309a6324,61,37381.295082,39276.0,20608.114221,61,1.0,2.0,0.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,941.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,97.0,13.0,0.0,0.0,0.0,381.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,381.0,247.0,24.0,941.0,944.0,534.0,3.0,112.0,37.0,2.0,31.0,1.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,159.0,155.0,39.0,157.0,154.0,39.0,8.0,147.0,31.0,0.0,7.0,278.0,0.0,84.0,15.0,0.0,0.0,208.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,941.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,97.0,13.0,0.0,0.0,0.0,381.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,381.0,247.0,24.0,941.0,944.0,534.0,3.0,112.0,37.0,2.0,31.0,1.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,159.0,155.0,39.0,157.0,154.0,39.0,8.0,147.0,31.0,0.0,7.0,278.0,0.0,84.0,15.0,0.0,0.0,208.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0
17688,ffeb0b1b,a6885ab824fbc32c,97,55649.969072,56499.0,35193.296374,97,0.0,1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,941.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,97.0,13.0,0.0,0.0,0.0,381.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,381.0,247.0,24.0,941.0,944.0,534.0,3.0,112.0,37.0,2.0,31.0,1.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,159.0,155.0,39.0,157.0,154.0,39.0,8.0,147.0,31.0,0.0,7.0,278.0,0.0,84.0,15.0,0.0,0.0,208.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,941.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,97.0,13.0,0.0,0.0,0.0,381.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,381.0,247.0,24.0,941.0,944.0,534.0,3.0,112.0,37.0,2.0,31.0,1.0,27.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,159.0,155.0,39.0,157.0,154.0,39.0,8.0,147.0,31.0,0.0,7.0,278.0,0.0,84.0,15.0,0.0,0.0,208.0,0.0,0.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
%%time
comp_test_df = basic_feature_engineering(test_df, is_test=True)

CPU times: user 20.9 s, sys: 64 ms, total: 21 s
Wall time: 21 s


In [28]:
comp_test_df

Unnamed: 0,installation_id,game_session,type_assessment,game_time_mean,game_time_median,game_time_std,session_count,true_attempts,false_attempts,accuracy,accuracy_group
7,00abaee7,348d7f09f96af313,1,0.000000,0.0,,1,0.0,0.0,0.00,0.0
17,00abaee7,8b38fc0d2fd315dc,26,14546.846154,14603.0,9021.999139,26,1.0,0.0,1.00,3.0
27,01242218,009c890ce6c4f3e3,36,16310.250000,16975.5,10795.687936,36,1.0,1.0,0.50,2.0
34,01242218,1fef5d54cb4b775a,1,0.000000,0.0,,1,0.0,0.0,0.00,0.0
35,01242218,31423dbcd717919e,51,18458.980392,18762.0,11421.162085,51,1.0,1.0,0.50,2.0
...,...,...,...,...,...,...,...,...,...,...,...
28428,ffe00ca8,70336ec581799feb,37,14833.837838,15288.0,9005.323998,37,1.0,1.0,0.50,2.0
28430,ffe00ca8,8d0fdec0ad44aefb,1,0.000000,0.0,,1,0.0,0.0,0.00,0.0
28436,ffe00ca8,c116d9e6f8cf85c3,68,19641.750000,16763.5,12298.955834,68,1.0,3.0,0.25,1.0
28441,ffe774cc,46ff9d3ad2be09f2,41,15342.048780,12576.0,10042.006679,41,1.0,0.0,1.00,3.0


In [29]:
for cf in counter_fields:
    comp_test_df = count_pivot(test_df, comp_test_df, cf)

In [30]:
comp_test_df

Unnamed: 0,installation_id,game_session,type_assessment,game_time_mean,game_time_median,game_time_std,session_count,true_attempts,false_attempts,accuracy,accuracy_group,title_count_12_Monkeys,title_count_Air_Show,title_count_All_Star_Sorting,title_count_Balancing_Act,title_count_Bird_Measurer__Assessment_,title_count_Bottle_Filler__Activity_,title_count_Bubble_Bath,title_count_Bug_Measurer__Activity_,title_count_Cart_Balancer__Assessment_,title_count_Cauldron_Filler__Assessment_,title_count_Chest_Sorter__Assessment_,title_count_Chicken_Balancer__Activity_,title_count_Chow_Time,title_count_Costume_Box,title_count_Crystal_Caves___Level_1,title_count_Crystal_Caves___Level_2,title_count_Crystal_Caves___Level_3,title_count_Crystals_Rule,title_count_Dino_Dive,title_count_Dino_Drink,title_count_Egg_Dropper__Activity_,title_count_Fireworks__Activity_,title_count_Flower_Waterer__Activity_,title_count_Happy_Camel,title_count_Heavy__Heavier__Heaviest,title_count_Honey_Cake,title_count_Leaf_Leader,title_count_Lifting_Heavy_Things,title_count_Magma_Peak___Level_1,title_count_Magma_Peak___Level_2,title_count_Mushroom_Sorter__Assessment_,title_count_Ordering_Spheres,title_count_Pan_Balance,title_count_Pirate_s_Tale,title_count_Rulers,title_count_Sandcastle_Builder__Activity_,title_count_Scrub_A_Dub,title_count_Slop_Problem,title_count_Treasure_Map,title_count_Tree_Top_City___Level_1,title_count_Tree_Top_City___Level_2,title_count_Tree_Top_City___Level_3,title_count_Watering_Hole__Activity_,title_count_Welcome_to_Lost_Lagoon_,type_count_Activity,type_count_Assessment,type_count_Clip,type_count_Game,world_count_CRYSTALCAVES,world_count_MAGMAPEAK,world_count_NONE,world_count_TREETOPCITY,event_code_count_2000,event_code_count_2010,event_code_count_2020,event_code_count_2025,event_code_count_2030,event_code_count_2035,event_code_count_2040,event_code_count_2050,event_code_count_2060,event_code_count_2070,event_code_count_2075,event_code_count_2080,event_code_count_2081,event_code_count_2083,event_code_count_3010,event_code_count_3020,event_code_count_3021,event_code_count_3110,event_code_count_3120,event_code_count_3121,event_code_count_4010,event_code_count_4020,event_code_count_4021,event_code_count_4022,event_code_count_4025,event_code_count_4030,event_code_count_4031,event_code_count_4035,event_code_count_4040,event_code_count_4045,event_code_count_4050,event_code_count_4070,event_code_count_4080,event_code_count_4090,event_code_count_4095,event_code_count_4100,event_code_count_4110,event_code_count_4220,event_code_count_4230,event_code_count_4235,event_code_count_5000,event_code_count_5010
0,00abaee7,348d7f09f96af313,1,0.000000,0.0,,1,0.0,0.0,0.00,0.0,2.0,0.0,79.0,1.0,0.0,0.0,0.0,26.0,26.0,1.0,0.0,0.0,159.0,1.0,1.0,2.0,1.0,0.0,135.0,0.0,61.0,264.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,103.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,454.0,27.0,14.0,373.0,253.0,241.0,1.0,373.0,26.0,1.0,11.0,1.0,7.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,111.0,33.0,9.0,108.0,33.0,9.0,3.0,120.0,12.0,0.0,10.0,148.0,0.0,33.0,0.0,0.0,0.0,190.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,00abaee7,8b38fc0d2fd315dc,26,14546.846154,14603.0,9021.999139,26,1.0,0.0,1.00,3.0,2.0,0.0,79.0,1.0,0.0,0.0,0.0,26.0,26.0,1.0,0.0,0.0,159.0,1.0,1.0,2.0,1.0,0.0,135.0,0.0,61.0,264.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,103.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,454.0,27.0,14.0,373.0,253.0,241.0,1.0,373.0,26.0,1.0,11.0,1.0,7.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,111.0,33.0,9.0,108.0,33.0,9.0,3.0,120.0,12.0,0.0,10.0,148.0,0.0,33.0,0.0,0.0,0.0,190.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,01242218,009c890ce6c4f3e3,36,16310.250000,16975.5,10795.687936,36,1.0,1.0,0.50,2.0,1.0,72.0,56.0,3.0,59.0,221.0,51.0,146.0,14.0,36.0,83.0,226.0,77.0,1.0,1.0,2.0,1.0,230.0,79.0,111.0,186.0,197.0,160.0,123.0,1.0,2.0,40.0,2.0,2.0,2.0,51.0,1.0,124.0,1.0,1.0,179.0,126.0,0.0,1.0,2.0,2.0,1.0,41.0,2.0,1356.0,243.0,29.0,1089.0,885.0,848.0,2.0,982.0,58.0,4.0,85.0,2.0,80.0,2.0,6.0,6.0,4.0,1.0,3.0,10.0,2.0,7.0,418.0,25.0,72.0,416.0,25.0,72.0,12.0,325.0,26.0,36.0,94.0,407.0,11.0,49.0,25.0,6.0,0.0,385.0,0.0,2.0,0.0,21.0,3.0,9.0,0.0,0.0,4.0,4.0
3,01242218,1fef5d54cb4b775a,1,0.000000,0.0,,1,0.0,0.0,0.00,0.0,1.0,72.0,56.0,3.0,59.0,221.0,51.0,146.0,14.0,36.0,83.0,226.0,77.0,1.0,1.0,2.0,1.0,230.0,79.0,111.0,186.0,197.0,160.0,123.0,1.0,2.0,40.0,2.0,2.0,2.0,51.0,1.0,124.0,1.0,1.0,179.0,126.0,0.0,1.0,2.0,2.0,1.0,41.0,2.0,1356.0,243.0,29.0,1089.0,885.0,848.0,2.0,982.0,58.0,4.0,85.0,2.0,80.0,2.0,6.0,6.0,4.0,1.0,3.0,10.0,2.0,7.0,418.0,25.0,72.0,416.0,25.0,72.0,12.0,325.0,26.0,36.0,94.0,407.0,11.0,49.0,25.0,6.0,0.0,385.0,0.0,2.0,0.0,21.0,3.0,9.0,0.0,0.0,4.0,4.0
4,01242218,31423dbcd717919e,51,18458.980392,18762.0,11421.162085,51,1.0,1.0,0.50,2.0,1.0,72.0,56.0,3.0,59.0,221.0,51.0,146.0,14.0,36.0,83.0,226.0,77.0,1.0,1.0,2.0,1.0,230.0,79.0,111.0,186.0,197.0,160.0,123.0,1.0,2.0,40.0,2.0,2.0,2.0,51.0,1.0,124.0,1.0,1.0,179.0,126.0,0.0,1.0,2.0,2.0,1.0,41.0,2.0,1356.0,243.0,29.0,1089.0,885.0,848.0,2.0,982.0,58.0,4.0,85.0,2.0,80.0,2.0,6.0,6.0,4.0,1.0,3.0,10.0,2.0,7.0,418.0,25.0,72.0,416.0,25.0,72.0,12.0,325.0,26.0,36.0,94.0,407.0,11.0,49.0,25.0,6.0,0.0,385.0,0.0,2.0,0.0,21.0,3.0,9.0,0.0,0.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3342,ffe00ca8,70336ec581799feb,37,14833.837838,15288.0,9005.323998,37,1.0,1.0,0.50,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,110.0,1.0,109.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,123.0,110.0,11.0,15.0,5.0,139.0,1.0,114.0,19.0,0.0,6.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,44.0,4.0,4.0,42.0,4.0,4.0,1.0,12.0,0.0,0.0,6.0,37.0,0.0,13.0,6.0,0.0,0.0,45.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0
3343,ffe00ca8,8d0fdec0ad44aefb,1,0.000000,0.0,,1,0.0,0.0,0.00,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,110.0,1.0,109.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,123.0,110.0,11.0,15.0,5.0,139.0,1.0,114.0,19.0,0.0,6.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,44.0,4.0,4.0,42.0,4.0,4.0,1.0,12.0,0.0,0.0,6.0,37.0,0.0,13.0,6.0,0.0,0.0,45.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0
3344,ffe00ca8,c116d9e6f8cf85c3,68,19641.750000,16763.5,12298.955834,68,1.0,3.0,0.25,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,110.0,1.0,109.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,123.0,110.0,11.0,15.0,5.0,139.0,1.0,114.0,19.0,0.0,6.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,44.0,4.0,4.0,42.0,4.0,4.0,1.0,12.0,0.0,0.0,6.0,37.0,0.0,13.0,6.0,0.0,0.0,45.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0
3345,ffe774cc,46ff9d3ad2be09f2,41,15342.048780,12576.0,10042.006679,41,1.0,0.0,1.00,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,41.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,42.0,3.0,0.0,0.0,0.0,0.0,45.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,4.0,0.0,4.0,0.0,4.0,0.0,0.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
comp_test_df = test_df.groupby(['installation_id']).last().reset_index()[['installation_id', 'game_session']]\
.merge(comp_test_df, on=['installation_id', 'game_session'], how='left').fillna(0)

In [33]:
comp_test_df

Unnamed: 0,installation_id,game_session,type_assessment,game_time_mean,game_time_median,game_time_std,session_count,true_attempts,false_attempts,accuracy,accuracy_group,title_count_12_Monkeys,title_count_Air_Show,title_count_All_Star_Sorting,title_count_Balancing_Act,title_count_Bird_Measurer__Assessment_,title_count_Bottle_Filler__Activity_,title_count_Bubble_Bath,title_count_Bug_Measurer__Activity_,title_count_Cart_Balancer__Assessment_,title_count_Cauldron_Filler__Assessment_,title_count_Chest_Sorter__Assessment_,title_count_Chicken_Balancer__Activity_,title_count_Chow_Time,title_count_Costume_Box,title_count_Crystal_Caves___Level_1,title_count_Crystal_Caves___Level_2,title_count_Crystal_Caves___Level_3,title_count_Crystals_Rule,title_count_Dino_Dive,title_count_Dino_Drink,title_count_Egg_Dropper__Activity_,title_count_Fireworks__Activity_,title_count_Flower_Waterer__Activity_,title_count_Happy_Camel,title_count_Heavy__Heavier__Heaviest,title_count_Honey_Cake,title_count_Leaf_Leader,title_count_Lifting_Heavy_Things,title_count_Magma_Peak___Level_1,title_count_Magma_Peak___Level_2,title_count_Mushroom_Sorter__Assessment_,title_count_Ordering_Spheres,title_count_Pan_Balance,title_count_Pirate_s_Tale,title_count_Rulers,title_count_Sandcastle_Builder__Activity_,title_count_Scrub_A_Dub,title_count_Slop_Problem,title_count_Treasure_Map,title_count_Tree_Top_City___Level_1,title_count_Tree_Top_City___Level_2,title_count_Tree_Top_City___Level_3,title_count_Watering_Hole__Activity_,title_count_Welcome_to_Lost_Lagoon_,type_count_Activity,type_count_Assessment,type_count_Clip,type_count_Game,world_count_CRYSTALCAVES,world_count_MAGMAPEAK,world_count_NONE,world_count_TREETOPCITY,event_code_count_2000,event_code_count_2010,event_code_count_2020,event_code_count_2025,event_code_count_2030,event_code_count_2035,event_code_count_2040,event_code_count_2050,event_code_count_2060,event_code_count_2070,event_code_count_2075,event_code_count_2080,event_code_count_2081,event_code_count_2083,event_code_count_3010,event_code_count_3020,event_code_count_3021,event_code_count_3110,event_code_count_3120,event_code_count_3121,event_code_count_4010,event_code_count_4020,event_code_count_4021,event_code_count_4022,event_code_count_4025,event_code_count_4030,event_code_count_4031,event_code_count_4035,event_code_count_4040,event_code_count_4045,event_code_count_4050,event_code_count_4070,event_code_count_4080,event_code_count_4090,event_code_count_4095,event_code_count_4100,event_code_count_4110,event_code_count_4220,event_code_count_4230,event_code_count_4235,event_code_count_5000,event_code_count_5010
0,00abaee7,348d7f09f96af313,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,2.0,0.0,79.0,1.0,0.0,0.0,0.0,26.0,26.0,1.0,0.0,0.0,159.0,1.0,1.0,2.0,1.0,0.0,135.0,0.0,61.0,264.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,103.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,454.0,27.0,14.0,373.0,253.0,241.0,1.0,373.0,26.0,1.0,11.0,1.0,7.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,111.0,33.0,9.0,108.0,33.0,9.0,3.0,120.0,12.0,0.0,10.0,148.0,0.0,33.0,0.0,0.0,0.0,190.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,01242218,1fef5d54cb4b775a,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,1.0,72.0,56.0,3.0,59.0,221.0,51.0,146.0,14.0,36.0,83.0,226.0,77.0,1.0,1.0,2.0,1.0,230.0,79.0,111.0,186.0,197.0,160.0,123.0,1.0,2.0,40.0,2.0,2.0,2.0,51.0,1.0,124.0,1.0,1.0,179.0,126.0,0.0,1.0,2.0,2.0,1.0,41.0,2.0,1356.0,243.0,29.0,1089.0,885.0,848.0,2.0,982.0,58.0,4.0,85.0,2.0,80.0,2.0,6.0,6.0,4.0,1.0,3.0,10.0,2.0,7.0,418.0,25.0,72.0,416.0,25.0,72.0,12.0,325.0,26.0,36.0,94.0,407.0,11.0,49.0,25.0,6.0,0.0,385.0,0.0,2.0,0.0,21.0,3.0,9.0,0.0,0.0,4.0,4.0
2,017c5718,4b165a330a0bdd6c,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,143.0,1.0,6.0,0.0,0.0,0.0,4.0,146.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,24.0,0.0,0.0,0.0,15.0,0.0,20.0,4.0,40.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,01a44906,be0b655ad1fee30c,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,1.0,0.0,78.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.0,108.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,145.0,1.0,10.0,78.0,0.0,0.0,3.0,231.0,14.0,0.0,3.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0,2.0,3.0,21.0,2.0,3.0,1.0,31.0,0.0,29.0,9.0,61.0,0.0,0.0,0.0,0.0,0.0,28.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,01bc6cb6,46e8bbed71df7520,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,420.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,226.0,158.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,130.0,1.0,1.0,0.0,1.0,2.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,226.0,1.0,17.0,708.0,522.0,3.0,3.0,424.0,25.0,0.0,28.0,9.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,75.0,27.0,27.0,73.0,27.0,27.0,6.0,146.0,0.0,3.0,0.0,231.0,0.0,62.0,16.0,4.0,0.0,134.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,fee254cf,d0ea3550fd72f6c1,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,92.0,0.0,81.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,115.0,4.0,92.0,0.0,1.0,1.0,209.0,9.0,2.0,8.0,2.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,1.0,8.0,19.0,1.0,8.0,1.0,22.0,0.0,0.0,19.0,39.0,0.0,0.0,1.0,0.0,0.0,41.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
996,ff57e602,d22d3b1442967ba7,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,28.0,0.0,1.0,75.0,57.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,52.0,0.0,0.0,1.0,0.0,79.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,127.0,29.0,11.0,136.0,246.0,0.0,1.0,56.0,17.0,1.0,7.0,0.0,7.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,23.0,8.0,8.0,21.0,8.0,8.0,2.0,65.0,0.0,1.0,0.0,54.0,0.0,11.0,0.0,0.0,0.0,56.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
997,ffc73fb2,fd5e0fd3fe28f907,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,70.0,139.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,182.0,0.0,0.0,1.0,3.0,0.0,4.0,1.0,4.0,46.0,1.0,0.0,1.0,3.0,56.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,238.0,256.0,32.0,0.0,154.0,132.0,1.0,239.0,39.0,3.0,6.0,1.0,5.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,48.0,6.0,10.0,48.0,6.0,10.0,0.0,53.0,4.0,0.0,23.0,97.0,0.0,15.0,6.0,0.0,0.0,136.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0
998,ffe00ca8,8d0fdec0ad44aefb,1,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,110.0,1.0,109.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,123.0,110.0,11.0,15.0,5.0,139.0,1.0,114.0,19.0,0.0,6.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,44.0,4.0,4.0,42.0,4.0,4.0,1.0,12.0,0.0,0.0,6.0,37.0,0.0,13.0,6.0,0.0,0.0,45.0,0.0,2.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0


## Training

In [20]:
# quadratic weighted kappa
def qwk3(a1, a2, max_rat=3):
    '''
    a1 - ground truth
    a2 - predicted values
    '''
    assert(len(a1) == len(a2))
    a1 = np.asarray(a1, dtype=int)
    a2 = np.asarray(a2, dtype=int)
    hist1 = np.zeros((max_rat + 1, ))
    hist2 = np.zeros((max_rat + 1, ))
    o = 0
    for k in range(a1.shape[0]):
        i, j = a1[k], a2[k]
        hist1[i] += 1
        hist2[j] += 1
        o +=  (i - j) * (i - j)
    e = 0
    for i in range(max_rat + 1):
        for j in range(max_rat + 1):
            e += hist1[i] * hist2[j] * (i - j) * (i - j)
    e = e / a1.shape[0]
    return 1 - o / e

In [21]:
features = [i for i in comp_train_df.columns if i not in ['accuracy_group', 'installation_id', 'game_session']]
target = 'accuracy_group'
num_splits = 10
params = {
    'learning_rate': 0.007,
    'metric': 'multiclass',
    'objective': 'multiclass',
    'num_classes': 4,
    'feature_fraction': 0.75,
    "bagging_fraction": 0.8,
    "bagging_seed": 42
}

early_stopping_rounds = 100
num_boost_round = 700

def train_model(comp_train_df):
    
    kf = KFold(n_splits=num_splits, shuffle=True)
    
    oof_pred = np.zeros((len(comp_train_df), 4))
    models = []
    
    for fold, (tr_ind, val_ind) in enumerate(kf.split(comp_train_df)):
        print(f'Fold: {fold+1}')
        x_train, x_val = comp_train_df[features].iloc[tr_ind], comp_train_df[features].iloc[val_ind]
        y_train, y_val = comp_train_df[target][tr_ind], comp_train_df[target][val_ind]
        train_set = lgb.Dataset(x_train, y_train)
        val_set = lgb.Dataset(x_val, y_val)

        model = lgb.train(params, train_set, num_boost_round = num_boost_round, early_stopping_rounds = early_stopping_rounds, 
                          valid_sets=[train_set, val_set], verbose_eval = early_stopping_rounds)
        oof_pred[val_ind] = model.predict(x_val)
        models.append(model)
        
        val_crt_fold = qwk3(y_val, oof_pred[val_ind].argmax(axis = 1))
        print(f'Fold: {fold+1} quadratic weighted kappa score: {np.round(val_crt_fold,4)}')
        
    res = qwk3(comp_train_df['accuracy_group'], oof_pred.argmax(axis = 1))
    print(f'Quadratic weighted score: {np.round(res,4)}')
        
    return models

In [22]:
%%time
models = train_model(comp_train_df)

Fold: 1
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.499339	valid_1's multi_logloss: 0.510738
[200]	training's multi_logloss: 0.238482	valid_1's multi_logloss: 0.244201
[300]	training's multi_logloss: 0.117037	valid_1's multi_logloss: 0.119994
[400]	training's multi_logloss: 0.0585739	valid_1's multi_logloss: 0.0601427
[500]	training's multi_logloss: 0.0294545	valid_1's multi_logloss: 0.0302874
[600]	training's multi_logloss: 0.0150019	valid_1's multi_logloss: 0.0154656
[700]	training's multi_logloss: 0.00763799	valid_1's multi_logloss: 0.00789024
Did not meet early stopping. Best iteration is:
[700]	training's multi_logloss: 0.00763799	valid_1's multi_logloss: 0.00789024
Fold: 1 quadratic weighted kappa score: 1.0
Fold: 2
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.499684	valid_1's multi_logloss: 0.509104
[200]	training's multi_logloss: 0.238634	valid_1's multi_logloss: 0.243583
[300

## Inference

In [25]:
def run_predictions(models, df):
    y_pred = np.zeros((len(df), 4))
    for i, model in enumerate(models):
        y_pred += model.predict(df[features])
        print(f'Ran {i}th model ')
    return y_pred / num_splits

In [26]:
y_pred = run_predictions(models[:], comp_test_df)

Ran 0th model 
Ran 1th model 
Ran 2th model 
Ran 3th model 
Ran 4th model 
Ran 5th model 
Ran 6th model 
Ran 7th model 
Ran 8th model 
Ran 9th model 


In [27]:
np.unique(y_pred.argmax(-1), return_counts=True)

(array([0, 1, 2, 3]), array([716,  39,  41, 204]))

In [28]:
comp_test_df['accuracy_group'] = y_pred.argmax(-1)

## Submission

In [29]:
def prepare_submission(submission_df):
    sample_submission_df = pd.read_csv(path/'sample_submission.csv')
    sample_submission_df.drop('accuracy_group', inplace = True, axis = 1)
    sample_submission_df = sample_submission_df.merge(submission_df, on = 'installation_id')
    sample_submission_df = sample_submission_df[['installation_id', 'accuracy_group']]
    sample_submission_df.to_csv('submission.csv', index = False)

In [30]:
prepare_submission(comp_test_df)

In [31]:
!head submission.csv

installation_id,accuracy_group
00abaee7,3
01242218,3
017c5718,0
01a44906,0
01bc6cb6,0
02256298,3
0267757a,0
027e7ce5,0
02a29f99,0


In [None]:
!cat submission.csv | wc -l