In [30]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
import xgboost as xgb

In [32]:
path=Path('/kaggle/data_science_bowl')
path

PosixPath('/kaggle/data_science_bowl')

### Read Data

In [33]:
def read_data():
    train_df = pd.read_csv(path/'train.csv')
    test_df = pd.read_csv(path/'test.csv')
    train_labels_df = pd.read_csv(path/'train_labels.csv')
    sample_submission_df = pd.read_csv(path/'sample_submission.csv')
    return train_df, test_df, train_labels_df, sample_submission_df

In [34]:
train_df, test_df, train_labels_df, sample_submission_df = read_data()

In [35]:
train_df.head()

Unnamed: 0,event_id,game_session,timestamp,event_data,installation_id,event_count,event_code,game_time,title,type,world
0,27253bdc,45bb1e1b6b50c07b,2019-09-06T17:53:46.937Z,"{""event_code"": 2000, ""event_count"": 1}",0001e90f,1,2000,0,Welcome to Lost Lagoon!,Clip,NONE
1,27253bdc,17eeb7f223665f53,2019-09-06T17:54:17.519Z,"{""event_code"": 2000, ""event_count"": 1}",0001e90f,1,2000,0,Magma Peak - Level 1,Clip,MAGMAPEAK
2,77261ab5,0848ef14a8dc6892,2019-09-06T17:54:56.302Z,"{""version"":""1.0"",""event_count"":1,""game_time"":0...",0001e90f,1,2000,0,Sandcastle Builder (Activity),Activity,MAGMAPEAK
3,b2dba42b,0848ef14a8dc6892,2019-09-06T17:54:56.387Z,"{""description"":""Let's build a sandcastle! Firs...",0001e90f,2,3010,53,Sandcastle Builder (Activity),Activity,MAGMAPEAK
4,1bb5fbdb,0848ef14a8dc6892,2019-09-06T17:55:03.253Z,"{""description"":""Let's build a sandcastle! Firs...",0001e90f,3,3110,6972,Sandcastle Builder (Activity),Activity,MAGMAPEAK


In [36]:
test_df.head()

Unnamed: 0,event_id,game_session,timestamp,event_data,installation_id,event_count,event_code,game_time,title,type,world
0,27253bdc,0ea9ecc81a565215,2019-09-10T16:50:24.910Z,"{""event_code"": 2000, ""event_count"": 1}",00abaee7,1,2000,0,Welcome to Lost Lagoon!,Clip,NONE
1,27253bdc,c1ea43d8b8261d27,2019-09-10T16:50:55.503Z,"{""event_code"": 2000, ""event_count"": 1}",00abaee7,1,2000,0,Magma Peak - Level 1,Clip,MAGMAPEAK
2,27253bdc,7ed86c6b72e725e2,2019-09-10T16:51:51.805Z,"{""event_code"": 2000, ""event_count"": 1}",00abaee7,1,2000,0,Magma Peak - Level 2,Clip,MAGMAPEAK
3,27253bdc,7e516ace50e7fe67,2019-09-10T16:53:12.825Z,"{""event_code"": 2000, ""event_count"": 1}",00abaee7,1,2000,0,Crystal Caves - Level 1,Clip,CRYSTALCAVES
4,7d093bf9,a022c3f60ba547e7,2019-09-10T16:54:12.115Z,"{""version"":""1.0"",""round"":0,""event_count"":1,""ga...",00abaee7,1,2000,0,Chow Time,Game,CRYSTALCAVES


In [37]:
train_labels_df.head()

Unnamed: 0,game_session,installation_id,title,num_correct,num_incorrect,accuracy,accuracy_group
0,6bdf9623adc94d89,0006a69f,Mushroom Sorter (Assessment),1,0,1.0,3
1,77b8ee947eb84b4e,0006a69f,Bird Measurer (Assessment),0,11,0.0,0
2,901acc108f55a5a1,0006a69f,Mushroom Sorter (Assessment),1,0,1.0,3
3,9501794defd84e4d,0006a69f,Mushroom Sorter (Assessment),1,1,0.5,2
4,a9ef3ecb3d1acc6a,0006a69f,Bird Measurer (Assessment),1,0,1.0,3


## Feature Engineering

In [38]:
main_key = 'installation_id'

In [48]:
def get_event_counts(df, column:str='event_id', simple_event_count=True):
    df_event_count = df.groupby([main_key, column]).agg({column: ['count']}) \
    if simple_event_count else df.groupby([main_key, column]).agg({'event_count': 'sum'})
    df_event_count.columns = ['count']
    df_event_count_pivot = df_event_count.pivot_table(index = main_key, columns = [column], values = 'count')
    return df_event_count_pivot.fillna(0)

In [51]:
get_event_counts(train_df, column='event_id', simple_event_count=False)

event_id,003cd2ee,0086365d,00c73085,01ca3a3c,022b4259,02a42007,0330ab6a,0413e89d,04df9b66,05ad839b,...,f56e0afc,f5b8c21a,f6947f54,f71c4741,f7e47413,f806dc10,f93fc684,fbaf3456,fcfdffb6,fd20ea40
installation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0001e90f,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,24357.0,24453.0,311.0,0.0,0.0,0.0,0.0
000447c4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,139.0,0.0,0.0,0.0,0.0
0006a69f,0.0,0.0,0.0,0.0,2132.0,6200.0,0.0,1454.0,0.0,0.0,...,2.0,191.0,48.0,3144.0,2907.0,400.0,0.0,523.0,3517.0,0.0
0006c192,0.0,2.0,366.0,0.0,547.0,0.0,363.0,734.0,0.0,0.0,...,1.0,0.0,36.0,1698.0,1493.0,0.0,2.0,1287.0,3816.0,0.0
0009a5a9,0.0,0.0,957.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fff64664,0.0,0.0,0.0,0.0,0.0,0.0,8612.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1018.0,1042.0,0.0,4.0,0.0,0.0,0.0
fff70c1e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,183.0,194.0,0.0,0.0,0.0,0.0,0.0
fff744ce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fff944d5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
get_event_counts(train_df, simple_event_count=True)

event_id,003cd2ee,0086365d,00c73085,01ca3a3c,022b4259,02a42007,0330ab6a,0413e89d,04df9b66,05ad839b,...,f56e0afc,f5b8c21a,f6947f54,f71c4741,f7e47413,f806dc10,f93fc684,fbaf3456,fcfdffb6,fd20ea40
installation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0001e90f,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,66.0,66.0,4.0,0.0,0.0,0.0,0.0
000447c4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
0006a69f,0.0,0.0,0.0,0.0,24.0,70.0,0.0,29.0,0.0,0.0,...,2.0,3.0,2.0,48.0,45.0,9.0,0.0,18.0,45.0,0.0
0006c192,0.0,1.0,3.0,0.0,8.0,0.0,9.0,10.0,0.0,0.0,...,1.0,0.0,1.0,15.0,14.0,0.0,1.0,14.0,35.0,0.0
0009a5a9,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fff64664,0.0,0.0,0.0,0.0,0.0,0.0,57.0,0.0,0.0,0.0,...,0.0,0.0,0.0,17.0,17.0,0.0,2.0,0.0,0.0,0.0
fff70c1e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,10.0,10.0,0.0,0.0,0.0,0.0,0.0
fff744ce,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fff944d5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
agg_stats = ['mean', 'sum', 'min', 'max', 'std', 'skew', 'median']
main_key = 'installation_id'

def get_object_columns(df, columns):
    df = df.groupby([main_key, columns])['event_id'].count().reset_index()
    df = df.pivot_table(index = main_key, columns = [columns], values = 'event_id')
    df.columns = list(df.columns)
    df.fillna(0, inplace = True)
    return df

def get_numeric_columns(df, column):
    df = df.groupby(main_key).agg({f'{column}': agg_stats})
    df[column].fillna(df[column].mean(), inplace = True)
    df.columns = [f'{column}_{stat}' for stat in agg_stats]
    return df

def get_numeric_columns_add(df, agg_column, column):
    df = df.groupby([main_key, agg_column]).agg({f'{column}': agg_stats}).reset_index()
    df = df.pivot_table(index = main_key, columns = [agg_column], values = [col for col in df.columns if col not in [main_key, agg_column]])
    df[column].fillna(df[column].mean(), inplace = True)
    df.columns = list(df.columns)
    return df

def feature_engineering(train_df, test_df, train_labels_df):
    
    numerical_columns = ['game_time']
    categorical_columns = ['type', 'world']

    comp_train_df = pd.DataFrame({main_key: train_df[main_key].unique()})
    comp_train_df.set_index(main_key, inplace = True)
    comp_test_df = pd.DataFrame({main_key: test_df[main_key].unique()})
    comp_test_df.set_index(main_key, inplace = True)
    
    merge_args = {'left_index':True, 'right_index':True}

    for i in numerical_columns:
        comp_train_df = comp_train_df.merge(get_numeric_columns(train_df, i), **merge_args)
        comp_test_df = comp_test_df.merge(get_numeric_columns(test_df, i), **merge_args)
    
    for i in categorical_columns:
        comp_train_df = comp_train_df.merge(get_object_columns(train_df, i), **merge_args)
        comp_test_df = comp_test_df.merge(get_object_columns(test_df, i), **merge_args)
    
    for i in categorical_columns:
        for j in numerical_columns:
            comp_train_df = comp_train_df.merge(get_numeric_columns_add(train_df, i, j), **merge_args)
            comp_test_df = comp_test_df.merge(get_numeric_columns_add(test_df, i, j), **merge_args)
            
    comp_train_df = comp_train_df.merge(get_event_counts(train_df, column='event_id', simple_event_count=True), **merge_args)
    comp_test_df = comp_test_df.merge(get_event_counts(test_df, column='event_id', simple_event_count=True), **merge_args)
    
    comp_train_df = comp_train_df.merge(get_event_counts(train_df, column='event_code', simple_event_count=True), **merge_args)
    comp_test_df = comp_test_df.merge(get_event_counts(test_df, column='event_code', simple_event_count=True), **merge_args)
    
    comp_train_df.reset_index(inplace = True)
    comp_test_df.reset_index(inplace = True)
    
    print(f'Our training set has {comp_train_df.shape[0]} rows and {comp_train_df.shape[1]} columns')

    # get the mode of the title
    labels_map = dict(train_labels_df.groupby('title')['accuracy_group'].agg(lambda x:x.value_counts().index[0]))
    # merge target
    labels = train_labels_df[[main_key, 'title', 'accuracy_group']]
    # replace title with the mode
    labels.loc[:,'title'] = labels['title'].map(labels_map)
    # get title from the test set
    comp_test_df.loc[:,'title'] = test_df.groupby(main_key).last()['title'].map(labels_map).reset_index(drop = True)
    # join train with labels
    comp_train_df = labels.merge(comp_train_df, on = main_key, how = 'left')
    print(f'We have {comp_train_df.shape[0]} training rows')
    
    return comp_train_df, comp_test_df

In [53]:
comp_train_df, comp_test_df = feature_engineering(train_df, test_df, train_labels_df)

Our training set has 17000 rows and 498 columns


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


We have 17690 training rows


In [54]:
pd.options.display.max_columns = None
comp_test_df

Unnamed: 0,installation_id,game_time_mean,game_time_sum,game_time_min,game_time_max,game_time_std,game_time_skew,game_time_median,Activity,Assessment,Clip,Game,CRYSTALCAVES,MAGMAPEAK,NONE,TREETOPCITY,"(game_time, max, Activity)","(game_time, max, Assessment)","(game_time, max, Clip)","(game_time, max, Game)","(game_time, mean, Activity)","(game_time, mean, Assessment)","(game_time, mean, Clip)","(game_time, mean, Game)","(game_time, median, Activity)","(game_time, median, Assessment)","(game_time, median, Clip)","(game_time, median, Game)","(game_time, min, Activity)","(game_time, min, Assessment)","(game_time, min, Clip)","(game_time, min, Game)","(game_time, skew, Activity)","(game_time, skew, Assessment)","(game_time, skew, Clip)","(game_time, skew, Game)","(game_time, std, Activity)","(game_time, std, Assessment)","(game_time, std, Clip)","(game_time, std, Game)","(game_time, sum, Activity)","(game_time, sum, Assessment)","(game_time, sum, Clip)","(game_time, sum, Game)","(game_time, max, CRYSTALCAVES)","(game_time, max, MAGMAPEAK)","(game_time, max, NONE)","(game_time, max, TREETOPCITY)","(game_time, mean, CRYSTALCAVES)","(game_time, mean, MAGMAPEAK)","(game_time, mean, NONE)","(game_time, mean, TREETOPCITY)","(game_time, median, CRYSTALCAVES)","(game_time, median, MAGMAPEAK)","(game_time, median, NONE)","(game_time, median, TREETOPCITY)","(game_time, min, CRYSTALCAVES)","(game_time, min, MAGMAPEAK)","(game_time, min, NONE)","(game_time, min, TREETOPCITY)","(game_time, skew, CRYSTALCAVES)","(game_time, skew, MAGMAPEAK)","(game_time, skew, NONE)","(game_time, skew, TREETOPCITY)","(game_time, std, CRYSTALCAVES)","(game_time, std, MAGMAPEAK)","(game_time, std, NONE)","(game_time, std, TREETOPCITY)","(game_time, sum, CRYSTALCAVES)","(game_time, sum, MAGMAPEAK)","(game_time, sum, NONE)","(game_time, sum, TREETOPCITY)",0086365d,00c73085,022b4259,02a42007,0330ab6a,0413e89d,04df9b66,05ad839b,06372577,070a5291,08fd73f3,08ff79ad,0a08139c,0d18d96c,0d1da71f,0db6d71d,1325467d,1340b8d7,1375ccb7,14de4c5d,155f62a4,1575e76c,15a43e5b,15ba1109,15eb4a7d,15f99afc,160654fd,16667cc5,16dffff1,17113b36,19967db1,1996c610,1af8be29,1bb5fbdb,1beb320a,1c178d24,1cc7cfca,1cf54632,1f19558b,222660ff,2230fab4,250513af,25fa8af4,262136f4,26a5a3dd,26fd2d99,27253bdc,28520915,28a4eb9a,28ed704e,28f975ea,29bdd9ba,29f54413,2a444e03,2a512369,2b058fe3,2b9272f4,2c4e6db0,2dc29e21,2dcad279,2fb91ec1,30614231,30df3273,31973d56,3323d7e9,33505eae,3393b68b,363c86c9,363d3849,36fa3ebe,37937459,37c53127,37db1c2f,37ee8496,38074c54,392e14df,3a4be871,3afb49e6,3afde5dd,3b2048ee,3babcb9b,3bb91ced,3bb91dda,3bf1cf26,3bfd1a65,3ccd3f02,3d0b9317,3d63345e,3d8c61b0,3dcdda7f,3ddc79c3,3dfd4aa4,3edf6747,3ee399c3,44cb4907,45d01abe,461eace6,46b50ba8,46cd75b4,47026d5f,47efca07,47f43a44,48349b14,4901243f,499edb7c,49ed92e9,4a09ace1,4a4c3d21,4b5efe37,4bb2f698,4c2ec19f,4d6737eb,4d911100,4e5fc6f5,4ef8cdd3,51102b85,51311d7a,5154fc30,5290eab1,532a2afb,5348fd84,53c6e11a,55115cbd,562cec5f,565a3990,56817e2b,56bcd38d,56cd3b43,5859dfb6,587b5989,58a0de5c,598f4598,5a848010,5b49460a,5be391b5,5c2f29ca,5c3d2b2f,5d042115,5de79a6a,5e109ec3,5e3ea25a,5e812b27,5f0eb72c,5f5b2617,6043a2b4,6077cc36,6088b756,63f13dd7,65a38bf7,65abac75,67439901,67aa2ada,69fdac0a,6aeafed4,6bf9e3e1,6c517a88,6c930e6e,6cf7d25c,6d90d394,6f445b57,6f4adc4b,6f4bd64e,6f8106d9,7040c096,709b1251,71e712d8,71fe8f75,731c0cbe,736f9581,7372e1a5,73757a5e,7423acbc,74e5f8a7,7525289a,756e5507,763fc34e,76babcde,77261ab5,77c76bc5,77ead60d,792530f8,795e4a37,7961e599,7ab78247,7ad3efc6,7cf1bc53,7d093bf9,7d5c30a2,7da34a02,7dfe6d8a,7ec0c298,7f0836bf,804ee27f,828e68f9,832735e1,83c6c409,84538528,84b0e0c8,857f21c0,85d1b0de,85de926c,86ba578b,86c924c4,87d743c1,884228c8,88d4a5be,895865f3,89aace00,8ac7cce4,8af75982,8b757ab8,8d748b58,8d7e386c,8d84fa81,8f094001,8fee50e2,907a054b,90d848e0,90ea0bac,90efca10,91561152,923afab1,92687c59,93b353f2,93edfe2e,9554a50b,99abe2bb,99ea62f3,9b01374f,9b23e8ee,9b4001e4,9c5ef70c,9ce586dd,9d29771f,9d4e7b25,9de5e594,9e34ea74,9e4c8c7b,9e6b7fb5,9ed8f6da,9ee1c98c,a0faea5d,a1192f43,a16a373e,a1bbe385,a1e4395d,a29c5338,a2df0760,a44b10dc,a52b92d5,a592d54e,a5be6304,a5e9da97,a6d66e51,a76029ee,a7640a16,a8876db3,a8a78786,a8efe47b,ab3136ba,abc5811c,ac92046e,acf5c23f,ad148f58,ad2fc29c,b012cd7f,b120f2ac,b1d5101d,b2dba42b,b2e5b0f1,b5053438,b74258a0,b7530680,b7dc8128,b80e5e84,b88f38da,bb3e370b,bbfe0445,bc8f2793,bcceccc6,bd612267,bd701df8,bdf49a58,beb0a7b9,c0415e5c,c189aaf2,c1cac9a2,c277e121,c2baf0bd,c51d8688,c54cf6c5,c58186bf,c6971acf,c7128948,c74f40cd,c7f7f0e1,c7fe2a55,c952eb01,ca11f653,cb1178ad,cb6010f8,cc5087a3,cdd22e43,cf7638f3,cf82af56,cfbd47c8,d02b7a8e,d06f75b5,d122731b,d185d3ea,d2278a3b,d2659ab4,d2e9262e,d3268efa,d3640339,d38c2fd7,d3f1e122,d45ed6a1,d51b1749,d88ca108,d88e8f25,d9c005dd,daac11b0,db02c830,dcaede90,dcb55a27,de26c3a6,df4940d3,df4fe8b6,e04fb33d,e080a381,e37a2b78,e3ff61fb,e4f1efe6,e5734469,e57dd7af,e5c9df6f,e64e2cfd,e694a35b,e720d930,e7561dd2,e79f3763,e7e44842,e9c52111,ea296733,ea321fb1,eb2c19cd,ec138c1c,ecaab346,ecc36b7f,f28c589a,f32856e4,f3cd5473,f50fc6c1,f54238ee,f56e0afc,f5b8c21a,f6947f54,f71c4741,f7e47413,f806dc10,f93fc684,fbaf3456,fcfdffb6,fd20ea40,2000,2010,2020,2025,2030,2035,2040,2050,2060,2070,2075,2080,2081,2083,3010,3020,3021,3110,3120,3121,4010,4020,4021,4022,4025,4030,4031,4035,4040,4045,4050,4070,4080,4090,4095,4100,4110,4220,4230,4235,5000,5010,title
0,00abaee7,63567.408986,55176511,0,1960630,149911.784066,11.578484,40657.0,454.0,27.0,14.0,373.0,253.0,241.0,1.0,373.0,105916.000000,30038.0,0.0,1.960630e+06,38077.154185,14008.074074,0.0,100566.394102,30520.000000,14303.0,0.0,70287.000000,0.0,0.0,0.0,0.0,0.768075,0.089729,0.0,7.965669,30100.806203,9279.184749,0.0,220845.119349,1.728703e+07,378218.0,0.0,3.751126e+07,135794.00000,188805.000000,0.0,1960630.0,54616.913043,69526.593361,0.0,65958.506702,52104.000000,56998.000000,0.0,33074.0,0.0,0.0,0.0,0.0,0.376034,0.510952,0.0,8.243170,37649.869386,53238.970991,0.0,222474.449033,1.381808e+07,1.675591e+07,0.0,24602523.0,0.0,296.0,0.0,3403.0,2780.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,0.0,30.0,0.0,1264.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,894.0,0.0,0.0,665.0,0.0,57.0,0.0,2865.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,305.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,62.0,480.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,371.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,273.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,348.0,0.0,0.0,0.0,1619.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,648.0,0.0,0.0,0.0,0.0,0.0,48.0,0.0,0.0,0.0,55.0,0.0,1156.0,0.0,0.0,0.0,0.0,21.0,5.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,339.0,0.0,31.0,0.0,3.0,1133.0,0.0,0.0,0.0,0.0,0.0,0.0,4249.0,1.0,0.0,0.0,0.0,21.0,337.0,625.0,1.0,0.0,1.0,7.0,0.0,0.0,28.0,0.0,0.0,26.0,1046.0,0.0,450.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3747.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,292.0,54.0,381.0,0.0,0.0,705.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,0.0,0.0,953.0,0.0,0.0,65.0,0.0,0.0,0.0,60.0,0.0,861.0,26.0,0.0,25.0,0.0,1.0,587.0,2234.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2110.0,1002.0,0.0,0.0,59.0,0.0,0.0,0.0,25.0,0.0,0.0,0.0,2.0,0.0,0.0,53.0,0.0,0.0,0.0,0.0,0.0,0.0,2912.0,202.0,0.0,22.0,1345.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64.0,0.0,0.0,0.0,0.0,55.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,330.0,0.0,0.0,0.0,0.0,0.0,3150.0,0.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,26.0,26.0,409.0,60.0,374.0,0.0,0.0,0.0,7.0,21.0,0.0,0.0,0.0,0.0,5088.0,3131.0,408.0,5136.0,3234.0,420.0,6.0,7022.0,705.0,0.0,348.0,8231.0,0.0,1637.0,0.0,0.0,0.0,11192.0,0.0,0.0,0.0,22.0,0.0,0.0,0.0,0.0,0.0,0.0,3
1,01242218,75770.044869,206018752,0,317027,61789.322138,1.421303,62056.0,1356.0,245.0,29.0,1089.0,885.0,848.0,2.0,984.0,317027.000000,82149.0,0.0,1.694920e+05,87635.177729,27704.387755,0.0,73827.250689,65968.500000,21625.0,0.0,70527.000000,0.0,0.0,0.0,0.0,1.211983,0.764551,0.0,0.263853,74745.148527,21524.172814,0.0,40292.737670,1.188333e+08,6787575.0,0.0,8.039788e+07,317027.00000,283765.000000,0.0,178550.0,81169.943503,85886.292453,0.0,62349.365854,67509.000000,65141.000000,0.0,51824.0,0.0,0.0,0.0,0.0,1.640714,1.068934,0.0,0.680239,67990.867851,68507.373099,0.0,45265.064084,7.183540e+07,7.283158e+07,0.0,61351776.0,2.0,201.0,803.0,2137.0,334.0,186.0,16.0,0.0,3.0,110.0,1018.0,333.0,4239.0,0.0,100.0,141.0,784.0,409.0,98.0,199.0,2.0,111.0,644.0,1.0,148.0,587.0,34.0,0.0,210.0,75.0,0.0,698.0,0.0,1685.0,7.0,521.0,405.0,1.0,114.0,0.0,370.0,529.0,75.0,255.0,0.0,931.0,29.0,56.0,68.0,31.0,261.0,1.0,3.0,1034.0,7.0,36.0,398.0,63.0,417.0,75.0,180.0,46.0,0.0,0.0,1036.0,4.0,61.0,55.0,72.0,217.0,0.0,410.0,232.0,41.0,0.0,37.0,0.0,0.0,78.0,0.0,6560.0,0.0,16.0,0.0,1.0,52.0,155.0,0.0,914.0,42.0,1247.0,18.0,0.0,38.0,448.0,34.0,3005.0,0.0,638.0,229.0,0.0,0.0,1.0,2.0,6242.0,101.0,236.0,140.0,2.0,2958.0,3083.0,0.0,813.0,0.0,415.0,391.0,1.0,6488.0,17.0,33.0,0.0,8.0,110.0,551.0,648.0,147.0,9943.0,1415.0,34.0,218.0,92.0,1075.0,359.0,1.0,2.0,12.0,982.0,3977.0,0.0,10.0,6845.0,2011.0,158.0,0.0,0.0,0.0,21.0,88.0,2.0,2.0,1960.0,0.0,499.0,0.0,504.0,1912.0,46.0,568.0,1.0,0.0,102.0,0.0,0.0,2.0,244.0,112.0,4160.0,0.0,3.0,255.0,392.0,42.0,664.0,35.0,1.0,0.0,559.0,2.0,0.0,784.0,687.0,3.0,161.0,5043.0,2.0,830.0,1.0,10.0,347.0,254.0,94.0,163.0,414.0,8.0,559.0,17.0,739.0,1367.0,125.0,0.0,0.0,0.0,1359.0,2.0,3185.0,36.0,36.0,0.0,1.0,205.0,492.0,0.0,427.0,2.0,9.0,253.0,10.0,1.0,33.0,3779.0,32.0,63.0,0.0,109.0,0.0,0.0,3.0,4.0,1.0,1.0,90.0,1.0,79.0,79.0,0.0,67.0,815.0,85.0,0.0,12.0,1045.0,49.0,0.0,250.0,586.0,58.0,147.0,0.0,829.0,65.0,559.0,51.0,522.0,107.0,0.0,68.0,10.0,229.0,904.0,588.0,4.0,0.0,0.0,0.0,97.0,75.0,0.0,0.0,1621.0,13.0,89.0,12.0,1323.0,1.0,5108.0,1809.0,4325.0,1482.0,161.0,470.0,1223.0,118.0,106.0,1772.0,441.0,69.0,128.0,0.0,123.0,11.0,17.0,255.0,3.0,37.0,83.0,1.0,223.0,0.0,108.0,0.0,233.0,2.0,3174.0,548.0,789.0,567.0,0.0,35.0,9.0,168.0,2.0,10.0,136.0,86.0,0.0,0.0,626.0,0.0,128.0,0.0,0.0,2.0,111.0,2.0,302.0,0.0,1115.0,2019.0,115.0,43.0,0.0,105.0,238.0,0.0,0.0,0.0,81.0,1.0,1708.0,1257.0,476.0,825.0,0.0,1303.0,112.0,1328.0,0.0,39.0,11.0,0.0,552.0,52.0,1807.0,106.0,0.0,1.0,166.0,96.0,1017.0,1036.0,120.0,2.0,188.0,2986.0,2.0,58.0,161.0,4465.0,19.0,5248.0,52.0,302.0,410.0,19.0,21.0,30.0,490.0,128.0,309.0,27857.0,1193.0,4232.0,28413.0,1291.0,4337.0,24.0,22763.0,1151.0,3624.0,6371.0,28359.0,698.0,4518.0,821.0,241.0,0.0,24947.0,0.0,333.0,0.0,1061.0,75.0,409.0,0.0,0.0,107.0,112.0,3
2,017c5718,33017.233333,4952585,0,60943,17140.293312,-0.421696,35616.5,143.0,1.0,6.0,0.0,0.0,0.0,4.0,146.0,60943.000000,0.0,0.0,6.529826e+05,34633.461538,0.000000,0.0,180209.144675,36862.000000,0.0,0.0,190872.320732,0.0,0.0,0.0,0.0,-0.418506,0.663060,0.0,0.689734,15871.435721,38723.783016,0.0,182855.771844,4.952585e+06,0.0,0.0,1.617186e+08,316635.54966,665107.285714,0.0,60943.0,75932.838429,117047.319657,0.0,33921.815068,60235.606803,72952.560113,0.0,36318.0,0.0,0.0,0.0,0.0,0.578195,0.917524,0.0,-0.430958,75616.844935,143236.743283,0.0,16462.002358,5.277859e+07,1.391002e+08,0.0,4952585.0,0.0,0.0,0.0,542.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,127.0,0.0,0.0,0.0,0.0,0.0,950.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,444.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,279.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,650.0,0.0,126.0,0.0,0.0,0.0,0.0,0.0,623.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,131.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,368.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,853.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,712.0,0.0,0.0,776.0,0.0,0.0,0.0,499.0,0.0,853.0,127.0,1492.0,0.0,0.0,0.0,0.0,0.0,723.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
3,01a44906,41162.901709,9632119,0,85983,28696.300044,0.231737,32967.0,145.0,1.0,10.0,78.0,0.0,0.0,3.0,231.0,85983.000000,0.0,0.0,7.720400e+04,46314.958621,0.000000,0.0,37390.384615,39433.000000,0.0,0.0,36726.500000,0.0,0.0,0.0,0.0,0.051522,0.663060,0.0,0.094331,30760.619218,38723.783016,0.0,20391.645155,6.715669e+06,0.0,0.0,2.916450e+06,316635.54966,665107.285714,0.0,85983.0,75932.838429,117047.319657,0.0,41697.484848,60235.606803,72952.560113,0.0,33817.0,0.0,0.0,0.0,0.0,0.578195,0.917524,0.0,0.222744,75616.844935,143236.743283,0.0,28492.647474,5.277859e+07,1.391002e+08,0.0,9632119.0,0.0,0.0,0.0,113.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,694.0,0.0,147.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,74.0,710.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,43.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,278.0,0.0,671.0,0.0,361.0,0.0,0.0,0.0,0.0,0.0,2350.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,103.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,178.0,0.0,317.0,0.0,0.0,0.0,0.0,0.0,158.0,0.0,0.0,0.0,101.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,141.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,114.0,0.0,0.0,0.0,0.0,144.0,0.0,0.0,0.0,307.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,125.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2039.0,0.0,14.0,0.0,74.0,103.0,141.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,520.0,101.0,144.0,590.0,114.0,147.0,2.0,1142.0,0.0,2039.0,361.0,3157.0,0.0,0.0,0.0,0.0,0.0,989.0,0.0,43.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
4,01bc6cb6,147664.880252,140576966,0,511237,128478.809653,1.250042,106076.5,226.0,1.0,17.0,708.0,522.0,3.0,3.0,424.0,221698.000000,0.0,0.0,5.112370e+05,110746.128319,0.000000,0.0,163203.871469,121294.000000,0.0,0.0,105111.000000,0.0,0.0,0.0,0.0,-0.074129,0.663060,0.0,1.015493,57606.576415,38723.783016,0.0,141120.117860,2.502862e+07,0.0,0.0,1.155483e+08,221698.00000,0.000000,0.0,511237.0,90758.699234,0.000000,0.0,219813.502358,84012.000000,0.000000,0.0,200473.5,0.0,0.0,0.0,0.0,0.291697,0.000000,0.0,0.298199,52164.580560,0.000000,0.0,155679.905961,4.737604e+07,0.000000e+00,0.0,93200925.0,0.0,0.0,0.0,0.0,599.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,359.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.0,0.0,0.0,0.0,13447.0,0.0,2404.0,0.0,661.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2349.0,12220.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4890.0,82.0,0.0,0.0,187.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,81.0,0.0,0.0,0.0,0.0,1426.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,429.0,591.0,0.0,0.0,0.0,0.0,2965.0,0.0,0.0,0.0,4.0,2426.0,0.0,0.0,0.0,0.0,919.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,447.0,9965.0,0.0,0.0,9936.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,440.0,0.0,0.0,0.0,0.0,933.0,0.0,839.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,450.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,446.0,0.0,0.0,0.0,0.0,0.0,0.0,1625.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,368.0,0.0,0.0,878.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4604.0,78.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3313.0,0.0,85.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1386.0,753.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,1377.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,83.0,0.0,2371.0,0.0,602.0,0.0,6678.0,0.0,0.0,1066.0,1320.0,0.0,0.0,182.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1419.0,3.0,0.0,0.0,1.0,2388.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1562.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,25.0,0.0,2880.0,1386.0,2900.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.0,7776.0,2054.0,3062.0,7521.0,2161.0,3091.0,13.0,16472.0,0.0,429.0,0.0,25904.0,0.0,8241.0,839.0,187.0,0.0,16125.0,0.0,0.0,831.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,fee254cf,56850.495283,12052305,0,155008,43941.426092,0.454220,49113.5,0.0,116.0,4.0,92.0,0.0,1.0,1.0,210.0,582662.403823,155008.0,0.0,1.149210e+05,108733.747862,53599.681034,0.0,63421.108696,80719.541219,33487.0,0.0,71305.000000,0.0,0.0,0.0,0.0,0.867264,0.752817,0.0,-0.280526,127624.865673,49701.794696,0.0,34001.061136,9.439616e+07,6217563.0,0.0,5.834742e+06,316635.54966,0.000000,0.0,155008.0,75932.838429,0.000000,0.0,57391.928571,60235.606803,0.000000,0.0,49970.5,0.0,0.0,0.0,0.0,0.578195,0.917524,0.0,0.446872,75616.844935,143236.743283,0.0,43796.157031,5.277859e+07,0.000000e+00,0.0,12052305.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,68.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.0,0.0,0.0,0.0,0.0,0.0,0.0,724.0,0.0,141.0,0.0,0.0,0.0,29.0,0.0,0.0,0.0,4.0,0.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,148.0,740.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70.0,0.0,331.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,360.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,587.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1342.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,186.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,257.0,0.0,0.0,0.0,0.0,0.0,354.0,0.0,53.0,0.0,0.0,0.0,59.0,0.0,33.0,0.0,0.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,75.0,90.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,284.0,0.0,0.0,0.0,0.0,74.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,135.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80.0,0.0,0.0,0.0,0.0,138.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,119.0,0.0,0.0,0.0,0.0,69.0,0.0,0.0,9.0,103.0,220.0,77.0,286.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,654.0,74.0,249.0,600.0,80.0,257.0,2.0,998.0,0.0,0.0,389.0,1380.0,0.0,0.0,64.0,0.0,0.0,1715.0,0.0,0.0,90.0,97.0,47.0,0.0,0.0,0.0,0.0,0.0,3
996,ff57e602,44772.023102,13565923,0,139394,31785.997875,0.699889,39918.0,127.0,29.0,11.0,136.0,246.0,0.0,1.0,56.0,69318.000000,36169.0,0.0,1.393940e+05,33302.031496,17072.896552,0.0,65010.669118,33137.000000,18600.0,0.0,65769.500000,0.0,0.0,0.0,0.0,0.026164,0.026070,0.0,0.160738,18880.279305,11068.777982,0.0,32210.700278,4.229358e+06,495114.0,0.0,8.841451e+06,139394.00000,665107.285714,0.0,57697.0,48402.142276,117047.319657,0.0,29624.928571,45304.000000,72952.560113,0.0,29701.5,0.0,0.0,0.0,0.0,0.576489,0.917524,0.0,-0.158853,33037.671890,143236.743283,0.0,18885.540724,1.190693e+07,1.391002e+08,0.0,1658996.0,0.0,0.0,0.0,328.0,177.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,195.0,0.0,0.0,1066.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,81.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,146.0,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,136.0,0.0,0.0,0.0,1.0,607.0,0.0,0.0,0.0,0.0,593.0,0.0,0.0,0.0,0.0,200.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,79.0,1044.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,109.0,0.0,0.0,0.0,101.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,110.0,0.0,0.0,0.0,0.0,1.0,137.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1022.0,102.0,0.0,0.0,24.0,0.0,0.0,0.0,81.0,0.0,0.0,0.0,0.0,0.0,0.0,189.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,56.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,0.0,143.0,0.0,0.0,0.0,28.0,0.0,27.0,0.0,0.0,0.0,267.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,250.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,140.0,0.0,438.0,0.0,0.0,276.0,0.0,0.0,24.0,88.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,153.0,149.0,0.0,343.0,0.0,0.0,0.0,0.0,0.0,0.0,56.0,0.0,0.0,26.0,0.0,0.0,85.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,17.0,28.0,126.0,0.0,249.0,0.0,0.0,0.0,81.0,0.0,8.0,0.0,0.0,0.0,509.0,314.0,307.0,431.0,348.0,315.0,4.0,2325.0,0.0,30.0,0.0,1749.0,0.0,526.0,0.0,0.0,0.0,1935.0,0.0,0.0,133.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0
997,ffc73fb2,61802.442966,32508085,0,199825,53869.630318,0.728098,46648.0,238.0,256.0,32.0,0.0,154.0,132.0,1.0,239.0,199825.000000,149031.0,0.0,6.529826e+05,81037.907563,51644.777344,0.0,180209.144675,78483.000000,28885.0,0.0,190872.320732,0.0,0.0,0.0,0.0,0.519924,0.899523,0.0,0.689734,53386.528402,48963.311394,0.0,182855.771844,1.928702e+07,13221063.0,0.0,1.617186e+08,149031.00000,64865.000000,0.0,199825.0,73178.383117,21596.681818,0.0,76936.619247,64435.000000,17101.000000,0.0,78483.0,0.0,0.0,0.0,0.0,0.101301,0.693637,0.0,0.433389,52554.796817,15886.543489,0.0,57100.791765,1.126947e+07,2.850762e+06,0.0,18387852.0,0.0,0.0,0.0,3289.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,303.0,372.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0,0.0,0.0,273.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,106.0,0.0,0.0,0.0,32.0,106.0,0.0,33.0,0.0,0.0,0.0,0.0,0.0,70.0,0.0,0.0,0.0,139.0,0.0,91.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,52.0,444.0,0.0,0.0,47.0,0.0,19.0,6.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,15.0,0.0,0.0,1113.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,340.0,127.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5858.0,69.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,93.0,125.0,0.0,249.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1027.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,122.0,0.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,70.0,0.0,46.0,0.0,0.0,0.0,0.0,0.0,0.0,2840.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,254.0,0.0,166.0,0.0,0.0,0.0,0.0,2213.0,0.0,0.0,0.0,0.0,3142.0,0.0,0.0,2158.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,0.0,0.0,84.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,160.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,257.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3134.0,0.0,0.0,0.0,0.0,0.0,253.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123.0,0.0,0.0,39.0,116.0,82.0,2.0,151.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2647.0,320.0,240.0,2747.0,326.0,250.0,0.0,3739.0,122.0,0.0,1239.0,6689.0,0.0,1183.0,459.0,0.0,0.0,9598.0,0.0,0.0,0.0,395.0,0.0,0.0,0.0,0.0,0.0,0.0,3
998,ffe00ca8,25213.420849,6530276,0,72242,20859.265204,0.705400,19590.0,123.0,110.0,11.0,15.0,5.0,139.0,1.0,114.0,72242.000000,43571.0,0.0,2.256400e+04,35926.439024,17185.645455,0.0,14726.866667,39648.000000,15751.5,0.0,18086.000000,0.0,0.0,0.0,0.0,-0.061215,0.511187,0.0,-0.720429,23350.701961,11718.642969,0.0,8108.364710,4.418952e+06,1890421.0,0.0,2.209030e+05,0.00000,43571.000000,0.0,72242.0,0.000000,15573.532374,0.0,38294.342105,0.000000,14588.000000,0.0,41087.5,0.0,0.0,0.0,0.0,0.000000,0.604975,0.0,-0.201029,0.000000,11511.482848,0.0,22603.416385,0.000000e+00,2.164721e+06,0.0,4365555.0,0.0,0.0,487.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0,0.0,1754.0,0.0,0.0,0.0,54.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,82.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,360.0,0.0,389.0,0.0,0.0,0.0,0.0,0.0,348.0,0.0,0.0,0.0,0.0,0.0,555.0,0.0,141.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,286.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,71.0,105.0,176.0,0.0,0.0,0.0,904.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1808.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,127.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,331.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,180.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,176.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,803.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,19.0,0.0,118.0,0.0,82.0,0.0,0.0,0.0,3.0,0.0,10.0,0.0,0.0,0.0,2119.0,67.0,176.0,2200.0,71.0,180.0,2.0,389.0,0.0,0.0,487.0,1358.0,0.0,358.0,176.0,0.0,0.0,1282.0,0.0,127.0,0.0,141.0,0.0,0.0,0.0,0.0,0.0,0.0,3


## Normalize

In [55]:
# comp_train_df.columns = [c if type(c) != tuple else '_'.join(c) for c in comp_train_df.columns]

In [56]:
# comp_test_df.columns = [c if type(c) != tuple else '_'.join(c) for c in comp_test_df.columns]

In [57]:
# list(comp_train_df.columns)

In [58]:
# def normalize(df: pd.DataFrame, cont_names):
#     "Compute the means and stds of `self.cont_names` columns to normalize them."
#     means, stds = {},{}
#     for n in cont_names:
#         means[n], stds[n] = df[n].mean(), df[n].std()
#         df[n] = (df[n]-means[n]) / (1e-7 + stds[n])

In [59]:
# normalize(comp_train_df, [c for c in comp_train_df.columns if c not in ['installation_id', 'title', 'accuracy_group']])
# normalize(comp_test_df, [c for c in comp_test_df.columns if c not in ['installation_id', 'title', 'accuracy_group']])

## Training

In [60]:
# quadratic weighted kappa
def qwk3(a1, a2, max_rat=3):
    '''
    a1 - ground truth
    a2 - predicted values
    '''
    assert(len(a1) == len(a2))
    a1 = np.asarray(a1, dtype=int)
    a2 = np.asarray(a2, dtype=int)
    hist1 = np.zeros((max_rat + 1, ))
    hist2 = np.zeros((max_rat + 1, ))
    o = 0
    for k in range(a1.shape[0]):
        i, j = a1[k], a2[k]
        hist1[i] += 1
        hist2[j] += 1
        o +=  (i - j) * (i - j)
    e = 0
    for i in range(max_rat + 1):
        for j in range(max_rat + 1):
            e += hist1[i] * hist2[j] * (i - j) * (i - j)
    e = e / a1.shape[0]
    return 1 - o / e

In [61]:
features = [i for i in comp_train_df.columns if i not in ['accuracy_group', 'installation_id']]
target = 'accuracy_group'
num_splits = 10
params = {
    'learning_rate': 0.007,
    'metric': 'multiclass',
    'objective': 'multiclass',
    'num_classes': 4,
    'feature_fraction': 0.75,
    "bagging_fraction": 0.8,
    "bagging_seed": 42,
}

def train_model(comp_train_df, comp_test_df):
    
    kf = KFold(n_splits=num_splits, shuffle=True)
    
    oof_pred = np.zeros((len(comp_train_df), 4))
    models = []
    
    for fold, (tr_ind, val_ind) in enumerate(kf.split(comp_train_df)):
        print(f'Fold: {fold+1}')
        x_train, x_val = comp_train_df[features].iloc[tr_ind], comp_train_df[features].iloc[val_ind]
        y_train, y_val = comp_train_df[target][tr_ind], comp_train_df[target][val_ind]
        train_set = lgb.Dataset(x_train, y_train)
        val_set = lgb.Dataset(x_val, y_val)

        model = lgb.train(params, train_set, num_boost_round = 10000, early_stopping_rounds = 100, 
                          valid_sets=[train_set, val_set], verbose_eval = 100)
        oof_pred[val_ind] = model.predict(x_val)
        models.append(model)
        
        val_crt_fold = qwk3(y_val, oof_pred[val_ind].argmax(axis = 1))
        print(f'Fold: {fold+1} quadratic weighted kappa score: {np.round(val_crt_fold,4)}')
        
    res = qwk3(comp_train_df['accuracy_group'], oof_pred.argmax(axis = 1))
    print(f'Quadratic weighted score: {np.round(res,4)}')
        
    return models

In [62]:
models = train_model(comp_train_df, comp_test_df)

Fold: 1
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.956024	valid_1's multi_logloss: 1.00513
[200]	training's multi_logloss: 0.832317	valid_1's multi_logloss: 0.899192
[300]	training's multi_logloss: 0.763243	valid_1's multi_logloss: 0.846633
[400]	training's multi_logloss: 0.719918	valid_1's multi_logloss: 0.818297
[500]	training's multi_logloss: 0.690785	valid_1's multi_logloss: 0.804245
[600]	training's multi_logloss: 0.670045	valid_1's multi_logloss: 0.798212
[700]	training's multi_logloss: 0.654129	valid_1's multi_logloss: 0.796477
[800]	training's multi_logloss: 0.641024	valid_1's multi_logloss: 0.797507
Early stopping, best iteration is:
[716]	training's multi_logloss: 0.651836	valid_1's multi_logloss: 0.79627
Fold: 1 quadratic weighted kappa score: 0.625
Fold: 2
Training until validation scores don't improve for 100 rounds
[100]	training's multi_logloss: 0.955845	valid_1's multi_logloss: 0.986787
[200]	training's multi_logloss:

## Inference

In [20]:
def add_missing_columns(comp_train_df: pd.DataFrame, comp_test_df: pd.DataFrame):
    missing: set = set(comp_train_df.columns) - set(comp_test_df.columns)
    for col in missing:
        comp_test_df[col] = 0.
    print(f'Added missing colums: {missing}')

In [21]:
add_missing_columns(comp_train_df, comp_test_df)

Added missing colums: {'4074bac2', '0ce40006', '17ca3959', 'accuracy_group', '1b54d27f', 'ecc6157f', '611485c5', '13f56524', '003cd2ee', 'e4d32835', '29a42aea', '5dc079d8', '01ca3a3c', 'ab4ec3a4', '119b5b02', 'bfc77bd6', 'dcb1663e', '7fd1ac25', 'a8cc6fec', '2ec694de'}


In [22]:
def run_predictions(models):
    y_pred = np.zeros((len(comp_test_df), 4))
    for model in models:
        y_pred += model.predict(comp_test_df[features])
    return y_pred / num_splits

In [23]:
y_pred = run_predictions(models)

In [24]:
assert comp_test_df.shape[0] == y_pred.shape[0]

In [25]:
np.unique(y_pred.argmax(-1), return_counts=True)

(array([0, 1, 2, 3]), array([652,  41,  29, 278]))

In [26]:
def prepare_submission(comp_test_df, sample_submission_df, y_pred):
    comp_test_df = comp_test_df.reset_index()
    comp_test_df = comp_test_df[['installation_id']]
    comp_test_df['accuracy_group'] = y_pred.argmax(axis = 1)
    sample_submission_df.drop('accuracy_group', inplace = True, axis = 1)
    sample_submission_df = sample_submission_df.merge(comp_test_df, on = 'installation_id')
    sample_submission_df.to_csv('submission.csv', index = False)

In [27]:
prepare_submission(comp_test_df, sample_submission_df, y_pred)

In [28]:
!head submission.csv

installation_id,accuracy_group
00abaee7,3
01242218,3
017c5718,0
01a44906,0
01bc6cb6,0
02256298,3
0267757a,0
027e7ce5,3
02a29f99,0


In [29]:
!cat submission.csv | wc -l

1001
