In [77]:
import pandas as pd
import numpy as np
import gc

# Scikit-learn
from sklearn.model_selection import StratifiedKFold,KFold 
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc

# Metrics
from sklearn.metrics import average_precision_score, roc_auc_score, mean_absolute_error

from sklearn import preprocessing

import os
import warnings
def ignore_warn(*args, **kwargs):
    pass
warnings.warn = ignore_warn

In [78]:
x = pd.read_csv('../input/X_train.csv')
y = pd.read_csv('../input/y_train.csv')
test = pd.read_csv('../input/X_test.csv')
sub = pd.read_csv('../input/sample_submission.csv')

In [79]:
x.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.75853,-0.63435,-0.10488,-0.10597,0.10765,0.017561,0.000767,-0.74857,2.103,-9.7532
1,0_1,0,1,-0.75853,-0.63434,-0.1049,-0.106,0.067851,0.029939,0.003385,0.33995,1.5064,-9.4128
2,0_2,0,2,-0.75853,-0.63435,-0.10492,-0.10597,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,0_3,0,3,-0.75852,-0.63436,-0.10495,-0.10597,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.096
4,0_4,0,4,-0.75852,-0.63435,-0.10495,-0.10596,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.441


In [80]:
y.head()

Unnamed: 0,series_id,group_id,surface
0,0,13,fine_concrete
1,1,31,concrete
2,2,20,concrete
3,3,31,concrete
4,4,22,soft_tiles


In [81]:
test.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.025773,-0.98864,-0.14801,0.00335,-0.006524,-0.001071,-0.02739,0.10043,4.2061,-5.5439
1,0_1,0,1,-0.025683,-0.98862,-0.14816,0.003439,-0.11396,0.083987,-0.06059,-0.70889,3.9905,-8.0273
2,0_2,0,2,-0.025617,-0.98861,-0.14826,0.003571,-0.080518,0.11486,-0.037177,1.4571,2.2828,-11.299
3,0_3,0,3,-0.025566,-0.98862,-0.14817,0.003609,0.070067,0.03382,-0.035904,0.71096,1.8582,-12.227
4,0_4,0,4,-0.025548,-0.98866,-0.14792,0.003477,0.15205,-0.029016,-0.015314,3.3996,2.7881,-10.41


In [82]:
x.shape, y.shape , test.shape

((487680, 13), (3810, 3), (488448, 13))

In [83]:
x['series_id'].nunique()

3810

In [84]:
x[x.isnull().any(axis=1)]

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z


In [85]:
y[y.isnull().any(axis=1)]

Unnamed: 0,series_id,group_id,surface


In [86]:
test[test.isnull().any(axis=1)]

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z


In [87]:
le = LabelEncoder()
le.fit(y['surface'])
y['surface'] = le.transform(y['surface'])

In [88]:
y.head()

Unnamed: 0,series_id,group_id,surface
0,0,13,2
1,1,31,1
2,2,20,1
3,3,31,1
4,4,22,6


In [89]:
#FE from https://www.kaggle.com/vanshjatana/help-humanity-by-helping-robots-4e306b
# https://stackoverflow.com/questions/53033620/how-to-convert-euler-angles-to-quaternions-and-get-the-same-euler-angles-back-fr?rq=1
def quaternion_to_euler(x, y, z, w):
    import math
    t0 = +2.0 * (w * x + y * z)
    t1 = +1.0 - 2.0 * (x * x + y * y)
    X = math.atan2(t0, t1)

    t2 = +2.0 * (w * y - z * x)
    t2 = +1.0 if t2 > +1.0 else t2
    t2 = -1.0 if t2 < -1.0 else t2
    Y = math.asin(t2)

    t3 = +2.0 * (w * z + x * y)
    t4 = +1.0 - 2.0 * (y * y + z * z)
    Z = math.atan2(t3, t4)

    return X, Y, Z

def fe(df):
    df['total_angular_velocity'] = (df['angular_velocity_X'] ** 2 + df['angular_velocity_Y'] ** 2 + df['angular_velocity_Z'] ** 2) ** 0.5
    df['total_linear_acceleration'] = (df['linear_acceleration_X'] ** 2 + df['linear_acceleration_Y'] ** 2 + df['linear_acceleration_Z'] ** 2) ** 0.5
    df['total_xyz'] = (df['orientation_X']**2 + df['orientation_Y']**2 +df['orientation_Z'])**0.5
    
    df['acc_vs_vel'] = df['total_linear_acceleration'] / df['total_angular_velocity']
    
    
    temp_df = pd.DataFrame()
    for col in df.columns[3:]:
        temp_df[col + '_mean'] = df.groupby(['series_id'])[col].mean()
        temp_df[col + '_median'] = df.groupby(['series_id'])[col].median()
        temp_df[col + '_max'] = df.groupby(['series_id'])[col].max()
        temp_df[col + '_min'] = df.groupby(['series_id'])[col].min()
        temp_df[col + '_std'] = df.groupby(['series_id'])[col].std()
        temp_df[col + '_range'] = temp_df[col + '_max'] - temp_df[col + '_min']
        temp_df[col + '_maxtoMin'] = temp_df[col + '_max'] / temp_df[col + '_min']
        temp_df[col + '_mean_abs_chg'] = df.groupby(['series_id'])[col].apply(lambda x: np.mean(np.abs(np.diff(x))))
        temp_df[col + '_abs_min'] = df.groupby(['series_id'])[col].apply(lambda x: np.min(np.abs(x)))
        temp_df[col + '_abs_max'] = df.groupby(['series_id'])[col].apply(lambda x: np.max(np.abs(x)))
        temp_df[col + '_abs_avg'] = (temp_df[col + '_abs_min'] + temp_df[col + '_abs_max'])/2
    return temp_df

In [90]:
train=fe(x)    
train.head()

Unnamed: 0_level_0,orientation_X_mean,orientation_X_median,orientation_X_max,orientation_X_min,orientation_X_std,orientation_X_range,orientation_X_maxtoMin,orientation_X_mean_abs_chg,orientation_X_abs_min,orientation_X_abs_max,orientation_X_abs_avg,orientation_Y_mean,orientation_Y_median,orientation_Y_max,orientation_Y_min,orientation_Y_std,orientation_Y_range,orientation_Y_maxtoMin,orientation_Y_mean_abs_chg,orientation_Y_abs_min,orientation_Y_abs_max,orientation_Y_abs_avg,orientation_Z_mean,orientation_Z_median,orientation_Z_max,orientation_Z_min,orientation_Z_std,orientation_Z_range,orientation_Z_maxtoMin,orientation_Z_mean_abs_chg,orientation_Z_abs_min,orientation_Z_abs_max,orientation_Z_abs_avg,orientation_W_mean,orientation_W_median,orientation_W_max,orientation_W_min,orientation_W_std,orientation_W_range,orientation_W_maxtoMin,...,total_angular_velocity_std,total_angular_velocity_range,total_angular_velocity_maxtoMin,total_angular_velocity_mean_abs_chg,total_angular_velocity_abs_min,total_angular_velocity_abs_max,total_angular_velocity_abs_avg,total_linear_acceleration_mean,total_linear_acceleration_median,total_linear_acceleration_max,total_linear_acceleration_min,total_linear_acceleration_std,total_linear_acceleration_range,total_linear_acceleration_maxtoMin,total_linear_acceleration_mean_abs_chg,total_linear_acceleration_abs_min,total_linear_acceleration_abs_max,total_linear_acceleration_abs_avg,total_xyz_mean,total_xyz_median,total_xyz_max,total_xyz_min,total_xyz_std,total_xyz_range,total_xyz_maxtoMin,total_xyz_mean_abs_chg,total_xyz_abs_min,total_xyz_abs_max,total_xyz_abs_avg,acc_vs_vel_mean,acc_vs_vel_median,acc_vs_vel_max,acc_vs_vel_min,acc_vs_vel_std,acc_vs_vel_range,acc_vs_vel_maxtoMin,acc_vs_vel_mean_abs_chg,acc_vs_vel_abs_min,acc_vs_vel_abs_max,acc_vs_vel_abs_avg
series_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,-0.758666,-0.75853,-0.75822,-0.75953,0.000363,0.00131,0.998275,1.5e-05,0.75822,0.75953,0.758875,-0.634008,-0.63427,-0.63306,-0.63456,0.000471,0.0015,0.997636,1.8e-05,0.63306,0.63456,0.63381,-0.105474,-0.1055,-0.10461,-0.10614,0.000432,0.00153,0.985585,5.1e-05,0.10461,0.10614,0.105375,-0.10647,-0.106555,-0.10559,-0.10705,0.000389,0.00146,0.986362,...,0.028263,0.151781,15.314654,0.024643,0.010603,0.162384,0.086494,9.939676,10.07786,12.575577,7.117152,0.989642,5.458424,1.766939,0.68903,7.117152,12.575577,9.846364,0.933845,0.933813,0.93438,0.933377,0.000304,0.001003,1.001075,3.6e-05,0.933377,0.93438,0.933879,223.282937,187.471573,995.561076,56.133014,135.64151,939.428062,17.735749,114.896199,56.133014,995.561076,525.847045
1,-0.958606,-0.958595,-0.95837,-0.95896,0.000151,0.00059,0.999385,2.3e-05,0.95837,0.95896,0.958665,0.241867,0.24189,0.2427,0.24074,0.000499,0.00196,1.008142,7e-05,0.24074,0.2427,0.24172,0.03165,0.031688,0.032341,0.030504,0.000508,0.001837,1.060222,4.8e-05,0.030504,0.032341,0.031423,-0.146876,-0.14691,-0.14587,-0.14809,0.000521,0.00222,0.985009,...,0.052645,0.267779,14.277849,0.04118,0.020167,0.287947,0.154057,10.082444,10.06995,16.986332,5.867993,1.826016,11.118339,2.894743,1.361058,5.867993,16.986332,11.427163,1.004528,1.004494,1.004888,1.004061,0.000228,0.000828,1.000824,2.3e-05,1.004061,1.004888,1.004474,118.973908,98.477877,611.826712,32.938319,88.48875,578.888393,18.574922,57.943094,32.938319,611.826712,322.382516
2,-0.512057,-0.512035,-0.50944,-0.51434,0.001377,0.0049,0.990473,4.1e-05,0.50944,0.51434,0.51189,-0.846171,-0.84621,-0.8449,-0.84779,0.000785,0.00289,0.996591,2.4e-05,0.8449,0.84779,0.846345,-0.129371,-0.129405,-0.12852,-0.1303,0.000541,0.00178,0.986339,5.9e-05,0.12852,0.1303,0.12941,-0.071082,-0.071139,-0.070378,-0.071535,0.000278,0.001157,0.983826,...,0.033906,0.161048,17.314963,0.023495,0.009871,0.170919,0.090395,10.035741,10.082289,12.7255,6.794325,0.944637,5.931175,1.87296,0.711678,6.794325,12.7255,9.759913,0.921325,0.921301,0.921914,0.920665,0.000378,0.001249,1.001356,4.2e-05,0.920665,0.921914,0.92129,190.134237,146.667138,904.269577,52.834789,132.78965,851.434788,17.115041,83.878164,52.834789,904.269577,478.552183
3,-0.939169,-0.93917,-0.93884,-0.93968,0.000227,0.00084,0.999106,2.6e-05,0.93884,0.93968,0.93926,0.31014,0.310115,0.31147,0.30943,0.000453,0.00204,1.006593,3.6e-05,0.30943,0.31147,0.31045,0.038955,0.038889,0.039799,0.037922,0.000449,0.001877,1.049496,6.6e-05,0.037922,0.039799,0.038861,-0.142319,-0.14251,-0.13934,-0.14437,0.001371,0.00503,0.965159,...,0.104812,0.509122,36.242842,0.078458,0.014446,0.523568,0.269007,10.888094,10.925683,20.833299,2.064233,3.102236,18.769065,10.092512,2.203234,2.064233,20.833299,11.448766,1.008554,1.008549,1.008685,1.00841,6.2e-05,0.000275,1.000273,2e-05,1.00841,1.008685,1.008547,115.134804,84.403391,805.92441,11.305069,112.264757,794.619341,71.288762,76.773215,11.305069,805.92441,408.61474
4,-0.891301,-0.89094,-0.88673,-0.89689,0.002955,0.01016,0.988672,8e-05,0.88673,0.89689,0.89181,0.428144,0.428865,0.4374,0.41646,0.006165,0.02094,1.050281,0.000165,0.41646,0.4374,0.42693,0.060056,0.060113,0.061771,0.058247,0.000985,0.003524,1.060501,3.4e-05,0.058247,0.061771,0.060009,-0.13646,-0.13656,-0.13538,-0.13732,0.000541,0.00194,0.985872,...,0.035394,0.140159,2.533818,0.008691,0.091379,0.231539,0.161459,9.892815,10.00562,11.485482,8.071708,0.765162,3.413774,1.422931,0.521694,8.071708,11.485482,9.778595,1.018738,1.018731,1.0195,1.017889,0.000478,0.001611,1.001583,1.4e-05,1.017889,1.0195,1.018694,66.057675,61.43018,111.588333,39.895232,16.611029,71.693101,2.797034,5.618844,39.895232,111.588333,75.741783


In [91]:
test_df=fe(test)    
test_df.head()

Unnamed: 0_level_0,orientation_X_mean,orientation_X_median,orientation_X_max,orientation_X_min,orientation_X_std,orientation_X_range,orientation_X_maxtoMin,orientation_X_mean_abs_chg,orientation_X_abs_min,orientation_X_abs_max,orientation_X_abs_avg,orientation_Y_mean,orientation_Y_median,orientation_Y_max,orientation_Y_min,orientation_Y_std,orientation_Y_range,orientation_Y_maxtoMin,orientation_Y_mean_abs_chg,orientation_Y_abs_min,orientation_Y_abs_max,orientation_Y_abs_avg,orientation_Z_mean,orientation_Z_median,orientation_Z_max,orientation_Z_min,orientation_Z_std,orientation_Z_range,orientation_Z_maxtoMin,orientation_Z_mean_abs_chg,orientation_Z_abs_min,orientation_Z_abs_max,orientation_Z_abs_avg,orientation_W_mean,orientation_W_median,orientation_W_max,orientation_W_min,orientation_W_std,orientation_W_range,orientation_W_maxtoMin,...,total_angular_velocity_std,total_angular_velocity_range,total_angular_velocity_maxtoMin,total_angular_velocity_mean_abs_chg,total_angular_velocity_abs_min,total_angular_velocity_abs_max,total_angular_velocity_abs_avg,total_linear_acceleration_mean,total_linear_acceleration_median,total_linear_acceleration_max,total_linear_acceleration_min,total_linear_acceleration_std,total_linear_acceleration_range,total_linear_acceleration_maxtoMin,total_linear_acceleration_mean_abs_chg,total_linear_acceleration_abs_min,total_linear_acceleration_abs_max,total_linear_acceleration_abs_avg,total_xyz_mean,total_xyz_median,total_xyz_max,total_xyz_min,total_xyz_std,total_xyz_range,total_xyz_maxtoMin,total_xyz_mean_abs_chg,total_xyz_abs_min,total_xyz_abs_max,total_xyz_abs_avg,acc_vs_vel_mean,acc_vs_vel_median,acc_vs_vel_max,acc_vs_vel_min,acc_vs_vel_std,acc_vs_vel_range,acc_vs_vel_maxtoMin,acc_vs_vel_mean_abs_chg,acc_vs_vel_abs_min,acc_vs_vel_abs_max,acc_vs_vel_abs_avg
series_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,-0.02581,-0.025748,-0.025156,-0.026418,0.000284,0.001262,0.95223,5.5e-05,0.025156,0.026418,0.025787,-0.988644,-0.988645,-0.98854,-0.98873,3.9e-05,0.00019,0.999808,1.5e-05,0.98854,0.98873,0.988635,-0.148006,-0.14801,-0.14748,-0.14872,0.000253,0.00124,0.991662,0.000105,0.14748,0.14872,0.1481,0.003147,0.003134,0.003609,0.002654,0.000233,0.000955,1.35981,...,0.039837,0.221576,21.513522,0.039292,0.010801,0.232378,0.12159,9.988501,9.937717,16.613628,6.133113,1.907297,10.480515,2.708841,1.534203,6.133113,16.613628,11.37337,0.911086,0.911082,0.91146,0.910583,0.00018,0.000877,1.000963,7.4e-05,0.910583,0.91146,0.911022,125.946166,103.231038,902.410168,37.998811,98.622296,864.411357,23.748379,69.077232,37.998811,902.410168,470.20449
1,-0.932288,-0.932115,-0.93148,-0.93372,0.000564,0.00224,0.997601,2.1e-05,0.93148,0.93372,0.9326,0.330271,0.330815,0.33227,0.32661,0.001654,0.00566,1.01733,5e-05,0.32661,0.33227,0.32944,0.043416,0.043395,0.044053,0.042283,0.000326,0.00177,1.041861,3.9e-05,0.042283,0.044053,0.043168,-0.140968,-0.14107,-0.13994,-0.14179,0.000601,0.00185,0.986953,...,0.048464,0.279794,27.971609,0.024851,0.010374,0.290167,0.150271,10.039059,9.967605,13.438388,4.023689,1.157338,9.414698,3.339817,0.684772,4.023689,13.438388,8.731039,1.010771,1.010779,1.010987,1.010342,0.000143,0.000645,1.000639,1.4e-05,1.010342,1.010987,1.010664,157.725288,130.131276,949.323991,20.871618,128.164165,928.452374,45.483968,62.630171,20.871618,949.323991,485.097805
2,-0.230186,-0.2306,-0.22713,-0.23141,0.001054,0.00428,0.981505,9.3e-05,0.22713,0.23141,0.22927,0.961448,0.96136,0.96217,0.96109,0.00026,0.00108,1.001124,3e-05,0.96109,0.96217,0.96163,0.14434,0.144395,0.14511,0.14356,0.000335,0.00155,1.010797,0.000115,0.14356,0.14511,0.144335,-0.042394,-0.042506,-0.041394,-0.042938,0.000363,0.001544,0.964041,...,0.063919,0.299943,12.049258,0.050656,0.027146,0.327088,0.177117,10.425586,10.23194,16.71346,5.497468,2.331304,11.215992,3.04021,1.76151,5.497468,16.71346,11.105464,1.059107,1.059115,1.059356,1.058854,0.000109,0.000502,1.000474,3.7e-05,1.058854,1.059356,1.059105,96.713753,74.451595,373.87543,27.512576,68.136484,346.362854,13.589256,45.578573,27.512576,373.87543,200.694003
3,0.164661,0.16426,0.1675,0.16332,0.001182,0.00418,1.025594,3.7e-05,0.16332,0.1675,0.16541,0.975293,0.975345,0.97551,0.97485,0.000182,0.00066,1.000677,7e-06,0.97485,0.97551,0.97518,0.146153,0.14621,0.14649,0.14582,0.000182,0.00067,1.004595,2.5e-05,0.14582,0.14649,0.146155,0.018096,0.018058,0.018634,0.017746,0.000298,0.000888,1.050039,...,0.018908,0.080541,27.924298,0.007728,0.002991,0.083532,0.043262,9.830262,9.867349,10.855842,8.557229,0.465892,2.298613,1.268617,0.346807,8.557229,10.855842,9.706536,1.060408,1.060429,1.060526,1.060286,6.5e-05,0.00024,1.000226,9e-06,1.060286,1.060526,1.060406,359.499051,262.282895,3422.456849,113.458242,362.702894,3308.998607,30.164903,140.419389,113.458242,3422.456849,1767.957546
4,-0.2536,-0.2539,-0.23637,-0.26938,0.009763,0.03301,0.877459,0.00026,0.23637,0.26938,0.252875,0.955712,0.95572,0.96018,0.9515,0.002578,0.00868,1.009122,6.8e-05,0.9515,0.96018,0.95584,0.142326,0.14219,0.14306,0.14112,0.000562,0.00194,1.013747,2.2e-05,0.14112,0.14306,0.14209,-0.044067,-0.044136,-0.04139,-0.046471,0.001488,0.005081,0.890663,...,0.018451,0.06825,1.376101,0.003784,0.181468,0.249718,0.215593,9.814084,9.814096,10.356973,9.342412,0.20885,1.014561,1.108597,0.102052,9.342412,10.356973,9.849692,1.058359,1.058275,1.058715,1.057846,0.000248,0.000869,1.000821,9e-06,1.057846,1.058715,1.05828,44.202873,42.910919,55.734927,38.67453,4.220007,17.060397,1.441127,0.908165,38.67453,55.734927,47.204728


In [92]:
train.fillna(0, inplace = True)
train.replace(-np.inf, 0, inplace = True)
train.replace(np.inf, 0, inplace = True)
test_df.fillna(0, inplace = True)
test_df.replace(-np.inf, 0, inplace = True)
test_df.replace(np.inf, 0, inplace = True)

In [93]:
train.shape, y['surface'].shape , test_df.shape

((3810, 154), (3810,), (3816, 154))

In [94]:
skf = StratifiedKFold(n_splits=50, shuffle=True, random_state=2019)

In [95]:
preds = np.zeros((test_df.shape[0],9))
measured= np.zeros((train.shape[0]))
total_score = 0   
for n_fold, (train_idx, valid_idx) in enumerate(skf.split(train.values, y['surface'].values)):
    print(n_fold)
    model = RandomForestClassifier(n_estimators = 500, n_jobs = -1)
    model.fit(train.iloc[train_idx],y['surface'][train_idx])
    measured[valid_idx] = model.predict(train.iloc[valid_idx])
    preds += model.predict_proba(test_df)/ 50
    total_score += model.score(train.iloc[valid_idx],y['surface'][valid_idx])
    print("Fold: {} score: {}".format(n_fold,model.score(train.iloc[valid_idx],y['surface'][valid_idx])))
print('Avg Accuracy', total_score / 50)


0
Fold: 0 score: 0.9506172839506173
1
Fold: 1 score: 0.8625
2
Fold: 2 score: 0.9382716049382716
3
Fold: 3 score: 0.8875
4
Fold: 4 score: 0.85
5
Fold: 5 score: 0.8625
6
Fold: 6 score: 0.9375
7
Fold: 7 score: 0.8987341772151899
8
Fold: 8 score: 0.8589743589743589
9
Fold: 9 score: 0.8860759493670886
10
Fold: 10 score: 0.9615384615384616
11
Fold: 11 score: 0.8987341772151899
12
Fold: 12 score: 0.8481012658227848
13
Fold: 13 score: 0.8717948717948718
14
Fold: 14 score: 0.9078947368421053
15
Fold: 15 score: 0.9210526315789473
16
Fold: 16 score: 0.8441558441558441
17
Fold: 17 score: 0.8552631578947368
18
Fold: 18 score: 0.8947368421052632
19
Fold: 19 score: 0.8831168831168831
20
Fold: 20 score: 0.8701298701298701
21
Fold: 21 score: 0.935064935064935
22
Fold: 22 score: 0.8831168831168831
23
Fold: 23 score: 0.8421052631578947
24
Fold: 24 score: 0.9090909090909091
25
Fold: 25 score: 0.8289473684210527
26
Fold: 26 score: 0.8831168831168831
27
Fold: 27 score: 0.9210526315789473
28
Fold: 28 score: 

In [96]:
sub['surface'] = le.inverse_transform(preds.argmax(axis=1))
sub

Unnamed: 0,series_id,surface
0,0,hard_tiles_large_space
1,1,concrete
2,2,tiled
3,3,carpet
4,4,soft_tiles
5,5,concrete
6,6,soft_pvc
7,7,concrete
8,8,wood
9,9,wood


In [97]:
sub.to_csv('submission.csv', index=False)