In [None]:
import pandas as pd
import numpy as np
import gc

# Gradient Boosting
import lightgbm as lgb

# Scikit-learn
from sklearn.model_selection import StratifiedKFold 
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc

from bayes_opt import BayesianOptimization

# Graphics
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Metrics
from sklearn.metrics import average_precision_score, roc_auc_score, mean_absolute_error

from sklearn import preprocessing

import os
import warnings
def ignore_warn(*args, **kwargs):
    """Accept any arguments and do nothing; used as a silent stand-in for ``warnings.warn``."""
    return None
warnings.warn = ignore_warn

In [None]:
# Read the four competition CSVs in a fixed order and bind them to the names
# the rest of the notebook uses: x (training rows), y (training targets),
# test (test rows) and sub (submission template).
x, y, test, sub = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/X_train.csv',
        '../input/y_train.csv',
        '../input/X_test.csv',
        '../input/sample_submission.csv',
    )
]

In [None]:
x.head()

In [None]:
y.head()

In [None]:
test.head()

In [None]:
x.shape, y.shape , test.shape

In [None]:
x['series_id'].nunique()

In [None]:
x[x.isnull().any(axis=1)]

In [None]:
y[y.isnull().any(axis=1)]

In [None]:
test[test.isnull().any(axis=1)]

In [None]:
# Fit the encoder on the raw surface names, then overwrite the column with the
# integer codes. `le` is kept so predictions can be mapped back to names later.
# NOTE: this overwrites y['surface'] in place, so re-running this cell alone
# would re-fit the encoder on the already-encoded integers.
le = LabelEncoder().fit(y['surface'])
y['surface'] = le.transform(y['surface'])

In [None]:
y.head()

In [None]:
# Feature engineering (FE) adapted from https://www.kaggle.com/vanshjatana/help-humanity-by-helping-robots-4e306b
# Quaternion-to-Euler conversion based on https://stackoverflow.com/questions/53033620/how-to-convert-euler-angles-to-quaternions-and-get-the-same-euler-angles-back-fr?rq=1
def quaternion_to_euler(x, y, z, w):
    """Convert one quaternion (x, y, z, w) into three Euler angles.

    Args:
        x, y, z, w: scalar quaternion components.

    Returns:
        Tuple ``(X, Y, Z)`` of angles in radians. ``X`` and ``Z`` come from
        ``atan2``; ``Y`` comes from ``asin`` of a term clamped to [-1, 1].
    """
    from math import asin, atan2

    # Angle about the first axis.
    x_numerator = 2.0 * (w * x + y * z)
    x_denominator = 1.0 - 2.0 * (x * x + y * y)
    angle_x = atan2(x_numerator, x_denominator)

    # Angle about the second axis. Clamp before asin so a slightly
    # non-normalised quaternion cannot push the argument outside [-1, 1].
    sin_y = 2.0 * (w * y - z * x)
    if sin_y > 1.0:
        sin_y = 1.0
    if sin_y < -1.0:
        sin_y = -1.0
    angle_y = asin(sin_y)

    # Angle about the third axis.
    z_numerator = 2.0 * (w * z + x * y)
    z_denominator = 1.0 - 2.0 * (y * y + z * z)
    angle_z = atan2(z_numerator, z_denominator)

    return angle_x, angle_y, angle_z

def fe(df):
    """Aggregate per-row sensor readings into one feature row per ``series_id``.

    Per-row derived columns are computed first (vector magnitudes, ratios and
    Euler angles obtained from the orientation quaternion). Then every column
    from the fourth one onwards is summarised per ``series_id``.

    Args:
        df: DataFrame with a ``series_id`` column plus ``orientation_{X,Y,Z,W}``,
            ``angular_velocity_{X,Y,Z}`` and ``linear_acceleration_{X,Y,Z}``.
            The first three columns are skipped during aggregation
            (presumably identifier columns -- confirm against the input files).
            The input frame is not modified.

    Returns:
        DataFrame indexed by ``series_id`` with, for each aggregated column,
        the suffixes ``_mean``, ``_max``, ``_min``, ``_std``, ``_maxtoMin``,
        ``_mean_abs_chg``, ``_abs_min``, ``_abs_max`` and ``_abs_avg``.
        Ratios may contain ``inf``/``NaN`` where a denominator is zero; the
        caller is expected to clean those up.
    """
    # Work on a copy so the caller's frame is not silently widened with the
    # intermediate per-row columns.
    df = df.copy()

    df['total_angular_velocity'] = (df['angular_velocity_X'] ** 2 + df['angular_velocity_Y'] ** 2 + df['angular_velocity_Z'] ** 2) ** 0.5
    df['total_linear_acceleration'] = (df['linear_acceleration_X'] ** 2 + df['linear_acceleration_Y'] ** 2 + df['linear_acceleration_Z'] ** 2) ** 0.5

    df['acc_vs_vel'] = df['total_linear_acceleration'] / df['total_angular_velocity']

    # Convert every orientation quaternion to Euler angles. Going through an
    # (n, 3) array keeps this working for an empty frame as well.
    quaternions = zip(
        df['orientation_X'].tolist(),
        df['orientation_Y'].tolist(),
        df['orientation_Z'].tolist(),
        df['orientation_W'].tolist(),
    )
    euler = np.array(
        [quaternion_to_euler(qx, qy, qz, qw) for qx, qy, qz, qw in quaternions],
        dtype=float,
    ).reshape(-1, 3)
    df['euler_x'] = euler[:, 0]
    df['euler_y'] = euler[:, 1]
    df['euler_z'] = euler[:, 2]

    # Magnitude of the Euler-angle vector. The exponent is 0.5 (square root),
    # matching the other "total_*" magnitudes above; it was previously 5.
    df['total_angle'] = (df['euler_x'] ** 2 + df['euler_y'] ** 2 + df['euler_z'] ** 2) ** 0.5
    df['angle_vs_acc'] = df['total_angle'] / df['total_linear_acceleration']
    df['angle_vs_vel'] = df['total_angle'] / df['total_angular_velocity']

    # Group once and collect all feature Series in a dict, then build the
    # result frame in one go instead of inserting columns one at a time.
    grouped = df.groupby('series_id')
    features = {}
    for col in df.columns[3:]:
        series = grouped[col]
        col_max = series.max()
        col_min = series.min()
        abs_min = series.apply(lambda s: np.min(np.abs(s)))
        abs_max = series.apply(lambda s: np.max(np.abs(s)))

        features[col + '_mean'] = series.mean()
        features[col + '_max'] = col_max
        features[col + '_min'] = col_min
        features[col + '_std'] = series.std()
        features[col + '_maxtoMin'] = col_max / col_min
        # Mean absolute difference between consecutive rows of the series.
        features[col + '_mean_abs_chg'] = series.apply(lambda s: np.mean(np.abs(np.diff(s))))
        features[col + '_abs_min'] = abs_min
        features[col + '_abs_max'] = abs_max
        # Midpoint of the absolute range, not the mean of absolute values.
        features[col + '_abs_avg'] = (abs_min + abs_max) / 2
    return pd.DataFrame(features)

In [None]:
train=fe(x)    
train.head()

In [None]:
test_df=fe(test)    
test_df.head()

In [None]:
# Replace NaN and +/-inf (produced by the ratio features and by std on
# degenerate groups) with 0 in both feature tables, in place.
for feature_frame in (train, test_df):
    feature_frame.fillna(0, inplace=True)
    feature_frame.replace([-np.inf, np.inf], 0, inplace=True)

In [None]:
train.shape, y['surface'].shape , test_df.shape

In [None]:
# Shuffled, seeded 10-fold stratified splitter used by the training loop.
skf = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=20,
)

In [None]:
# Cross-validated random forest. Test-set class probabilities are averaged
# over the folds; `measured` holds the out-of-fold predicted class codes.
labels = y['surface']
# Derive the fold and class counts from the objects that define them rather
# than repeating the literals 10 and 9, so changing `skf` or the label set
# cannot silently skew the averages or the array shape.
n_folds = skf.get_n_splits()
n_classes = labels.nunique()

preds = np.zeros((test_df.shape[0], n_classes))
measured = np.zeros(train.shape[0])
total_score = 0
for n_fold, (train_idx, valid_idx) in enumerate(skf.split(train.values, labels.values)):
    print(n_fold)
    # Seeded so that Restart & Run All reproduces the same scores and submission.
    model = RandomForestClassifier(n_estimators=500, random_state=20)
    # skf.split yields positional indices, so select labels with .iloc too.
    # Assumes rows of `y` are in the same order as rows of `train` -- confirm.
    model.fit(train.iloc[train_idx], labels.iloc[train_idx])
    measured[valid_idx] = model.predict(train.iloc[valid_idx])
    preds += model.predict_proba(test_df) / n_folds
    # Score the fold once and reuse the value for both the total and the log.
    fold_score = model.score(train.iloc[valid_idx], labels.iloc[valid_idx])
    total_score += fold_score
    print("Fold: {} score: {}".format(n_fold, fold_score))
print('Avg Accuracy', total_score / n_folds)


In [None]:
# Pick the most probable class per test row and map the integer codes back to
# the original surface names with the fitted encoder.
predicted_codes = preds.argmax(axis=1)
sub['surface'] = le.inverse_transform(predicted_codes)
sub

In [None]:
sub.to_csv('submission.csv', index=False)