# Imports
We use the `pathlib` library to navigate the file system.

In [None]:
import numpy as np
import pandas as pd

In [None]:
from pathlib import Path

In [None]:
# Directory that holds the input CSV files, given relative to the working directory.
PATH = Path('../input')

In [None]:
# Load the training features, test features and training labels, in that order.
X_train, X_test, y_train = (
    pd.read_csv(PATH/csv_name)
    for csv_name in ('X_train.csv', 'X_test.csv', 'y_train.csv')
)

In [None]:
# Preview the first rows of the training labels.
y_train.head()

In [None]:
# Preview the first rows of the raw training measurements.
X_train.head()

In [None]:
# Preview the first rows of the raw test measurements.
X_test.head()

# The feature engineering below is mostly adapted from other Kaggle kernels

In [None]:
import math
def quarternion_to_euler(x, y, z, w):
    """Convert a quaternion given as (x, y, z, w) into three Euler angles.

    Returns a tuple ``(X, Y, Z)`` of angles in radians. ``X`` and ``Z`` are
    computed with ``math.atan2`` and ``Y`` with ``math.asin``. The argument of
    ``asin`` is clamped to [-1, 1] so that slightly out-of-range values (for
    example from a quaternion that is not exactly unit length) do not raise
    ``ValueError``.
    """
    # First angle: atan2 of the sine-like and cosine-like terms.
    x_numer = 2.0 * (w * x + y * z)
    x_denom = 1.0 - 2.0 * (x * x + y * y)
    angle_x = math.atan2(x_numer, x_denom)

    # Second angle: clamp the sine term into asin's domain before inverting.
    y_sine = 2.0 * (w * y - z * x)
    if y_sine > 1.0:
        y_sine = 1.0
    elif y_sine < -1.0:
        y_sine = -1.0
    angle_y = math.asin(y_sine)

    # Third angle: same atan2 construction as the first.
    z_numer = 2.0 * (w * z + x * y)
    z_denom = 1.0 - 2.0 * (y * y + z * z)
    angle_z = math.atan2(z_numer, z_denom)

    return angle_x, angle_y, angle_z

In [None]:
def feature_engineering(df):
    """Build one row of summary features per ``series_id``.

    The function works in two steps:

    1. It adds derived per-row columns to ``df`` IN PLACE (the caller's frame
       is modified): ``total_angular_velocity``, ``total_linear_acceleration``,
       ``acc_vs_velocity``, ``euler_x``, ``euler_y``, ``euler_z``,
       ``total_angle``, ``angle_vs_acc`` and ``angle_vs_vel``.
    2. It groups the rows by ``series_id`` and, for every column except
       ``row_id``, ``series_id`` and ``measurement_number``, computes nine
       aggregates (mean, min, max, std, max/min ratio, mean absolute change,
       mean change of absolute change, max of absolute values, min of
       absolute values).

    ``df`` must contain ``series_id`` plus the ``orientation_{X,Y,Z,W}``,
    ``angular_velocity_{X,Y,Z}`` and ``linear_acceleration_{X,Y,Z}`` columns.

    Returns a new DataFrame indexed by ``series_id`` with one column per
    (feature column, aggregate) pair. The ratio features are plain divisions,
    so zero denominators produce inf/NaN values that are not cleaned up here.
    """
    # Magnitudes of the 3-axis angular velocity and linear acceleration vectors.
    df['total_angular_velocity'] = (df['angular_velocity_X']**2 + df['angular_velocity_Y']**2 + df['angular_velocity_Z']**2)**0.5
    df['total_linear_acceleration'] = (df['linear_acceleration_X']**2 + df['linear_acceleration_Y']**2 + df['linear_acceleration_Z']**2)**0.5
    df['acc_vs_velocity'] = df['total_linear_acceleration']/df['total_angular_velocity']

    # Convert every orientation quaternion into Euler angles, row by row.
    euler_angles = [
        quarternion_to_euler(qx, qy, qz, qw)
        for qx, qy, qz, qw in zip(
            df['orientation_X'].tolist(),
            df['orientation_Y'].tolist(),
            df['orientation_Z'].tolist(),
            df['orientation_W'].tolist(),
        )
    ]
    df['euler_x'] = [angles[0] for angles in euler_angles]
    df['euler_y'] = [angles[1] for angles in euler_angles]
    df['euler_z'] = [angles[2] for angles in euler_angles]

    df['total_angle'] = (df['euler_x']**2 + df['euler_y']**2 + df['euler_z']**2)**0.5
    df['angle_vs_acc'] = df['total_angle']/df['total_linear_acceleration']
    df['angle_vs_vel'] = df['total_angle']/df['total_angular_velocity']

    def mean_change_of_abs_change(x):
        # Mean of the differences between consecutive absolute differences.
        return np.mean(np.diff(np.abs(np.diff(x))))

    def mean_abs_change(x):
        # Mean absolute difference between consecutive values.
        return np.mean(np.abs(np.diff(x)))

    new_df = pd.DataFrame()
    # Group once and reuse the grouping for every column and aggregate.
    grouped = df.groupby(['series_id'])
    non_feature_cols = ('row_id', 'series_id', 'measurement_number')
    for col in df.columns:
        if col in non_feature_cols:
            continue
        per_series = grouped[col]
        new_df[col + '_mean'] = per_series.mean()
        new_df[col + '_min'] = per_series.min()
        new_df[col + '_max'] = per_series.max()
        new_df[col + '_std'] = per_series.std()
        new_df[col + '_max_to_min'] = new_df[col + '_max']/new_df[col + '_min']
        new_df[col + '_mean_abs_change'] = per_series.apply(mean_abs_change)
        # NOTE(review): unlike the other suffixes this one has no leading
        # underscore; it is kept as-is so the output column names do not change.
        new_df[col + 'mean_change_of_abs_change'] = per_series.apply(mean_change_of_abs_change)
        new_df[col + '_abs_max'] = per_series.apply(lambda x: np.max(np.abs(x)))
        new_df[col + '_abs_min'] = per_series.apply(lambda x: np.min(np.abs(x)))

    # Return only after the loop: returning from inside it would stop after
    # the first feature column and drop every other feature.
    return new_df

In [None]:
# Build the per-series feature tables, training set first and then the test set.
X_train_new, X_test_new = (
    feature_engineering(raw_frame) for raw_frame in (X_train, X_test)
)

In [None]:
# Preview the first rows of the engineered training features.
X_train_new.head()

In [None]:
# (rows, columns) of the engineered training feature table.
X_train_new.shape

In [None]:
# (rows, columns) of the engineered test feature table.
X_test_new.shape

## Since `y_train['surface']` is categorical, we use scikit-learn's `LabelEncoder` to convert the labels into integers that can be passed to XGBoost

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Learn the surface-name -> integer mapping, then overwrite the column with
# the integer codes. `le` is kept so codes can be mapped back to names.
# NOTE: running this twice would refit the encoder on the integer codes.
le = LabelEncoder()
le.fit(y_train['surface'])
y_train['surface'] = le.transform(y_train['surface'])

In [None]:
# Preview the labels again, now with `surface` holding integer codes.
y_train.head()

In [None]:
# Replace every non-finite entry (NaN, +inf, -inf) in both feature tables
# with 0: infinities are first turned into NaN, then all NaN become 0.
X_train_new.replace([np.inf, -np.inf], np.nan, inplace=True)
X_train_new.fillna(0, inplace=True)
X_test_new.replace([np.inf, -np.inf], np.nan, inplace=True)
X_test_new.fillna(0, inplace=True)

In [None]:
from sklearn.model_selection import StratifiedKFold

In [None]:
# Ten shuffled, stratified folds; the fixed seed keeps the split reproducible.
folds = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

# `XGBClassifier` takes the following parameters:

- `n_estimators`: the number of trees to build.
- `max_depth`: the maximum depth each tree may grow to.
- `learning_rate`: the learning rate to use.
- `objective`: the objective XGBoost optimizes. This is a multiclass classification problem, so we use *multi:softmax*.

## For training we use the `fit()` method; for prediction we use `predict_proba()` to get the probability of each class

In [None]:
from xgboost import XGBClassifier

In [None]:
# Cross-validated training: fit one XGBoost model per fold, collect
# out-of-fold class predictions for the training rows and average the
# per-fold class probabilities for the test rows.

# Take the class count from the fitted label encoder instead of hard-coding it.
n_classes = len(le.classes_)
sub_preds = np.zeros((X_test_new.shape[0], n_classes))
oof_preds = np.zeros((X_train_new.shape[0]))
score = 0
# NOTE(review): assumes the rows of X_train_new and y_train refer to the same
# series in the same order — confirm.
for fold, (train_idx, valid_idx) in enumerate(folds.split(X_train_new, y_train['surface'])):
    # The splitter yields positional indices, so select rows with .iloc on
    # both the features and the labels.
    X_fold_train = X_train_new.iloc[train_idx]
    y_fold_train = y_train['surface'].iloc[train_idx]
    X_fold_valid = X_train_new.iloc[valid_idx]
    y_fold_valid = y_train['surface'].iloc[valid_idx]

    eval_set = [(X_fold_valid, y_fold_valid)]
    xgb = XGBClassifier(n_estimators=400, max_depth=5, learning_rate=0.1, objective='multi:softmax',
                        gamma=0.001, n_jobs=-1)
    # NOTE(review): newer XGBoost releases expect `early_stopping_rounds` on the
    # constructor rather than on fit() — confirm against the installed version.
    xgb.fit(X_fold_train, y_fold_train, eval_set=eval_set,
            early_stopping_rounds=200, verbose=True)
    oof_preds[valid_idx] = xgb.predict(X_fold_valid)
    # Each fold contributes an equal share to the averaged test probabilities.
    sub_preds += xgb.predict_proba(X_test_new)/folds.n_splits
    # Score the fold once and reuse the value for both the total and the log.
    fold_score = xgb.score(X_fold_valid, y_fold_valid)
    score += fold_score
    print(f'Fold: {fold} score: {fold_score}')

# Report the mean only after all folds have run; inside the loop it would
# divide a partial sum by the total number of folds.
print('Avg Accuracy', score/folds.n_splits)

# Submission

For each test series we pick the class with the highest predicted probability among the 9 classes.
To do this, we use NumPy's `argmax()` function.

In [None]:
# Load the submission template, fill `surface` with the name of the most
# probable class for each row, and write the result to disk.
# NOTE(review): assumes the template rows are in the same order as the rows
# of sub_preds — confirm.
submission = pd.read_csv(PATH/'sample_submission.csv').assign(
    surface=le.inverse_transform(np.argmax(sub_preds, axis=1))
)
submission.to_csv('submission.csv', index=False)