In [1]:
from pathlib import Path

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.optim.lr_scheduler import _LRScheduler

In [3]:
# ROOT = Path.cwd().parent/'input'
ROOT = Path.home()/'data'/'careercon2019'

In [4]:
SAMPLE = ROOT/'sample_submission.csv'
TRAIN = ROOT/'X_train.csv'
TARGET = ROOT/'y_train.csv'
TEST = ROOT/'X_test.csv'

ID_COLS = ['series_id', 'measurement_number']

x_cols = {
    'series_id': np.uint32,
    'measurement_number': np.uint32,
    'orientation_X': np.float32,
    'orientation_Y': np.float32,
    'orientation_Z': np.float32,
    'orientation_W': np.float32,
    'angular_velocity_X': np.float32,
    'angular_velocity_Y': np.float32,
    'angular_velocity_Z': np.float32,
    'linear_acceleration_X': np.float32,
    'linear_acceleration_Y': np.float32,
    'linear_acceleration_Z': np.float32
}

y_cols = {
    'series_id': np.uint32,
    'group_id': np.uint32,
    'surface': str
}

In [5]:
x_trn = pd.read_csv(TRAIN, usecols=x_cols.keys(), dtype=x_cols)
x_tst = pd.read_csv(TEST, usecols=x_cols.keys(), dtype=x_cols)
y_trn = pd.read_csv(TARGET, usecols=y_cols.keys(), dtype=y_cols)

In [6]:
def add_euler_angles(df):
    """Adds Euler angles features to the dataset."""
    
    x, y, z, w = [df[f'orientation_{s}'] for s in list('XYZW')]
    nx, ny, nz = quaternion_to_euler(x, y, z, w)
    df['euler_X'] = nx
    df['euler_Y'] = ny
    df['euler_Z'] = nz
    return df

In [7]:
def quaternion_to_euler(x, y, z, w):
    """Converts quaternion values into Euler angles (roll, pitch and yaw)."""
    
    t0 = 2.0*(w*x + y*z)
    t1 = 1.0 - 2.0*(x*x + y*y)
    X = np.arctan2(t0, t1)
    
    t2 = np.clip(2.0*(w*y - z*x), -1, 1)
    Y = np.arcsin(t2)
    
    t3 = 2.0*(w*z + x*y)
    t4 = 1.0 - 2.0*(y*y + z*z)
    Z = np.arctan2(t3, t4)
    
    return X, Y, Z

In [8]:
def startswith(df, prefix):
    return df.columns[df.columns.str.startswith(prefix)].tolist()

In [9]:
data = pd.concat([x_trn, x_tst], axis=0).reset_index(drop=True)

In [10]:
data = add_euler_angles(data)

In [11]:
data = data.drop(columns=['measurement_number'] + startswith(data, 'orient'))

In [12]:
euler_cols = startswith(data, 'euler')
linear_cols = startswith(data, 'linear') 
angular_cols = startswith(data, 'angular')

In [13]:
data.sample(5).T

Unnamed: 0,530711,561100,539579,363522,726818
series_id,336.0,573.0,405.0,2840.0,1868.0
angular_velocity_X,0.32507,-0.079726,0.001113,0.00221,-0.025326
angular_velocity_Y,-0.092451,0.012845,0.043224,0.063437,0.008647
angular_velocity_Z,-0.073349,-0.026648,-0.10454,0.056979,-0.037716
linear_acceleration_X,0.76901,1.0259,-0.086337,-0.016545,-0.35746
linear_acceleration_Y,3.7963,1.1466,3.0587,2.6707,2.8771
linear_acceleration_Z,-4.8346,-10.614,-9.5006,-8.8855,-9.2629
euler_X,2.843421,2.835299,2.842177,2.841404,2.838207
euler_Y,-0.010106,-0.015245,-0.011932,-0.014216,-0.019533
euler_Z,-2.080324,1.962196,0.900356,-1.486659,3.056287


In [86]:
df = pd.DataFrame({
    'uid': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
    'x1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'x2': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'y': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120],
    'z': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
})

In [90]:
arr = df.values.reshape((4, 3, 5))

In [91]:
arr

array([[[   1,    1,    1,   10,  100],
        [   1,    2,    2,   20,  200],
        [   1,    3,    3,   30,  300]],

       [[   2,    4,    4,   40,  400],
        [   2,    5,    5,   50,  500],
        [   2,    6,    6,   60,  600]],

       [[   3,    7,    7,   70,  700],
        [   3,    8,    8,   80,  800],
        [   3,    9,    9,   90,  900]],

       [[   4,   10,   10,  100, 1000],
        [   4,   11,   11,  110, 1100],
        [   4,   12,   12,  120, 1200]]])

In [93]:
arr[:,:,1]

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [94]:
np.mean(arr[:,:,1])

6.5

In [71]:
df.groupby('uid')[list('xyz')].apply(lambda df: df.apply(abs_fft)).reset_index('uid')

Unnamed: 0,uid,x,y,z
0,1,23.0,230.0,2300.0
1,1,9.547725,95.477251,954.772505
2,1,4.67343,46.734298,467.342983
2,2,11.0,110.0,1100.0
7,2,5.0,50.0,500.0
4,3,11.0,110.0,1100.0
5,3,1.0,10.0,100.0


In [40]:
def abs_fft(arr): return np.abs(np.fft.rfft(arr))

In [73]:
def zero_mean(x): return x - x.mean()

In [74]:
def zscore(x): return (x - x.mean())/x.std()

In [75]:
groups = data.groupby('series_id')

In [76]:
data[euler_cols] = groups[euler_cols].diff().fillna(0)
data[linear_cols] = groups[linear_cols].transform(zero_mean)
data[angular_cols] = groups[angular_cols].transform(zero_mean)

In [101]:
fft_data = (
    groups[linear_cols + angular_cols]
    .apply(lambda df: df.apply(abs_fft, axis=0))
    .reset_index('series_id'))

In [96]:
cols = euler_cols + linear_cols + angular_cols
data[cols] = data[cols].apply(zscore, axis=0)

In [102]:
cols = linear_cols + angular_cols
fft_data[cols] = fft_data[cols].apply(zscore, axis=0)