### Goal: 
to predict the motion type (e.g. walking, jogging, sitting) from time-series sensor data

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        file_path = os.path.join(dirname, filename)
        print(file_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/motionsense-dataset/data_subjects_info.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/__MACOSX/A_DeviceMotion_data/wlk_7/._sub_13.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/__MACOSX/A_DeviceMotion_data/jog_9/._sub_5.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/__MACOSX/A_DeviceMotion_data/jog_9/._sub_4.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/__MACOSX/A_DeviceMotion_data/dws_1/._sub_1.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/dws_2/sub_20.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/dws_2/sub_13.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/dws_2/sub_18.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/dws_2/sub_8.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/dws_2/sub_1.csv
/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/dws_2/sub_3

Load the subject csv file

In [2]:
# Read the per-subject information table and preview its first rows.
subjects_csv_path = '/kaggle/input/motionsense-dataset/data_subjects_info.csv'
df_subjects = pd.read_csv(subjects_csv_path)
df_subjects.head()

Unnamed: 0,code,weight,height,age,gender
0,1,102,188,46,1
1,2,72,180,28,1
2,3,48,161,28,0
3,4,90,176,31,1
4,5,48,164,23,0


In [3]:
# (rows, columns) of the subject information table
df_subjects.shape

(24, 5)

As mentioned in the dataset's instruction file, there are 6 motion types:

1. dws: downstairs
2. ups: upstairs
3. sit: sitting
4. std: standing
5. wlk: walking
6. jog: jogging

In [4]:
motion_types = ["dws", "ups", "sit", "std", "wlk", "jog"]

import glob

# Folder that holds one sub-folder per "<motion>_<trial number>" recording
data_root = "/kaggle/input/motionsense-dataset/A_DeviceMotion_data/A_DeviceMotion_data/"

# Loop through motion types
for motion in motion_types:

    # Just list one for testing
    if (motion=='dws'):
        # "_[0-9]" matches single-digit trial folders, "_1[0-9]" matches
        # trial folders numbered 10-19 (presumably the long and short trials
        # of the dataset -- verify against the dataset description).
        file_long = data_root + motion + "_[0-9]/*.csv"
        file_short = data_root + motion + "_1[0-9]/*.csv"

        # glob.glob() returns matches in arbitrary order; sort them so that
        # positional picks such as files_short[0] are the same on every run.
        files_long = sorted(glob.glob(file_long))
        files_short = sorted(glob.glob(file_short))

Looking at one of the subjects as a test:

In [5]:
# Load the first file matched by the short-trial pattern, using the CSV's
# first column as the row index, and preview it.
first_short_file = files_short[0]
df_test = pd.read_csv(first_short_file, index_col=0)
df_test.head()

Unnamed: 0,attitude.roll,attitude.pitch,attitude.yaw,gravity.x,gravity.y,gravity.z,rotationRate.x,rotationRate.y,rotationRate.z,userAcceleration.x,userAcceleration.y,userAcceleration.z
0,-1.268206,-1.372092,0.361348,-0.188423,0.980323,-0.058821,-0.228163,-0.487235,-0.483135,0.321953,-0.014793,0.015898
1,-1.317775,-1.361761,0.321994,-0.2009,0.978232,-0.051945,-0.286251,-0.570325,-0.699216,0.347354,-0.017935,0.182789
2,-1.384831,-1.345995,0.26405,-0.219061,0.974838,-0.041214,-0.541399,-0.472189,-1.042954,0.377722,-0.040207,0.183823
3,-1.465241,-1.323203,0.188143,-0.2437,0.969505,-0.02582,-0.800513,-0.261303,-1.340932,0.453264,-0.110664,0.032152
4,-1.54342,-1.297331,0.113412,-0.269963,0.962841,-0.007392,-0.904708,-0.389954,-1.312653,0.576344,0.065037,-0.123055


Ideas:
- analyze the time series data with ARIMA techniques
- feed features into neural network to identify the motion

In [6]:
from statsmodels.api import tsa
from scipy.signal import periodogram

In [12]:
# Extract features
def my_features(df):
    """Build a per-column table of summary statistics for ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose numeric columns are summarised.

    Returns
    -------
    pandas.DataFrame
        One row per column summarised by ``df.describe()``; the columns are
        the ``describe()`` statistics plus ``iqr`` (75% - 25% quantile),
        ``kurtosis`` and ``skewness``.
    """
    # Use describe() method in pandas to obtain most of the features.
    # This must summarise the frame that was passed in, not a notebook global.
    df_summary = df.describe().transpose()
    df_summary['iqr'] = df_summary['75%'] - df_summary['25%']
    # numeric_only=True keeps these consistent with describe(), which skips
    # non-numeric columns when the frame has numeric ones; assignment then
    # aligns on the column names in df_summary's index.
    df_summary['kurtosis'] = df.kurtosis(axis=0, numeric_only=True)
    df_summary['skewness'] = df.skew(axis=0, numeric_only=True)

    return df_summary

In [13]:
# Leave the table as the cell's last expression so the notebook renders it
# with its rich DataFrame display instead of line-wrapped plain text.
my_features(df_test)

                    count      mean       std       min       25%       50%  \
attitude.roll       900.0  0.727966  1.007469 -3.140159  0.535299  0.801697   
attitude.pitch      900.0 -1.248181  0.165179 -1.567170 -1.359451 -1.286249   
attitude.yaw        900.0 -0.811605  1.498134 -3.122679 -2.057853 -1.275306   
gravity.x           900.0  0.192259  0.137578 -0.311523  0.133163  0.221345   
gravity.y           900.0  0.935688  0.060405  0.740605  0.909218  0.959789   
gravity.z           900.0 -0.175890  0.184573 -0.597772 -0.333513 -0.140111   
rotationRate.x      900.0 -0.031514  1.590247 -3.893512 -1.015669 -0.323885   
rotationRate.y      900.0 -0.357005  1.120977 -4.641148 -1.034303 -0.314097   
rotationRate.z      900.0  0.093603  1.061756 -2.688237 -0.596335 -0.055337   
userAcceleration.x  900.0  0.021938  0.323103 -1.421068 -0.186359  0.016449   
userAcceleration.y  900.0  0.035984  0.469142 -1.013496 -0.282239 -0.039593   
userAcceleration.z  900.0  0.039414  0.273445 -1.116

In [None]:
# Return coefficients for the top n frequencies (default 3)
def my_period(df, variable, n=3, plot=True):
    """Find the ``n`` strongest bins in the periodogram of one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the time series.
    variable : str
        Name of the column of ``df`` to analyse.
    n : int, default 3
        Number of periodogram bins to return. ``0`` returns empty arrays;
        values larger than the number of bins return every bin.
    plot : bool, default True
        When true, draw the periodogram (x axis limited to [0, 0.2]) before
        returning. Pass ``False`` to compute the peaks without plotting.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The selected power values and their frequencies, ordered by
        ascending power (strongest last). ``periodogram`` is called with its
        default sampling frequency, so frequencies are in cycles per sample.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f'n must be non-negative, got {n}')

    # Decompose the time series
    ts_variable = df[variable]
    f, PSD = periodogram(ts_variable)

    # Show the periodogram
    if plot:
        plt.plot(f, PSD)
        plt.xlim([0,0.2])
        plt.xlabel('Frequency (cycles/sample)')
        plt.ylabel('Power spectral density')
        plt.title(f'Periodogram of {variable}')
        plt.show()

    # Pick the n largest periodogram values. Slicing from an explicit start
    # avoids the [-0:] pitfall, where n=0 would select every bin.
    order = np.argsort(PSD)
    index = order[order.size - min(n, order.size):]
    print(f'Indices: {index}')
    print(f'Maximum on periodogram: {PSD[index]} at {f[index]}')

    return PSD[index], f[index]

In [None]:
# Get the peak from periodogram
# Then infer periodicity

PSD_roll, f_roll = my_period(df_test, 'attitude.roll')

# Frequency of the strongest of the returned peaks
peak_freq_roll = f_roll[np.argmax(PSD_roll)]
if peak_freq_roll <= 0:
    # 1/0 would give inf and int(inf) fails with an unhelpful OverflowError
    raise ValueError('Dominant periodogram peak is at zero frequency; cannot infer a period')

# Round to the nearest whole number of samples; plain int() truncates and
# would bias the period downward (e.g. 52.9 -> 52 instead of 53).
period_attitude_roll = int(round(1 / peak_freq_roll))

dc_att_roll = tsa.seasonal_decompose(df_test['attitude.roll'], model='additive', period = period_attitude_roll)

dc_att_roll.plot()
plt.show()