In [1]:
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6023581/

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import timeit

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")


In [3]:
# Load the two training inputs from the working directory:
#   tr_time - the raw time-series samples
#   tr_lab  - the label table
tr_time = pd.read_csv(filepath_or_buffer="train_time_series.csv")
tr_lab = pd.read_csv(filepath_or_buffer="train_labels.csv")

In [4]:
# Tidy both frames loaded in the previous cell:
#   * the unnamed CSV index column becomes "measurement" (the sample id that
#     links the two tables), and
#   * columns that are not used later in the notebook are dropped.
# Both frames are derived from the already-loaded data instead of re-reading
# train_labels.csv from disk. `errors="ignore"` makes the drops no-ops when the
# columns are already gone, so re-running this cell does not raise KeyError.
tr_time = (
    tr_time
    .rename(columns={"Unnamed: 0": "measurement"})
    .drop(columns=["accuracy", "UTC time", "timestamp"], errors="ignore")
)
tr_lab = (
    tr_lab
    .rename(columns={"Unnamed: 0": "measurement"})
    .drop(columns=["UTC time", "timestamp"], errors="ignore")
)

In [5]:
# Display the cleaned time-series frame (measurement id plus x, y, z).
tr_time

Unnamed: 0,measurement,x,y,z
0,20586,-0.006485,-0.934860,-0.069046
1,20587,-0.066467,-1.015442,0.089554
2,20588,-0.043488,-1.021255,0.178467
3,20589,-0.053802,-0.987701,0.068985
4,20590,-0.054031,-1.003616,0.126450
...,...,...,...,...
3739,24325,0.024384,-0.710709,0.030304
3740,24326,0.487228,-1.099136,-0.015213
3741,24327,0.369446,-0.968506,0.036713
3742,24328,0.167877,-0.802826,0.049805


In [6]:
# Display the cleaned label frame (measurement id plus label).
tr_lab

Unnamed: 0,measurement,label
0,20589,1
1,20599,1
2,20609,1
3,20619,1
4,20629,1
...,...,...
370,24289,4
371,24299,4
372,24309,4
373,24319,4


In [7]:
def accel_mag(accels):
    """Return the Euclidean magnitude of an acceleration vector.

    `accels` holds the x, y and z components (for example a NumPy array or a
    pandas row). The result is the square root of the sum of the squared
    components.
    """
    sum_of_squares = np.sum(accels * accels)
    return np.sqrt(sum_of_squares)

def rotation(a, b=None):
    """Return the rotation angle (radians) defined by two acceleration axes.

    Axis pairings: for roll use a=y, b=z; for pitch use a=x, b=z; for yaw use
    a=y, b=x.

    Parameters
    ----------
    a : scalar or array-like
        First axis component(s).
    b : scalar or array-like, optional
        Second axis component(s). When omitted, ``np.arctan(a)`` is returned
        element-wise, exactly as the one-argument version of this function
        did, so existing single-argument calls keep working.

    Returns
    -------
    scalar or array-like
        ``np.arctan2(a, b)`` when ``b`` is given, otherwise ``np.arctan(a)``.

    Notes
    -----
    ``arctan2`` is used instead of ``arctan(a / b)`` so that ``b == 0`` does
    not divide by zero and the quadrant is taken from the signs of both
    inputs. The two agree whenever ``b > 0``.
    NOTE(review): if the reference formula is the plain ``arctan(a / b)``
    (range -pi/2..pi/2), swap the last line accordingly.
    """
    if b is None:
        return np.arctan(a)
    return np.arctan2(a, b)

In [8]:
# Build one row of summary features per entry of tr_lab.
#
# Each label with measurement id `i` summarises the samples of tr_time whose
# measurement id lies in the window (previous label id, i]. For every window
# the per-sample acceleration magnitude sqrt(x^2 + y^2 + z^2) is computed and
# reduced to: mean, standard deviation, variance, relative standard deviation
# (SD / mean), minimum and maximum. The results are written to tr_lab as the
# columns listed in `feature_columns`.
#
# Fallback idea if these features do not work: aggregate the x's, the y's and
# the z's separately (sum or mean) and use x, y, z as the model input instead.
# TODO: percentile (25/50/75) and roll/pitch/yaw mean/SD features were sketched
# here but never enabled; add them to the record dict below if needed.
start = timeit.default_timer()

feature_columns = ["mean_accel_mag", "SD_mag", "var_mag", "RSD_mag", "min_mag", "max_mag"]
sample_ids = tr_time.measurement.values

# Seed the lower bound just below the first sample id. The window test is a
# strict ">", so seeding with the first id itself would leave that sample out
# of every window.
previous = sample_ids[0] - 1
window_records = []

for i in tr_lab.measurement:
    # Samples belonging to this label: previous < measurement <= i.
    step = tr_time[(sample_ids > previous) & (sample_ids <= i)]

    # Magnitude of acceleration for every sample in the window, in one
    # vectorised pass instead of a row-wise apply.
    accels = step.loc[:, ["x", "y", "z"]]
    mags = np.sqrt((accels ** 2).sum(axis=1))

    # ddof=0 gives the population statistics that np.std / np.var return.
    # An empty window yields NaN for every feature rather than an error.
    mean_mag = mags.mean()
    sd_mag = mags.std(ddof=0)
    window_records.append({
        "mean_accel_mag": mean_mag,
        "SD_mag": sd_mag,
        "var_mag": mags.var(ddof=0),
        "RSD_mag": sd_mag / mean_mag,  # coefficient of variation
        "min_mag": mags.min(),
        "max_mag": mags.max(),
    })
    previous = i

# Write every feature column once, positionally (records are in tr_lab row
# order), instead of masking tr_lab six times per iteration.
window_features = pd.DataFrame(window_records, columns=feature_columns)
for column in feature_columns:
    tr_lab[column] = window_features[column].values

stop = timeit.default_timer()
print('Time: ', stop - start)  

20589
20599
20609
20619
20629
20639
20649
20659
20669
20679
20689
20699
20709
20719
20729
20739
20749
20759
20769
20779
20789
20799
20809
20819
20829
20839
20849
20859
20869
20879
20889
20899
20909
20919
20929
20939
20949
20959
20969
20979
20989
20999
21009
21019
21029
21039
21049
21059
21069
21079
21089
21099
21109
21119
21129
21139
21149
21159
21169
21179
21189
21199
21209
21219
21229
21239
21249
21259
21269
21279
21289
21299
21309
21319
21329
21339
21349
21359
21369
21379
21389
21399
21409
21419
21429
21439
21449
21459
21469
21479
21489
21499
21509
21519
21529
21539
21549
21559
21569
21579
21589
21599
21609
21619
21629
21639
21649
21659
21669
21679
21689
21699
21709
21719
21729
21739
21749
21759
21769
21779
21789
21799
21809
21819
21829
21839
21849
21859
21869
21879
21889
21899
21909
21919
21929
21939
21949
21959
21969
21979
21989
21999
22009
22019
22029
22039
22049
22059
22069
22079
22089
22099
22109
22119
22129
22139
22149
22159
22169
22179
22189
22199
22209
22219
22229
22239
2224

In [26]:
# Inspect `step`, the window of samples left over from the last iteration of
# the feature loop.
# NOTE(review): the saved output below spans hundreds of rows and carries an
# out-of-order execution count, so it was likely produced by a different
# version of the loop; re-run the notebook top to bottom to refresh it.
step

Unnamed: 0,measurement,x,y,z
1,20587,-0.066467,-1.015442,0.089554
2,20588,-0.043488,-1.021255,0.178467
3,20589,-0.053802,-0.987701,0.068985
4,20590,-0.054031,-1.003616,0.126450
5,20591,-0.010880,-0.967575,0.170898
...,...,...,...,...
739,21325,0.345886,-1.150284,-0.279800
740,21326,0.206909,-1.336136,0.169266
741,21327,0.170303,-1.256149,0.432053
742,21328,0.185028,-0.704224,-0.020248


In [55]:
# Sanity check: id of the first sample in tr_time, the value the feature loop
# reads when seeding its `previous` lower bound.
tr_time.measurement[0]

20586