In [82]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

import warnings
warnings.filterwarnings('ignore')

# Root folder of the DeviceMotion recordings; the loading cell lists its sub-folders
# and reads every CSV file found inside each of them.
# NOTE(review): absolute, machine-specific location - it has to be adjusted before the
# notebook can run on another computer.
path = 'C:/Users/STRUSI Cosimo/Documents/Uni/Consegna Ramat/projectApprendimento/A_DeviceMotion_data/'

In [83]:
# Setting up lists for importing the data.

# os.listdir returns entries in arbitrary order; sorting makes the row order of the
# final dataframe reproducible from run to run.
list_of_folders = sorted(os.listdir(path))
# Keep only real sub-folders: a stray file next to them (README, .DS_Store, ...) would
# otherwise make the inner os.listdir call fail.
list_of_directories = [os.path.join(path, i) for i in list_of_folders if os.path.isdir(os.path.join(path, i))]
list_of_dfs = []

activities = ['dws', 'ups', 'sit', 'std', 'wlk', 'jog']

# Iterating through the directories and appending the dataframes for each subject in each of the trials to a list for further use.

for i in list_of_directories:
    # The folder name is parsed as '<activity>_<trial>'. It is taken from the last path
    # component instead of a fixed position in the split path, so the parsing keeps
    # working when `path` points to a location with a different directory depth.
    folder_name = os.path.basename(os.path.normpath(i))
    activity, trial = folder_name.split('_')[:2]
    for j in sorted(os.listdir(i)):
        # Ignore anything that is not a recording (hidden files, notes, ...).
        if not j.lower().endswith('.csv'):
            continue
        k = pd.read_csv(os.path.join(i, j)).rename(columns={'Unnamed: 0': 'time'})
        # The file name is parsed as '<prefix>_<subject>.csv'.
        k['subject'] = int(j.split('_')[1].split('.')[0])
        k['activity'] = activity
        k['trial'] = int(trial)
        # Total acceleration per axis = user acceleration + gravity component.
        k.insert(0, 'accel_x', k['userAcceleration.x'] + k['gravity.x'])
        k.insert(1, 'accel_y', k['userAcceleration.y'] + k['gravity.y'])
        k.insert(2, 'accel_z', k['userAcceleration.z'] + k['gravity.z'])
        # Euclidean norm of the total acceleration vector.
        k.insert(3, 'accel_norm', np.sqrt(k['accel_x']**2 + k['accel_y']**2 + k['accel_z']**2))
        list_of_dfs.append(k)

# Creating one big dataframe (df) from the list of all individual dataframes. Dropping unnecessary columns and renaming the ones for rotation.

df = pd.concat(list_of_dfs).set_index('time')
df = df.drop(['attitude.roll', 'attitude.pitch', 'attitude.yaw', 'gravity.x', 'gravity.y', 'gravity.z', 'userAcceleration.x', 'userAcceleration.y', 'userAcceleration.z'], axis=1)
df = df.rename(columns={'rotationRate.x': 'gyro_x', 'rotationRate.y': 'gyro_y', 'rotationRate.z': 'gyro_z'})
print(df)

       accel_x   accel_y   accel_z  accel_norm    gyro_x    gyro_y    gyro_z  \
time                                                                           
0     1.036789  0.485275  0.345870    1.195847  0.316738  0.778180  1.082764   
1     0.972504  0.692962  0.082611    1.196990  0.842032  0.424446  0.643574   
2     0.770325  0.784256 -0.200515    1.117437 -0.138143 -0.040741  0.343563   
3     0.752320  0.784576  0.053818    1.088320 -0.025005 -1.048717  0.035860   
4     0.959503  1.001206 -0.102829    1.390551  0.114253 -0.912890  0.047341   
...        ...       ...       ...         ...       ...       ...       ...   
4749  0.251328  0.759232  0.037216    0.800615 -0.920226  0.406762 -0.383194   
4750  0.265595  0.745987  0.076386    0.795533 -0.923594  0.231559 -0.521862   
4751  0.181992  0.919510  0.110749    0.943867 -0.970054  0.003083 -0.621238   
4752  0.166809  0.974457  0.083908    0.992186 -1.005371 -0.108297 -0.708937   
4753  0.149765  0.992234  0.044785    1.

In [84]:
from sensplit.dataframe_splitter import DataFrameSplitter

# Values handed to the splitter as `train_trials`. Because `trial_col` is set to
# 'subject' below, they are presumably matched against the 'subject' column, i.e. the
# split is done per subject - confirm against the sensplit documentation.
test_split_train_subjects = [
    1., 3., 4., 6., 7., 8., 10., 11., 12., 14.,
    15., 16., 17., 18., 19., 20., 21., 22., 24.,
]

dfs = DataFrameSplitter(method="trials")

# First split: the full dataframe is divided into a training part and a test part.
train_data, test_data = dfs.train_test_split(
    dataset=df,
    labels=("subject", "trial"),
    trial_col='subject',
    train_trials=test_split_train_subjects,
    verbose=2,
)
train_data.shape, test_data.shape

Seg_Shape:(790, 10) | TrainData:(1135404, 10) | TestData:(277461, 10) | ('subject', 'trial'):(24, 16) | progress:100%.

((1135404, 10), (277461, 10))

In [85]:
from sensplit.dataframe_splitter import DataFrameSplitter

# Values handed to the splitter as `train_trials`; with `trial_col` set to 'subject'
# they are presumably matched against the 'subject' column - confirm against the
# sensplit documentation.
val_split_train_subjects = [
    1., 4., 6., 8., 10., 11., 12.,
    14., 16., 18., 19., 20., 21., 24.,
]

dfs = DataFrameSplitter(method="trials")

# Second split: the current training data is divided again into a (smaller) training
# part and a validation part.
# NOTE(review): `train_data` is both the input and an output of this call, so running
# this cell a second time splits the already reduced training set.
train_data, val_data = dfs.train_test_split(
    dataset=train_data,
    labels=("subject", "trial"),
    trial_col='subject',
    train_trials=val_split_train_subjects,
    verbose=2,
)
train_data.shape, val_data.shape

Seg_Shape:(790, 10) | TrainData:(840565, 10) | TestData:(294839, 10) | ('subject', 'trial'):(24, 16) | progress:100%.

((840565, 10), (294839, 10))

In [86]:
# Number of non-null 'accel_x' values per (trial, activity) pair in the train, test and
# validation splits, in that order - a quick check of how the rows are distributed.
group_keys = ["trial", "activity"]
prova, prova2, prova3 = (
    split.groupby(group_keys)["accel_x"].count()
    for split in (train_data, test_data, val_data)
)

for counts in (prova, prova2, prova3):
    print(counts)

trial  activity
1      dws          30086
2      dws          35253
3      ups          34454
4      ups          41460
5      sit         135503
6      std         139443
7      wlk          93510
8      wlk          72343
9      jog          62261
11     dws          13257
12     ups          17121
13     sit          65273
14     std          46940
15     wlk          35394
16     jog          18267
Name: accel_x, dtype: int64
trial  activity
1      dws         10324
2      dws         11950
3      ups         11169
4      ups         14509
5      sit         45961
6      std         43620
7      wlk         31489
8      wlk         23432
9      jog         20600
11     dws          4320
12     ups          5227
13     sit         19394
14     std         13845
15     wlk         16156
16     jog          5465
Name: accel_x, dtype: int64
trial  activity
1      dws          9836
2      dws         11905
3      ups         12581
4      ups         15103
5      sit         53193
6     

In [89]:
# Write each split to its own CSV file in the current working directory
# (the index is written as well, since to_csv is called with its defaults).
for filename, frame in (
    ("TrainData1.csv", train_data),
    ("TestData1.csv", test_data),
    ("ValData1.csv", val_data),
):
    frame.to_csv(filename)