In [1]:
# https://archive.ics.uci.edu/ml/datasets/opportunity+activity+recognition

In [2]:
import matplotlib as plt
import pandas as pd
import os
import numpy as np
import math
import csv
from pandas.compat import StringIO
from sklearn import preprocessing
from tqdm import tqdm

In [3]:
url = "/home/avijoychakma/Downloads/PerCom 2020/Dataset/OpportunityUCIDataset/dataset/"

# OLD and dataset accelerometer values are in thousends 
    - Locomotion Annotation: Stand, Walk, Sit, Lie
    Column Information:
    1: Milisec
    38-46: Back IMU
    51-59: Right Upper Arm
    64-72: Right Lower Arm
    77-85: Left Upper Arm
    90-98: Left Lower Arm
    103-117: Left Shoe
    119-133: Right Shoe
    244: Locomotion
    
    Shoe accumulated data is different than the Back, Left, right arm. Leave shoe data seperate.

### Directory Structure
    - 4 user
    - 5 runs for each user

### Locomotion Information
    1   -   Locomotion   -   Stand
    2   -   Locomotion   -   Walk
    4   -   Locomotion   -   Sit
    5   -   Locomotion   -   Lie

### Label Mapping
    - Stand: 1
    - Walk: 3
    - Sit: 0
    - Lie: 2

In [4]:
total_frame = {}
BACK_frame = {}
RUA_frame = {}
RLA_frame = {}
LUA_frame = {}
LLA_frame = {}

In [5]:
# Back_frame, RUA_frame, RLA_frame, LUA_frame, LLA_frame: 
# 11-15: User1
# 21-25: User2
# 31-35: User3
# 41-45: User4

In [6]:
# GT= 0 is for the interleaving activity

In [7]:
for user_index in range(1,2):
    for run_index in range(1,2):
        file_name = "S"+str(user_index)+"-ADL"+str(run_index)+".dat"
        dataframe = pd.read_csv(url+file_name,sep=" ", header=None) 

In [8]:
dataframe[0].head()

0      0
1     33
2     67
3    100
4    133
Name: 0, dtype: int64

In [9]:
for user_index in range(1,5):
    for run_index in range(1,6):
        file_name = "S"+str(user_index)+"-ADL"+str(run_index)+".dat"
        dataframe = pd.read_csv(url+file_name,sep=" ", header=None) 
        
        dataframe.sort_values(0)
        index = (user_index * 10) + run_index
        
        BACK_IMU_frame = dataframe[[0,37,38,39,243]].copy()
        BACK_IMU_frame.rename(columns={0:'Time', 37: 'Acc_x', 38:'Acc_y', 39: 'Acc_z', 243: 'gt'}, inplace=True)
        BACK_IMU_frame = BACK_IMU_frame.dropna()
        BACK_IMU_frame = BACK_IMU_frame[BACK_IMU_frame["gt"] != 0].copy()
        
        BACK_IMU_frame.loc[:, 'Person'] = user_index
        BACK_IMU_frame.loc[:, 'Position'] = "Back"
        BACK_frame[index] = BACK_IMU_frame.copy()
        BACK_frame[index].reset_index(drop=True, inplace=True)
        
        
        
        
        RUA_IMU_frame = dataframe[[0,50,51,52,243]].copy()
        RUA_IMU_frame.rename(columns={0:'Time', 50: 'Acc_x', 51:'Acc_y', 52: 'Acc_z', 243: 'gt'}, inplace=True)
        RUA_IMU_frame = RUA_IMU_frame.dropna()
        RUA_IMU_frame = RUA_IMU_frame[RUA_IMU_frame["gt"] != 0].copy()
        
        RUA_IMU_frame.loc[:, 'Person'] = user_index
        RUA_IMU_frame.loc[:, 'Position'] = "Right Upper Arm"
        RUA_frame[index] = RUA_IMU_frame.copy()
        RUA_frame[index].reset_index(drop=True, inplace=True)
        
        
        
        RLA_IMU_frame = dataframe[[0,63,64,65,243]].copy()
        RLA_IMU_frame.rename(columns={0:'Time', 63: 'Acc_x', 64:'Acc_y', 65: 'Acc_z', 243: 'gt'}, inplace=True)
        RLA_IMU_frame = RLA_IMU_frame.dropna()
        RLA_IMU_frame = RLA_IMU_frame[RLA_IMU_frame["gt"] != 0].copy()

        RLA_IMU_frame.loc[:, 'Person'] = user_index
        RLA_IMU_frame.loc[:, 'Position'] = "Right Lower Arm"
        RLA_frame[index] = RLA_IMU_frame.copy()
        RLA_frame[index].reset_index(drop=True, inplace=True)
        
        
        
        LUA_IMU_frame = dataframe[[0,76,77,78,243]].copy()
        LUA_IMU_frame.rename(columns={0:'Time', 76: 'Acc_x', 77:'Acc_y', 78: 'Acc_z', 243: 'gt'}, inplace=True)
        LUA_IMU_frame = LUA_IMU_frame.dropna()
        LUA_IMU_frame = LUA_IMU_frame[LUA_IMU_frame["gt"] != 0].copy()

        LUA_IMU_frame.loc[:, 'Person'] = user_index
        LUA_IMU_frame.loc[:, 'Position'] = "Left Upper Arm"
        LUA_frame[index] = LUA_IMU_frame.copy()
        LUA_frame[index].reset_index(drop=True, inplace=True)
        
        
        
        LLA_IMU_frame = dataframe[[0,89,90,91,243]].copy()
        LLA_IMU_frame.rename(columns={0:'Time', 89: 'Acc_x', 90:'Acc_y', 91: 'Acc_z', 243: 'gt'}, inplace=True)
        LLA_IMU_frame = LLA_IMU_frame.dropna()
        LLA_IMU_frame = LLA_IMU_frame[LLA_IMU_frame["gt"] != 0].copy()

        LLA_IMU_frame.loc[:, 'Person'] = user_index
        LLA_IMU_frame.loc[:, 'Position'] = "Left Lower Arm"
        LLA_frame[index] = LLA_IMU_frame.copy()
        LLA_frame[index].reset_index(drop=True, inplace=True)
        

In [10]:
BACK_frame[11].head()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,gt,Person,Position
0,98466,-987.0,-130.0,106.0,1,1,Back
1,98499,-986.0,-128.0,106.0,1,1,Back
2,98532,-989.0,-131.0,103.0,1,1,Back
3,98566,-989.0,-127.0,105.0,1,1,Back
4,98599,-989.0,-127.0,110.0,1,1,Back


In [11]:
BACK_frame[11].tail()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,gt,Person,Position
37502,1625284,-813.0,-380.0,17.0,2,1,Back
37503,1625317,-915.0,-463.0,47.0,2,1,Back
37504,1625350,-1008.0,-447.0,146.0,2,1,Back
37505,1625384,-1030.0,-373.0,115.0,2,1,Back
37506,1625417,-955.0,-325.0,78.0,2,1,Back


In [12]:
np.unique(BACK_frame[11]['gt'])

array([1, 2, 4, 5])

In [13]:
# user1 = {}

In [14]:
# Back_frame_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
# Back_frame_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])

# RUA_frame_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
# RUA_frame_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])

# RLA_frame_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
# RLA_frame_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])

# LUA_frame_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
# LUA_frame_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])

# LLA_frame_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
# LLA_frame_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])

In [15]:
# upto = int(label_1_user_1.shape[0]*0.8)

In [16]:
# train_label1_user1 = label_1_user_1[0:upto]
# valid_label1_user1 = label_1_user_1[upto+1 : label_1_user_1.shape[0]]

In [17]:
# user1_train = pd.concat([Back_frame_train, train_label1_user1])

In [18]:
Back_frame_total = pd.DataFrame(columns=['Time', 'Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
RUA_frame_total = pd.DataFrame(columns=['Time', 'Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
RLA_frame_total = pd.DataFrame(columns=['Time', 'Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
LUA_frame_total = pd.DataFrame(columns=['Time', 'Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])
LLA_frame_total = pd.DataFrame(columns=['Time', 'Acc_x', 'Acc_y', 'Acc_z', 'gt', 'Person', 'Position'])

In [19]:
back_shape = 0
for user_index in range(1,5):
    for run_index in range(1,6):
        index = (user_index * 10) + run_index
        
        # To check the final dataframe size
        back_shape = back_shape + BACK_frame[index].shape[0]
        
        Back_frame_total = Back_frame_total.append(BACK_frame[index], ignore_index=True)
        RUA_frame_total = RUA_frame_total.append(RUA_frame[index], ignore_index=True)
        RLA_frame_total = RLA_frame_total.append(RLA_frame[index], ignore_index=True)
        LUA_frame_total = LUA_frame_total.append(LUA_frame[index], ignore_index=True)
        LLA_frame_total = LLA_frame_total.append(LLA_frame[index], ignore_index=True)

In [20]:
Back_frame_total.head()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,gt,Person,Position
0,98466,-987.0,-130.0,106.0,1,1,Back
1,98499,-986.0,-128.0,106.0,1,1,Back
2,98532,-989.0,-131.0,103.0,1,1,Back
3,98566,-989.0,-127.0,105.0,1,1,Back
4,98599,-989.0,-127.0,110.0,1,1,Back


In [21]:
Back_frame_total.tail()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,gt,Person,Position
492238,943257,-980.0,-244.0,256.0,2,4,Back
492239,943291,-1009.0,-219.0,268.0,2,4,Back
492240,943324,-972.0,-192.0,269.0,2,4,Back
492241,943357,-910.0,-204.0,255.0,2,4,Back
492242,943391,-876.0,-219.0,256.0,2,4,Back


In [22]:
back_shape

492243

In [23]:
np.unique(Back_frame_total['gt'])

array([1, 2, 4, 5], dtype=object)

### Label Mapping

In [24]:
Back_frame_total.replace({'gt' : { 1:100, 2:200, 4:400, 5:500}}, inplace = True)
Back_frame_total.replace({'gt' : { 100:1, 200:3, 400:0, 500:2}}, inplace = True)

RUA_frame_total.replace({'gt' : { 1:100, 2:200, 4:400, 5:500}}, inplace = True)
RUA_frame_total.replace({'gt' : { 100:1, 200:3, 400:0, 500:2}}, inplace = True)

RLA_frame_total.replace({'gt' : { 1:100, 2:200, 4:400, 5:500}}, inplace = True)
RLA_frame_total.replace({'gt' : { 100:1, 200:3, 400:0, 500:2}}, inplace = True)

LUA_frame_total.replace({'gt' : { 1:100, 2:200, 4:400, 5:500}}, inplace = True)
LUA_frame_total.replace({'gt' : { 100:1, 200:3, 400:0, 500:2}}, inplace = True)

LLA_frame_total.replace({'gt' : { 1:100, 2:200, 4:400, 5:500}}, inplace = True)
LLA_frame_total.replace({'gt' : { 100:1, 200:3, 400:0, 500:2}}, inplace = True)

In [25]:
np.unique(LLA_frame_total['gt'])

array([0, 1, 2, 3])

### Position-wise Standardization and Normalization

#### Back

In [26]:
time_gt = np.array(Back_frame_total['Time'])
person_gt = np.array(Back_frame_total['Person'])
gt = np.array(Back_frame_total['gt'])
position_gt = np.array(Back_frame_total['Position'])

Back_frame_total.drop(['Person','gt','Position'], axis=1, inplace=True)
column_name = Back_frame_total.columns

scaler = preprocessing.StandardScaler()
df_standardized = scaler.fit_transform(Back_frame_total)
Back_frame_total = pd.DataFrame(df_standardized)

min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(Back_frame_total)
Back_frame_total = pd.DataFrame(np_scaled, columns=column_name)

Back_frame_total["Person"] = person_gt
Back_frame_total["gt"] = gt
Back_frame_total["Position"] = position_gt
Back_frame_total["Time"] = time_gt

In [27]:
Back_frame_total.head()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,Person,gt,Position
0,98466,0.362151,0.375,0.498662,1,1,Back
1,98499,0.36255,0.375643,0.498662,1,1,Back
2,98532,0.361355,0.374679,0.497514,1,1,Back
3,98566,0.361355,0.375964,0.498279,1,1,Back
4,98599,0.361355,0.375964,0.500191,1,1,Back


In [28]:
np.min(Back_frame_total['Acc_x'])

0.0

#### RUA

In [29]:
time_gt = np.array(RUA_frame_total['Time'])
person_gt = np.array(RUA_frame_total['Person'])
gt = np.array(RUA_frame_total['gt'])
position_gt = np.array(RUA_frame_total['Position'])

RUA_frame_total.drop(['Person','gt','Position'], axis=1, inplace=True)
column_name = RUA_frame_total.columns

scaler = preprocessing.StandardScaler()
df_standardized = scaler.fit_transform(RUA_frame_total)
RUA_frame_total = pd.DataFrame(df_standardized)

min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(RUA_frame_total)
RUA_frame_total = pd.DataFrame(np_scaled, columns=column_name)

RUA_frame_total["Person"] = person_gt
RUA_frame_total["gt"] = gt
RUA_frame_total["Position"] = position_gt
RUA_frame_total["Time"] = time_gt

#### RLA

In [30]:
time_gt = np.array(RLA_frame_total['Time'])
person_gt = np.array(RLA_frame_total['Person'])
gt = np.array(RLA_frame_total['gt'])
position_gt = np.array(RLA_frame_total['Position'])

RLA_frame_total.drop(['Person','gt','Position'], axis=1, inplace=True)
column_name = RLA_frame_total.columns

scaler = preprocessing.StandardScaler()
df_standardized = scaler.fit_transform(RLA_frame_total)
RLA_frame_total = pd.DataFrame(df_standardized)

min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(RLA_frame_total)
RLA_frame_total = pd.DataFrame(np_scaled, columns=column_name)

RLA_frame_total["Person"] = person_gt
RLA_frame_total["gt"] = gt
RLA_frame_total["Position"] = position_gt
RLA_frame_total["Time"] = time_gt

#### LUA

In [31]:
time_gt = np.array(LUA_frame_total['Time'])
person_gt = np.array(LUA_frame_total['Person'])
gt = np.array(LUA_frame_total['gt'])
position_gt = np.array(LUA_frame_total['Position'])

LUA_frame_total.drop(['Person','gt','Position'], axis=1, inplace=True)
column_name = LUA_frame_total.columns

scaler = preprocessing.StandardScaler()
df_standardized = scaler.fit_transform(LUA_frame_total)
LUA_frame_total = pd.DataFrame(df_standardized)

min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(LUA_frame_total)
LUA_frame_total = pd.DataFrame(np_scaled, columns=column_name)

LUA_frame_total["Person"] = person_gt
LUA_frame_total["gt"] = gt
LUA_frame_total["Position"] = position_gt
LUA_frame_total["Time"] = time_gt

#### LLA

In [32]:
time_gt = np.array(LLA_frame_total['Time'])
person_gt = np.array(LLA_frame_total['Person'])
gt = np.array(LLA_frame_total['gt'])
position_gt = np.array(LLA_frame_total['Position'])

LLA_frame_total.drop(['Person','gt','Position'], axis=1, inplace=True)
column_name = LLA_frame_total.columns

scaler = preprocessing.StandardScaler()
df_standardized = scaler.fit_transform(LLA_frame_total)
LLA_frame_total = pd.DataFrame(df_standardized)

min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(LLA_frame_total)
LLA_frame_total = pd.DataFrame(np_scaled, columns=column_name)

LLA_frame_total["Person"] = person_gt
LLA_frame_total["gt"] = gt
LLA_frame_total["Position"] = position_gt
LLA_frame_total["Time"] = time_gt

In [33]:
LLA_frame_total.head()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,Person,gt,Position
0,98466,0.490766,0.622639,0.589294,1,1,Left Lower Arm
1,98499,0.491498,0.621701,0.589856,1,1,Left Lower Arm
2,98532,0.491132,0.622237,0.590531,1,1,Left Lower Arm
3,98566,0.491315,0.621969,0.591543,1,1,Left Lower Arm
4,98599,0.490949,0.621165,0.59188,1,1,Left Lower Arm


In [34]:
LLA_frame_total.tail()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,Person,gt,Position
492238,943257,0.497714,0.631614,0.571525,4,3,Left Lower Arm
492239,943291,0.50448,0.626658,0.569276,4,3,Left Lower Arm
492240,943324,0.5096,0.605224,0.565902,4,3,Left Lower Arm
492241,943357,0.509782,0.597857,0.57951,4,3,Left Lower Arm
492242,943391,0.513622,0.598526,0.577935,4,3,Left Lower Arm


### Save Files

In [35]:
save_path = "/home/avijoychakma/Downloads/DTCN-AR/Dataset Preprocessing/OPPORTUNITY/ACC-Position-Preprocessed/New Windowing/"

In [36]:
Back_frame_total.head()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,Person,gt,Position
0,98466,0.362151,0.375,0.498662,1,1,Back
1,98499,0.36255,0.375643,0.498662,1,1,Back
2,98532,0.361355,0.374679,0.497514,1,1,Back
3,98566,0.361355,0.375964,0.498279,1,1,Back
4,98599,0.361355,0.375964,0.500191,1,1,Back


In [37]:
Back_frame_total.tail()

Unnamed: 0,Time,Acc_x,Acc_y,Acc_z,Person,gt,Position
492238,943257,0.36494,0.338368,0.556023,4,3,Back
492239,943291,0.353386,0.346401,0.560612,4,3,Back
492240,943324,0.368127,0.355077,0.560994,4,3,Back
492241,943357,0.392829,0.351221,0.555641,4,3,Back
492242,943391,0.406375,0.346401,0.556023,4,3,Back


In [38]:
np.unique(Back_frame_total['gt'])

array([0, 1, 2, 3])

In [39]:
# for user_index in range(1,5):
#     for activity_index in range(0,4):
#         back_dataframe = Back_frame_total[(Back_frame_total['Person'] == user_index) & (Back_frame_total['gt'] == activity_index)]
#         back_dataframe.to_csv (save_path+'BACK_user'+str(user_index)+'_a'+str(activity_index)+'.csv', index = None, header=True)
        
#         RUA_dataframe = RUA_frame_total[(RUA_frame_total['Person'] == user_index) & (RUA_frame_total['gt'] == activity_index)]
#         RUA_dataframe.to_csv (save_path+'RUA_user'+str(user_index)+'_a'+str(activity_index)+'.csv', index = None, header=True)
        
#         RLA_dataframe = RLA_frame_total[(RLA_frame_total['Person'] == user_index) & (RLA_frame_total['gt'] == activity_index)]
#         RLA_dataframe.to_csv (save_path+'RLA_user'+str(user_index)+'_a'+str(activity_index)+'.csv', index = None, header=True)
        
#         LUA_dataframe = LUA_frame_total[(LUA_frame_total['Person'] == user_index) & (LUA_frame_total['gt'] == activity_index)]
#         LUA_dataframe.to_csv (save_path+'LUA_user'+str(user_index)+'_a'+str(activity_index)+'.csv', index = None, header=True)
        
#         LLA_dataframe = LLA_frame_total[(LLA_frame_total['Person'] == user_index) & (LLA_frame_total['gt'] == activity_index)]
#         LLA_dataframe.to_csv (save_path+'LLA_user'+str(user_index)+'_a'+str(activity_index)+'.csv', index = None, header=True)

In [40]:
for user_index in range(1,5):
    for activity_index in range(0,4):
        back_dataframe = Back_frame_total[(Back_frame_total['Person'] == user_index) & (Back_frame_total['gt'] == activity_index)]
        upto = int(back_dataframe.shape[0]*0.8)    
        train = back_dataframe[0:upto].copy()
        valid = back_dataframe[upto+1 : back_dataframe.shape[0]].copy()
        train.to_csv (save_path+'BACK_user'+str(user_index)+'_a'+str(activity_index)+'_train_'+'.csv', index = None, header=True)
        valid.to_csv (save_path+'BACK_user'+str(user_index)+'_a'+str(activity_index)+'_valid_'+'.csv', index = None, header=True)
        
        RUA_dataframe = RUA_frame_total[(RUA_frame_total['Person'] == user_index) & (RUA_frame_total['gt'] == activity_index)]
        upto = int(RUA_dataframe.shape[0]*0.8)    
        train = RUA_dataframe[0:upto].copy()
        valid = RUA_dataframe[upto+1 : RUA_dataframe.shape[0]].copy()
        train.to_csv (save_path+'RUA_user'+str(user_index)+'_a'+str(activity_index)+'_train_'+'.csv', index = None, header=True)
        valid.to_csv (save_path+'RUA_user'+str(user_index)+'_a'+str(activity_index)+'_valid_'+'.csv', index = None, header=True)
        
        RLA_dataframe = RLA_frame_total[(RLA_frame_total['Person'] == user_index) & (RLA_frame_total['gt'] == activity_index)]
        upto = int(RLA_dataframe.shape[0]*0.8)    
        train = RLA_dataframe[0:upto].copy()
        valid = RLA_dataframe[upto+1 : RLA_dataframe.shape[0]].copy()
        train.to_csv (save_path+'RLA_user'+str(user_index)+'_a'+str(activity_index)+'_train_'+'.csv', index = None, header=True)
        valid.to_csv (save_path+'RLA_user'+str(user_index)+'_a'+str(activity_index)+'_valid_'+'.csv', index = None, header=True)
        
        LUA_dataframe = LUA_frame_total[(LUA_frame_total['Person'] == user_index) & (LUA_frame_total['gt'] == activity_index)]
        upto = int(LUA_dataframe.shape[0]*0.8)    
        train = LUA_dataframe[0:upto].copy()
        valid = LUA_dataframe[upto+1 : LUA_dataframe.shape[0]].copy()
        train.to_csv (save_path+'LUA_user'+str(user_index)+'_a'+str(activity_index)+'_train_'+'.csv', index = None, header=True)
        valid.to_csv (save_path+'LUA_user'+str(user_index)+'_a'+str(activity_index)+'_valid_'+'.csv', index = None, header=True)
        
        LLA_dataframe = LLA_frame_total[(LLA_frame_total['Person'] == user_index) & (LLA_frame_total['gt'] == activity_index)]
        upto = int(LLA_dataframe.shape[0]*0.8)    
        train = LLA_dataframe[0:upto].copy()
        valid = LLA_dataframe[upto+1 : LLA_dataframe.shape[0]].copy()
        train.to_csv (save_path+'LLA_user'+str(user_index)+'_a'+str(activity_index)+'_train_'+'.csv', index = None, header=True)
        valid.to_csv (save_path+'LLA_user'+str(user_index)+'_a'+str(activity_index)+'_valid_'+'.csv', index = None, header=True)

### Total Dataframe Standardization and Normalization

In [41]:
# save_path = "/home/avijoychakma/Downloads/DTCN-AR/Dataset Preprocessing/OPPORTUNITY/ACC-Total-Preprocessed/"

In [42]:
# total_frame = pd.concat([Back_frame_total, RUA_frame_total, RLA_frame_total, LUA_frame_total, LLA_frame_total])
# total_frame.reset_index(drop=True, inplace = True)

In [43]:
# total_frame.tail()

In [44]:
# person_gt = np.array(total_frame['Person'])
# gt = np.array(total_frame['gt'])
# position_gt = np.array(total_frame['Position'])

In [45]:
# total_frame.drop(['Person','gt','Position'], axis=1, inplace=True)
# column_name = total_frame.columns

In [46]:
# scaler = preprocessing.StandardScaler()
# df_standardized = scaler.fit_transform(total_frame)
# total_frame = pd.DataFrame(df_standardized)

In [47]:
# min_max_scaler = preprocessing.MinMaxScaler()
# np_scaled = min_max_scaler.fit_transform(total_frame)
# total_frame = pd.DataFrame(np_scaled, columns=column_name)

In [48]:
# total_frame["Person"] = person_gt
# total_frame["gt"] = gt
# total_frame["Position"] = position_gt

In [49]:
# total_frame.head()

In [50]:
# total_frame.shape

In [51]:
# np.unique(total_frame['Position'])

### Save Files

In [52]:
# user1_BACK = total_frame[(total_frame['Person'] == 1) & (total_frame['Position'] == 'Back')]
# user1_BACK.reset_index(drop=True, inplace=True)

# user2_BACK = total_frame[(total_frame['Person'] == 2) & (total_frame['Position'] == 'Back')]
# user2_BACK.reset_index(drop=True, inplace=True)

# user3_BACK = total_frame[(total_frame['Person'] == 3) & (total_frame['Position'] == 'Back')]
# user3_BACK.reset_index(drop=True, inplace=True)

# user4_BACK = total_frame[(total_frame['Person'] == 4) & (total_frame['Position'] == 'Back')]
# user4_BACK.reset_index(drop=True, inplace=True)

# user1_BACK.to_csv (save_path+'user1_back.csv', index = None, header=True)
# user2_BACK.to_csv (save_path+'user2_back.csv', index = None, header=True)
# user3_BACK.to_csv (save_path+'user3_back.csv', index = None, header=True)
# user4_BACK.to_csv (save_path+'user4_back.csv', index = None, header=True)

In [53]:
# user1_RUA = total_frame[(total_frame['Person'] == 1) & (total_frame['Position'] == 'Right Upper Arm')]
# user1_RUA.reset_index(drop=True, inplace=True)

# user2_RUA = total_frame[(total_frame['Person'] == 2) & (total_frame['Position'] == 'Right Upper Arm')]
# user2_RUA.reset_index(drop=True, inplace=True)

# user3_RUA = total_frame[(total_frame['Person'] == 3) & (total_frame['Position'] == 'Right Upper Arm')]
# user3_RUA.reset_index(drop=True, inplace=True)

# user4_RUA = total_frame[(total_frame['Person'] == 4) & (total_frame['Position'] == 'Right Upper Arm')]
# user4_RUA.reset_index(drop=True, inplace=True)

# user1_RUA.to_csv (save_path+'user1_RUA.csv', index = None, header=True)
# user2_RUA.to_csv (save_path+'user2_RUA.csv', index = None, header=True)
# user3_RUA.to_csv (save_path+'user3_RUA.csv', index = None, header=True)
# user4_RUA.to_csv (save_path+'user4_RUA.csv', index = None, header=True)

In [54]:
# user1_RLA = total_frame[(total_frame['Person'] == 1) & (total_frame['Position'] == 'Right Lower Arm')]
# user1_RLA.reset_index(drop=True, inplace=True)

# user2_RLA = total_frame[(total_frame['Person'] == 2) & (total_frame['Position'] == 'Right Lower Arm')]
# user2_RLA.reset_index(drop=True, inplace=True)

# user3_RLA = total_frame[(total_frame['Person'] == 3) & (total_frame['Position'] == 'Right Lower Arm')]
# user3_RLA.reset_index(drop=True, inplace=True)

# user4_RLA = total_frame[(total_frame['Person'] == 4) & (total_frame['Position'] == 'Right Lower Arm')]
# user4_RLA.reset_index(drop=True, inplace=True)

# user1_RLA.to_csv (save_path+'user1_RLA.csv', index = None, header=True)
# user2_RLA.to_csv (save_path+'user2_RLA.csv', index = None, header=True)
# user3_RLA.to_csv (save_path+'user3_RLA.csv', index = None, header=True)
# user4_RLA.to_csv (save_path+'user4_RLA.csv', index = None, header=True)

In [55]:
# user1_LUA = total_frame[(total_frame['Person'] == 1) & (total_frame['Position'] == 'Left Upper Arm')]
# user1_LUA.reset_index(drop=True, inplace=True)

# user2_LUA = total_frame[(total_frame['Person'] == 2) & (total_frame['Position'] == 'Left Upper Arm')]
# user2_LUA.reset_index(drop=True, inplace=True)

# user3_LUA = total_frame[(total_frame['Person'] == 3) & (total_frame['Position'] == 'Left Upper Arm')]
# user3_LUA.reset_index(drop=True, inplace=True)

# user4_LUA = total_frame[(total_frame['Person'] == 4) & (total_frame['Position'] == 'Left Upper Arm')]
# user4_LUA.reset_index(drop=True, inplace=True)

# user1_LUA.to_csv (save_path+'user1_LUA.csv', index = None, header=True)
# user2_LUA.to_csv (save_path+'user2_LUA.csv', index = None, header=True)
# user3_LUA.to_csv (save_path+'user3_LUA.csv', index = None, header=True)
# user4_LUA.to_csv (save_path+'user4_LUA.csv', index = None, header=True)

In [56]:
# user1_LLA = total_frame[(total_frame['Person'] == 1) & (total_frame['Position'] == 'Left Lower Arm')]
# user1_LLA.reset_index(drop=True, inplace=True)

# user2_LLA = total_frame[(total_frame['Person'] == 2) & (total_frame['Position'] == 'Left Lower Arm')]
# user2_LLA.reset_index(drop=True, inplace=True)

# user3_LLA = total_frame[(total_frame['Person'] == 3) & (total_frame['Position'] == 'Left Lower Arm')]
# user3_LLA.reset_index(drop=True, inplace=True)

# user4_LLA = total_frame[(total_frame['Person'] == 4) & (total_frame['Position'] == 'Left Lower Arm')]
# user4_LLA.reset_index(drop=True, inplace=True)

# user1_LLA.to_csv (save_path+'user1_LLA.csv', index = None, header=True)
# user2_LLA.to_csv (save_path+'user2_LLA.csv', index = None, header=True)
# user3_LLA.to_csv (save_path+'user3_LLA.csv', index = None, header=True)
# user4_LLA.to_csv (save_path+'user4_LLA.csv', index = None, header=True)

In [57]:
search_path = "/home/avijoychakma/Downloads/DTCN-AR/Dataset Preprocessing/OPPORTUNITY/ACC-Position-Preprocessed/New Windowing/"
save_path = "/home/avijoychakma/Downloads/DTCN-AR/Dataset Preprocessing/OPPORTUNITY/ACC-Position-Preprocessed/New Windowing/Splitted Files/"

In [58]:
position = ["BACK","RUA","RLA","LUA","LLA"]
user = ["user1", "user2", "user3", "user4"]
activity = ["a0", "a1", "a2", "a3"]

In [59]:
BACK_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])
BACK_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])

RUA_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])
RUA_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])

RLA_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])
RLA_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])

LUA_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])
LUA_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])

LLA_train = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])
LLA_valid = pd.DataFrame(columns=['Time','Acc_x', 'Acc_y', 'Acc_z', 'Person', 'gt', 'Position'])

In [60]:
BACK_train.shape

(0, 7)

In [61]:
# for position_index in range(0,1):
#     for user_index in range(1,2):
#         for activity_index in range(0,1):
#             train_file_name = position[position_index]+'_'+user[user_index-1]+'_'+activity[activity_index]+'_train_'+".csv"
#             valid_file_name = position[position_index]+'_'+user[user_index-1]+'_'+activity[activity_index]+'_valid_'+".csv"
            
#             train_dataframe = pd.read_csv(search_path+train_file_name)
#             valid_dataframe = pd.read_csv(search_path+valid_file_name)
            
#             BACK_train = pd.concat([BACK_train, train_dataframe], ignore_index=True, sort=False)
#             BACK_valid = pd.concat([BACK_valid, valid_dataframe], ignore_index=True, sort=False)

In [62]:
for position_index in range(0,5):
    for user_index in range(1,5):
        for activity_index in range(0,4):
            train_file_name = position[position_index]+'_'+user[user_index-1]+'_'+activity[activity_index]+'_train_'+".csv"
            valid_file_name = position[position_index]+'_'+user[user_index-1]+'_'+activity[activity_index]+'_valid_'+".csv"
            
            train_dataframe = pd.read_csv(search_path+train_file_name)
            valid_dataframe = pd.read_csv(search_path+valid_file_name)
            
            if position_index == 0:
                BACK_train = pd.concat([BACK_train, train_dataframe], ignore_index=True, sort=False)
                BACK_valid = pd.concat([BACK_valid, valid_dataframe], ignore_index=True, sort=False)
            elif user_index == 1:
                RUA_train = pd.concat([RUA_train, train_dataframe], ignore_index=True, sort=False)
                RUA_valid = pd.concat([RUA_valid, valid_dataframe], ignore_index=True, sort=False)
            elif user_index == 2:
                RLA_train = pd.concat([RLA_train, train_dataframe], ignore_index=True, sort=False)
                RLA_valid = pd.concat([RLA_valid, valid_dataframe], ignore_index=True, sort=False)
            elif user_index == 3:
                LUA_train = pd.concat([LUA_train, train_dataframe], ignore_index=True, sort=False)
                LUA_valid = pd.concat([LUA_valid, valid_dataframe], ignore_index=True, sort=False)
            elif user_index == 4:
                LLA_train = pd.concat([LLA_train, train_dataframe], ignore_index=True, sort=False)
                LLA_valid = pd.concat([LLA_valid, valid_dataframe], ignore_index=True, sort=False)
                
                
BACK_train.to_csv (save_path+'BACK_train.csv', index = None, header=True)
BACK_valid.to_csv (save_path+'BACK_valid.csv', index = None, header=True)

RUA_train.to_csv (save_path+'RUA_train.csv', index = None, header=True)
RUA_valid.to_csv (save_path+'RUA_valid.csv', index = None, header=True)

RLA_train.to_csv (save_path+'RLA_train.csv', index = None, header=True)
RLA_valid.to_csv (save_path+'RLA_valid.csv', index = None, header=True)

LUA_train.to_csv (save_path+'LUA_train.csv', index = None, header=True)
LUA_valid.to_csv (save_path+'LUA_valid.csv', index = None, header=True)

LLA_train.to_csv (save_path+'LLA_train.csv', index = None, header=True)
LLA_valid.to_csv (save_path+'LLA_valid.csv', index = None, header=True)

## Prepare Final Dataset

In [63]:
BACK_dataset_train = []
RUA_dataset_train = []
RLA_dataset_train = []
LUA_dataset_train = []
LLA_dataset_train = []
BACK_gt_train = []
RUA_gt_train = []
RLA_gt_train = []
LUA_gt_train = []
LLA_gt_train = []

In [64]:
BACK_dataset_valid = []
RUA_dataset_valid = []
RLA_dataset_valid = []
LUA_dataset_valid = []
LLA_dataset_valid = []
BACK_gt_valid = []
RUA_gt_valid = []
RLA_gt_valid = []
LUA_gt_valid = []
LLA_gt_valid = []

In [65]:
win_size=128
step_size=64
AXIS = 3
FROM = 1
TO = FROM+3
START = TO + 1
END = TO + 2

In [66]:
item = ["train","valid"]

In [67]:
for position_index in tqdm(range(0,5)): #Back, RUA, RLA, LUA, LLA
    for split_index in range(0,2): # 4 Persons
        file_name = position[position_index]+'_'+item[split_index]
        
        df = pd.read_csv(save_path+file_name+'.csv', sep=",")   
        len_df = df.shape[0]
        narray = df.to_numpy()

        for i in range(0, len_df, step_size):
            window = narray[i:i+win_size, FROM:TO]
            
            if window.shape[0] != win_size:
                continue
            else:
                reshaped_window = window.reshape(1,win_size,1,AXIS)
                gt = np.bincount(narray[i:i+win_size,START:END].astype(int).ravel()).argmax()
                
                if position_index == 0:
                    if split_index == 0:
                        BACK_dataset_train.append(reshaped_window)
                        BACK_gt_train.append(gt)
                    elif split_index == 1:
                        BACK_dataset_valid.append(reshaped_window)
                        BACK_gt_valid.append(gt)
                elif position_index == 1:
                    if split_index == 0:
                        RUA_dataset_train.append(reshaped_window)
                        RUA_gt_train.append(gt)
                    elif split_index == 1:
                        RUA_dataset_valid.append(reshaped_window)
                        RUA_gt_valid.append(gt)
                elif position_index == 2:
                    if split_index == 0:
                        RLA_dataset_train.append(reshaped_window)
                        RLA_gt_train.append(gt)
                    elif split_index == 1:
                        RLA_dataset_valid.append(reshaped_window)
                        RLA_gt_valid.append(gt)
                elif position_index == 3:
                    if split_index == 0:
                        LUA_dataset_train.append(reshaped_window)
                        LUA_gt_train.append(gt)
                    elif split_index == 1:
                        LUA_dataset_valid.append(reshaped_window)
                        LUA_gt_valid.append(gt)
                elif position_index == 4:
                    if split_index == 0:
                        LLA_dataset_train.append(reshaped_window)
                        LLA_gt_train.append(gt)
                    elif split_index == 1:
                        LLA_dataset_valid.append(reshaped_window)
                        LLA_gt_valid.append(gt)

100%|██████████| 5/5 [00:02<00:00,  2.38it/s]
