In [1]:
# Dataset link: http://archive.ics.uci.edu/ml/datasets/pamap2+physical+activity+monitoring

In [2]:
import matplotlib as plt
import pandas as pd
import os
from tqdm import tqdm
import tqdm
import numpy as np
import math
from sklearn import preprocessing
import csv

In [3]:
# Directory holding the raw PAMAP2 "Protocol" files; the extraction loop reads
# subject101.dat ... subject108.dat from here.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
url = "/home/avijoychakma/Downloads/PerCom 2020/Dataset/PAMAP2_Dataset/Protocol/"

In [4]:
# Output directory for the per-user, per-position train/valid CSV files; the
# windowing step later reads those files back from the same location.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
save_path = "/home/avijoychakma/Downloads/DTCN-AR/Dataset Preprocessing/PAMAP/ACC-Positionwise-Normalization/Data Files-7 Activities/"

In [5]:
# url = "/home/avijoy/Downloads/Dataset/PAMAP2_Dataset/Protocol/"

# 1. Dataset Information
- Missing data
- File format (each row has 54 columns)
        1 timestamp
        2 Activity class
        3 HR rate
        4-20 IMU hand, 21-37 IMU chest, 38-54 IMU ankle 
           
- 100 hz sampling freq
- List of activityIDs and corresponding activities:
 1 lying
2 sitting
3 standing
4 walking
5 running
6 cycling
7 Nordic walking
9 watching TV
10 computer work
11 car driving
12 ascending stairs
13 descending stairs
16 vacuum cleaning
17 ironing
18 folding laundry
19 house cleaning
20 playing soccer
24 rope jumping
0 other (transient activities)

- Important activities: 1,2,3,4,5,12,13,6(For activity shifting)

#### Extract only the IMU data for Chest, Hand, Ankle for User1
    - For Each IMU data:    
        - 1 temp, 
        - 2-4: 3D acceleration, [Recommended]
        - 5-7: 3D acceleration, 
        - 8-10: 3D gyroscope, 
        - 11-13: 3D magnetometer, 
        - 14-17: orientation(invalid)

#### Column 1 Represents the Activity Class
    - Considered classes:Sitting, Standing, Lying, Walking, Running, Stair Up, Stair Down
    - For any user, working_df contains all the activity data
        - Indexing is important for temporal relation

#### Body Position:
    - 0: Torso
    - 1: Dominant Arm
    - 3: Dominant Leg

#### Global Mapping

In [6]:
# - Sitting = 0
# - Standing = 1
# - Lying = 2
# - Walking = 3
# - Running = 4
# - Ascending stairs = 5
# - Descending stairs = 6

#### PAMAP Mapping

In [7]:
# - 1 Lying = 2
# - 2 Sitting = 0
# - 3 Standing = 1
# - 4 Walking = 3
# - 5 Running = 4
# - 12 Ascending = 5
# - 13 Descending = 6

In [8]:
### Activities: - 1 lying - 2 sitting - 3 standing - 4 walking - 5 running - 12 ascending - 13 descending

In [9]:
#### Activities to Label 
####- Sitting = 0 - Standing = 1 - Lying = 2 - Walking = 3 - Running = 4 - Ascending stairs = 5 - Descending stairs = 6

In [10]:
# Raw-file column indices per sensor placement: the activity ID (column 1)
# followed by the three acceleration columns that become AccX/AccY/AccZ.
hand = [1, *range(4, 7)]
chest = [1, *range(21, 24)]
ankle = [1, *range(38, 41)]

# Names assigned to the six columns of every extracted frame.
column_name = ['Activity', 'AccX', 'AccY', 'AccZ', 'Person', 'Body_Position']

# Same names with the ground-truth 'Activity' column rotated to the end.
rearranged_column_name = column_name[1:] + column_name[:1]

# PAMAP2 activity IDs that are kept.
activity = [*range(1, 6), 12, 13]

In [11]:
# Maps person_index*10 + offset (0 = hand, 1 = chest, 2 = ankle) to that user's
# per-position DataFrame; filled by the extraction loop.
user = {}

###### Each user's data for the three body positions is stored in the "user" collection such that 
    - User1 contains data for user 1 at index 10,11,12
    - User2 contains data for user 2 at index 20,21,22
    .
    .
    
    - User8 contains data for user 8 at index 80,81,82

## 2. Extract Data

In [12]:
# Raw PAMAP2 activity ID -> global activity label
# (lying=2, sitting=0, standing=1, walking=3, running=4, stairs up=5, stairs down=6).
activity_to_label = {1: 2, 2: 0, 3: 1, 4: 3, 5: 4, 12: 5, 13: 6}

# (offset added to person_index*10, raw columns to keep, body-position label)
placements = [
    (0, hand, "Dominant_Arm"),
    (1, chest, "Torso"),
    (2, ankle, "Dominant_Leg"),
]

for person_index in range(1, 9):

    # Raw string so that "\s" reaches the regex engine instead of being treated
    # as an (invalid) Python string escape.
    df = pd.read_csv(url + "subject10" + str(person_index) + ".dat", sep=r"\s+", header=None)

    # Keep only the relevant activities (column 1 holds the activity ID) and order
    # the rows by timestamp (column 0). sort_values returns a new frame, so its
    # result must be assigned; a bare call leaves working_df untouched.
    working_df = df[df[1].isin(list(activity_to_label))].sort_values(0, kind="mergesort")

    # One frame per sensor placement, stored under person_index*10 + offset.
    for offset, raw_columns, body_position in placements:
        idx = person_index * 10 + offset

        frame = working_df[raw_columns].copy()
        frame.columns = ['Activity', 'AccX', 'AccY', 'AccZ']
        frame['Person'] = person_index
        frame['Body_Position'] = body_position

        # Place the ground-truth activity column at the end.
        frame = frame.reindex(rearranged_column_name, axis=1)

        # Translate raw activity IDs to global labels in one pass; every ID that
        # survived the filter above is a key of the mapping.
        frame['Activity'] = frame['Activity'].map(activity_to_label)

        # Remove rows with missing sensor readings and renumber the index.
        user[idx] = frame.dropna().reset_index(drop=True)
        

In [13]:
# Preview the first rows of user 1's hand ("Dominant_Arm") frame after extraction.
user[10].head()

Unnamed: 0,AccX,AccY,AccZ,Person,Body_Position,Activity
0,2.2153,8.27915,5.58753,1,Dominant_Arm,2
1,2.29196,7.67288,5.74467,1,Dominant_Arm,2
2,2.2909,7.1424,5.82342,1,Dominant_Arm,2
3,2.218,7.14365,5.8993,1,Dominant_Arm,2
4,2.30106,7.25857,6.09259,1,Dominant_Arm,2


In [14]:
# Sanity check: list the distinct 'Person' values in the frame stored under key 10.
np.unique(user[10]['Person'])

array([1])

#### User 9 activities - Computer work, folding laundry, house cleaning, playing soccer, rope jumping

#### "user" collection contains 24 dataframes. Each dataframe contains position-specific data for 7 different activities.
    - Calculate magnitude
    - Decision: Direct windowing/split each dataframe in different activity dataframe then apply windowing

## 3. Position-wise Standardization and Normalization
    - User-Position-wise processing

In [15]:
# Columns carried through unchanged; they are re-attached after scaling in this
# order, which fixes the final column layout of every frame.
metadata_columns = ['Person', 'Activity', 'Body_Position']

for person_index in range(1, 9):
    # Offsets 0, 1, 2 address the hand, chest and ankle frames of this user.
    for offset in range(3):
        idx = person_index * 10 + offset
        frame = user[idx]

        # Scale only the sensor columns. A local name is used for the feature
        # columns so the module-level `column_name` list (needed by the
        # extraction cell) is not overwritten, and the stored frame is not
        # mutated in place.
        features = frame.drop(columns=metadata_columns)
        feature_columns = features.columns

        # Per user and position: z-score standardization followed by min-max scaling.
        standardized = preprocessing.StandardScaler().fit_transform(features)
        scaled = preprocessing.MinMaxScaler().fit_transform(standardized)

        normalized = pd.DataFrame(scaled, columns=feature_columns)
        for name in metadata_columns:
            # Positional copy: `normalized` has a fresh 0..n-1 index.
            normalized[name] = np.array(frame[name])

        user[idx] = normalized

In [16]:
# Preview user 1's hand frame again, now after standardization and min-max scaling.
user[10].head()

Unnamed: 0,AccX,AccY,AccZ,Person,Activity,Body_Position
0,0.831506,0.248807,0.661577,1,2,Dominant_Arm
1,0.832793,0.24094,0.666582,1,2,Dominant_Arm
2,0.832775,0.234057,0.66909,1,2,Dominant_Arm
3,0.831551,0.234074,0.671507,1,2,Dominant_Arm
4,0.832946,0.235565,0.677663,1,2,Dominant_Arm


## 4. Split Into Individual Files

In [17]:
# Body-position labels, listed in the order of the per-user key offsets 0, 1, 2.
position = [
    'Dominant_Arm',
    'Torso',
    'Dominant_Leg',
]

# Fraction of each activity's rows that goes into the training split.
train_percentage = 0.8

##### User 1

In [18]:
# Split user 1's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 10
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User1_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User1_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 2

In [19]:
# Split user 2's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 20
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User2_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User2_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 3

In [20]:
# Split user 3's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 30
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User3_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User3_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 4

In [21]:
# Split user 4's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 40
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User4_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User4_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 5

In [22]:
# Split user 5's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 50
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User5_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User5_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 6

In [23]:
# Split user 6's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 60
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User6_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User6_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 7

In [24]:
# Split user 7's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 70
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User7_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User7_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

##### User 8

In [25]:
# Split user 8's three position frames into per-activity train/valid parts and
# write one train CSV and one valid CSV per position.
Start = 80
split_columns = ['AccX', 'AccY', 'AccZ', 'Person', 'Activity', 'Body_Position']

for position_index in range(0, 3):
    index = Start + position_index
    train_parts = []
    valid_parts = []

    for activity_index in range(0, 7):
        # Rows of this activity, kept in their stored order.
        dataframe = user[index][user[index]["Activity"] == activity_index]

        # The leading train_percentage of the rows is the training part; the
        # validation part starts at `upto` itself (starting at upto+1 silently
        # dropped one row per activity).
        upto = int(dataframe.shape[0] * train_percentage)
        train_parts.append(dataframe.iloc[:upto])
        valid_parts.append(dataframe.iloc[upto:])

    # DataFrame.append was removed in pandas 2.0; concatenate the parts once.
    train_dataframe = pd.concat(train_parts, ignore_index=True, sort=False)[split_columns]
    valid_dataframe = pd.concat(valid_parts, ignore_index=True, sort=False)[split_columns]

    train_dataframe.to_csv(save_path+'User8_'+position[position_index]+'_train'+'.csv', index = None, header=True)
    valid_dataframe.to_csv(save_path+'User8_'+position[position_index]+'_valid'+'.csv', index = None, header=True)

# 5. Windowing

In [26]:
# Training-split accumulators, one list per body position: the windowing loop
# appends reshaped windows to the first group ...
Dominant_Arm_train, Torso_train, Dominant_Leg_train = [], [], []

# ... and the matching per-window activity labels to the second group.
Dominant_Arm_gt_train, Torso_gt_train, Dominant_Leg_gt_train = [], [], []

In [27]:
# Validation-split accumulators, one list per body position: the windowing loop
# appends reshaped windows to the first group ...
Dominant_Arm_valid, Torso_valid, Dominant_Leg_valid = [], [], []

# ... and the matching per-window activity labels to the second group.
Dominant_Arm_gt_valid, Torso_gt_valid, Dominant_Leg_gt_valid = [], [], []

In [28]:
# Split suffixes used in the CSV file names.
item = ["train", "valid"]

# File-name prefixes "User1" ... "User8".
# NOTE: this rebinds `user`, which the earlier cells use as the dict of
# per-position DataFrames; those cells cannot be re-run after this one without
# re-creating that dict first.
user = ["User" + str(number) for number in range(1, 9)]

# Body-position labels used in the CSV file names.
position = ['Dominant_Arm', 'Torso', 'Dominant_Leg']

# 1-based number of the user whose files are windowed.
selected_user = 1

In [29]:
# Window length and hop between consecutive window starts, both in rows.
win_size, step_size = 128, 64

# Number of feature columns per row placed in each window.
AXIS = 3

# Half-open column range [FROM, TO) holding the features in the CSV rows.
FROM = 0
TO = FROM + 3

# Half-open column range [START, END) holding the activity label in the CSV rows.
START, END = 4, 5

In [30]:
# (position_index, split_index) -> (window list, label list).
# Using a table makes every position/split pair explicit; the previous if/elif
# chain appended the Dominant_Leg validation windows AND labels to
# Dominant_Arm_gt_valid, leaving the Dominant_Leg validation lists empty.
window_sinks = {
    (0, 0): (Dominant_Arm_train, Dominant_Arm_gt_train),
    (0, 1): (Dominant_Arm_valid, Dominant_Arm_gt_valid),
    (1, 0): (Torso_train, Torso_gt_train),
    (1, 1): (Torso_valid, Torso_gt_valid),
    (2, 0): (Dominant_Leg_train, Dominant_Leg_gt_train),
    (2, 1): (Dominant_Leg_valid, Dominant_Leg_gt_valid),
}

for position_index in tqdm.tqdm(range(0,3)): #'Dominant_Arm', 'Torso', 'Dominant_Leg'
    for split_index in range(0,2):
        file_name = user[selected_user-1] + "_" + position[position_index]+'_'+item[split_index]

        print(file_name)
        df = pd.read_csv(save_path+file_name+'.csv', sep=",")
        len_df = df.shape[0]
        narray = df.to_numpy()

        windows, labels = window_sinks[(position_index, split_index)]

        # Slide a win_size-row window with hop step_size; the upper bound keeps
        # only start positions that still yield a complete window.
        for i in range(0, len_df - win_size + 1, step_size):
            window = narray[i:i+win_size, FROM:TO]
            reshaped_window = window.reshape(1,win_size,1,AXIS)

            # Window label = most frequent activity label among its rows.
            gt = np.bincount(narray[i:i+win_size,START:END].astype(int).ravel()).argmax()

            windows.append(reshaped_window)
            labels.append(gt)

 33%|███▎      | 1/3 [00:00<00:00,  6.59it/s]

User1_Dominant_Arm_train
User1_Dominant_Arm_valid
User1_Torso_train


100%|██████████| 3/3 [00:00<00:00,  7.58it/s]

User1_Torso_valid
User1_Dominant_Leg_train
User1_Dominant_Leg_valid





In [33]:
# Number of training windows collected for the Dominant_Arm position.
len(Dominant_Arm_train)

1820