In [1]:
# Dataset link: http://archive.ics.uci.edu/ml/datasets/pamap2+physical+activity+monitoring 

In [1]:
import matplotlib as plt
import pandas as pd
import os
from tqdm import tqdm
import numpy as np
import math
from sklearn import preprocessing
import csv

In [2]:
url = "/home/avijoychakma/Downloads/PerCom 2020/Dataset/PAMAP2_Dataset/Protocol/"

In [3]:
# url = "/home/avijoy/Downloads/Dataset/PAMAP2_Dataset/Protocol/"

# 1. Dataset Information
- Missing data
- File Format(Each row 54 column)
        1 timestamp
        2 Activity class
        3 HR rate
        4-20 IMU hand, 21-37 IMU chest, 38-54 IMU ankle 
           
- 100 hz sampling freq
- List of activityIDs and corresponding activities:
 1 lying
2 sitting
3 standing
4 walking
5 running
6 cycling
7 Nordic walking
9 watching TV
10 computer work
11 car driving
12 ascending stairs
13 descending stairs
16 vacuum cleaning
17 ironing
18 folding laundry
19 house cleaning
20 playing soccer
24 rope jumping
0 other (transient activities)

- Important activities: 1,2,3,4,5,12,13,6(For activity shifting)

#### Extract only the IMU data for Chest, Hand, Ankle for User1
    - For Each IMU data:    
        - 1 temp, 
        - 2-4: 3D acceleration, [Recommended]
        - 5-7: 3D acceleration, 
        - 8-10: 3D gyroscope, 
        - 11-13: 3D magnetometer, 
        - 14-17: orientation(invalid)

#### Column 1 Represents the Activity Class
    - Considered classes:Sitting, Standing, Lying, Walking, Running, Stair Up, Stair Down
    - For any user, working_df contains all the activity data
        - Indexing is important for temporal relation

#### Body Position:
    - 0: Torso
    - 1: Dominant Arm
    - 3: Dominant Leg

In [4]:
### Activities: - 1 lying - 2 sitting - 3 standing - 4 walking - 5 running - 12 ascending - 13 descending

In [5]:
#### Activities to Label 
####- Sitting = 0 - Standing = 1 - Lying = 2 - Walking = 3 - Running = 4 - Ascending stairs = 5 - Descending stairs = 6

### Activity Mapping
- 1 Lying = 2
- 2 Sitting = 0
- 3 Standing = 1
- 4 Walking = 3
- 5 Running = 4
- 6 Ascending = 5
- 7 Descending = 6

In [6]:
hand = [1,4,5,6]
chest = [1,21,22,23]
ankle = [1,38,39,40]
column_name = ['Activity','AccX','AccY','AccZ','Person','Body_Position']
rearranged_column_name = ['AccX','AccY','AccZ','Person','Body_Position','Activity']
activity = [1,2,3,4,5,12,13]

In [7]:
user = {}

###### Each user data for three different position is stored in "user" collection such that 
    - User1 contains data for user 1 at index 10,11,12
    - User2 contains data for user 2 at index 20,21,22
    .
    .
    
    - User8 contains data for user 8 at index 80,81,82

In [8]:
for person_index in range(1,9):

    df = pd.read_csv(url+"subject10"+str(person_index)+".dat",sep="\s+", header=None)
    
    # Consider only the relevant activities. Column 1 indicates the activity
    working_df = df[(df[1] == 1) | (df[1] == 2) | (df[1] == 3) | (df[1] == 4) | (df[1] == 5) | (df[1] == 12) | (df[1] == 13)]
    
    # Initialization
    idx = 0
    
    # For three positions
    for index in range(1,4):
        if index == 1:
            
            # Calculate index
            idx = person_index*10
            user[idx] = working_df[hand].copy()
            user[idx].loc[:, 'Person'] = person_index
            user[idx].loc[:, 'Body_Position'] = "Dominant_Arm"
            
        elif index == 2:
            idx = person_index*10 + 1
            user[idx] = working_df[chest].copy()
            user[idx].loc[:, 'Person'] = person_index
            user[idx].loc[:, 'Body_Position'] = "Torso"
            
        elif index == 3:
            idx = person_index*10 + 2
            user[idx] = working_df[ankle].copy()
            user[idx].loc[:, 'Person'] = person_index
            user[idx].loc[:, 'Body_Position'] = "Dominant_Leg"

        # Assigning the column name
        user[idx].columns = column_name

        # Rearranging the column to place the ground truth activity column at the end
        user[idx] = user[idx].reindex(rearranged_column_name, axis=1)

        user[idx].reset_index(drop=True, inplace=True)
        # Remove the NaN entries
        user[idx].dropna(inplace = True)

In [9]:
user[10].head()

Unnamed: 0,AccX,AccY,AccZ,Person,Body_Position,Activity
0,2.2153,8.27915,5.58753,1,Dominant_Arm,1
1,2.29196,7.67288,5.74467,1,Dominant_Arm,1
2,2.2909,7.1424,5.82342,1,Dominant_Arm,1
3,2.218,7.14365,5.8993,1,Dominant_Arm,1
4,2.30106,7.25857,6.09259,1,Dominant_Arm,1


In [10]:
np.unique(user[10]['Activity'])

array([ 1,  2,  3,  4,  5, 12, 13])

#### User 9 activities - Computer work, folding laundry, houe cleaning, playing soccer, rope jumping

#### "user" collection contains 24 dataframes. Each dataframes contains a position specific data of 6 different activities.
    - Calculate magnitude
    - Decision: Direct windowing/split each dataframe in different activity dataframe then apply windowing

# 2. Function Magnitude

In [11]:
# def magnitude(df):
#     ax2 = df['AccX']**2
#     ay2 = df['AccY']**2
#     az2 = df['AccZ']**2
#     am2 = ax2 + ay2 + az2
    
#     gx2 = df['GyrX']**2
#     gy2 = df['GyrY']**2
#     gz2 = df['GyrZ']**2
#     gm2 = gx2 + gy2 + gz2
    
#     mx2 = df['MagX']**2
#     my2 = df['MagY']**2
#     mz2 = df['MagZ']**2
#     mm2 = mx2 + my2 + mz2
    
#     df['Am']=am2.apply(lambda x: math.sqrt(x))
#     df['Gm']=gm2.apply(lambda x: math.sqrt(x))
#     df['Mm']=mm2.apply(lambda x: math.sqrt(x))

#### Calculate Magnitude

In [12]:
# for person_index in range(1,9):
#     idx = 0
#     for index in range(1,4):
#         if index == 1:
#             idx = person_index*10
#         elif index == 2:
#             idx = person_index*10 + 1
#         elif index == 3:
#             idx = person_index*10 + 2

#         # Calculate magnitude
#         magnitude(user[idx])

# 3. Combine Files for Normalization and Standardization

In [13]:
save_path = "/home/avijoychakma/Downloads/DTCN-AR/Dataset Preprocessing/PAMAP/ACC-Total-Normalization/"

In [27]:
total_frame = pd.concat([user[10],user[11],user[12],user[20],user[21],user[22],user[30],user[31],user[32],user[40],user[41],user[42],user[50],user[51],user[52],user[60],user[61],user[62],user[70],user[71],user[72],user[80],user[81],user[82]], ignore_index=True)
total_frame.reset_index(drop=True, inplace=True)

#### Global labelling

In [28]:
# - Sitting = 0
# - Standing = 1
# - Lying = 2
# - Walking = 3
# - Running = 4
# - Ascending stairs = 5
# - Descending stairs = 6

#### PAMAP Labelling

In [29]:
# - 1 Lying = 2
# - 2 Sitting = 0
# - 3 Standing = 1
# - 4 Walking = 3
# - 5 Running = 4
# - 12 Ascending = 5
# - 13 Descending = 6

In [30]:
# ValueError: Replacement not allowed with overlapping keys and values
# Convert the labels to some arbitrary label and then map that arbitrary label to global mapping

In [31]:
np.unique(total_frame['Activity'])

array([ 1,  2,  3,  4,  5, 12, 13])

In [32]:
# total_frame = total_frame.replace({'Activity' : { 1:2, 2:0, 3:1, 4:3, 5:4, 12:5, 13:6 }}, inplace = True)
total_frame.replace({'Activity' : { 1:100, 2:200, 3:300, 4:400, 5:500, 12:1200, 13:1300}}, inplace = True)

In [33]:
total_frame.head()

Unnamed: 0,AccX,AccY,AccZ,Person,Body_Position,Activity
0,2.2153,8.27915,5.58753,1,Dominant_Arm,100
1,2.29196,7.67288,5.74467,1,Dominant_Arm,100
2,2.2909,7.1424,5.82342,1,Dominant_Arm,100
3,2.218,7.14365,5.8993,1,Dominant_Arm,100
4,2.30106,7.25857,6.09259,1,Dominant_Arm,100


In [34]:
np.unique(total_frame['Activity'])

array([ 12,  13, 100, 200, 300, 400, 500])

In [22]:
total_frame.replace({'Activity' : { 100:2, 200:0, 300:1, 400:3, 500:4, 1200:5, 1300:6}}, inplace = True)

AttributeError: 'NoneType' object has no attribute 'replace'

In [None]:
np.unique(total_frame['Activity'])

In [16]:
total_frame.shape

(3366362, 12)

In [17]:
total_frame.head()

Unnamed: 0,AccX,AccY,AccZ,GyrX,GyrY,GyrZ,MagX,MagY,MagZ,Person,Body_Position,Activity
0,2.2153,8.27915,5.58753,-0.00475,0.037579,-0.011145,8.932,-67.9326,-19.9755,1,Dominant_Arm,1
1,2.29196,7.67288,5.74467,-0.17171,0.025479,-0.009538,9.583,-67.9584,-20.9091,1,Dominant_Arm,1
2,2.2909,7.1424,5.82342,-0.238241,0.011214,0.000831,9.05516,-67.4017,-19.5083,1,Dominant_Arm,1
3,2.218,7.14365,5.8993,-0.192912,0.019053,0.013374,9.92698,-67.4387,-20.5602,1,Dominant_Arm,1
4,2.30106,7.25857,6.09259,-0.069961,-0.018328,0.004582,9.15626,-67.1825,-20.0857,1,Dominant_Arm,1


#### Save GT

In [18]:
person_gt = np.array(total_frame['Person'])
activity_gt = np.array(total_frame['Activity'])
position_gt = np.array(total_frame['Body_Position'])

In [19]:
total_frame.drop(['Person','Activity','Body_Position'], axis=1, inplace=True)
column_name = total_frame.columns

In [21]:
scaler = preprocessing.StandardScaler()
df_standardized = scaler.fit_transform(total_frame)
df_standardized = pd.DataFrame(df_standardized)

In [20]:
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(df_standardized)
total_dataframe = pd.DataFrame(np_scaled, columns=column_name)

In [22]:
total_dataframe["Person"] = person_gt
total_dataframe["Activity"] = activity_gt
total_dataframe["Body_Position"] = position_gt

In [23]:
total_dataframe.head()

Unnamed: 0,AccX,AccY,AccZ,GyrX,GyrY,GyrZ,MagX,MagY,MagZ,Person,Activity,Body_Position
0,0.087163,0.59407,1.075536,-0.006267,0.047094,-0.00127,0.304926,-2.219446,-0.693121,1,1,Dominant_Arm
1,0.096402,0.51658,1.107448,-0.165422,0.031717,-0.000267,0.326715,-2.220479,-0.730407,1,1,Dominant_Arm
2,0.096275,0.448777,1.12344,-0.228844,0.013589,0.006209,0.309048,-2.198191,-0.674462,1,1,Dominant_Arm
3,0.087489,0.448937,1.138849,-0.185633,0.023552,0.014042,0.338228,-2.199672,-0.716473,1,1,Dominant_Arm
4,0.097499,0.463626,1.178101,-0.06843,-0.023951,0.008551,0.312432,-2.189415,-0.697522,1,1,Dominant_Arm


In [24]:
total_dataframe["Body_Position"].unique()

array(['Dominant_Arm', 'Torso', 'Dominant_Leg'], dtype=object)

#### Extract and Save Position-based each User data Files

##### User 1

In [27]:
User1_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 1) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User1_Dominant_Arm.reset_index(drop=True, inplace=True)
User1_Dominant_Arm.to_csv (save_path+'User1_Dominant_Arm.csv', index = None, header=True)

User1_Torso = total_dataframe[(total_dataframe["Person"] == 1) & (total_dataframe["Body_Position"] == "Torso")]
User1_Torso.reset_index(drop=True, inplace=True)
User1_Torso.to_csv (save_path+'User1_Torso.csv', index = None, header=True)

User1_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 1) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User1_Dominant_Leg.reset_index(drop=True, inplace=True)
User1_Dominant_Leg.to_csv (save_path+'User1_Dominant_Leg.csv', index = None, header=True)

##### User 2

In [28]:
User2_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 2) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User2_Dominant_Arm.reset_index(drop=True, inplace=True)
User2_Dominant_Arm.to_csv (save_path+'User2_Dominant_Arm.csv', index = None, header=True)

User2_Torso = total_dataframe[(total_dataframe["Person"] == 2) & (total_dataframe["Body_Position"] == "Torso")]
User2_Torso.reset_index(drop=True, inplace=True)
User2_Torso.to_csv (save_path+'User2_Torso.csv', index = None, header=True)

User2_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 2) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User2_Dominant_Leg.reset_index(drop=True, inplace=True)
User2_Dominant_Leg.to_csv (save_path+'User2_Dominant_Leg.csv', index = None, header=True)

##### User 3

In [29]:
User3_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 3) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User3_Dominant_Arm.reset_index(drop=True, inplace=True)
User3_Dominant_Arm.to_csv (save_path+'User3_Dominant_Arm.csv', index = None, header=True)

User3_Torso = total_dataframe[(total_dataframe["Person"] == 3) & (total_dataframe["Body_Position"] == "Torso")]
User3_Torso.reset_index(drop=True, inplace=True)
User3_Torso.to_csv (save_path+'User3_Torso.csv', index = None, header=True)

User3_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 3) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User3_Dominant_Leg.reset_index(drop=True, inplace=True)
User3_Dominant_Leg.to_csv (save_path+'User3_Dominant_Leg.csv', index = None, header=True)

##### User 4

In [30]:
User4_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 4) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User4_Dominant_Arm.reset_index(drop=True, inplace=True)
User4_Dominant_Arm.to_csv (save_path+'User4_Dominant_Arm.csv', index = None, header=True)

User4_Torso = total_dataframe[(total_dataframe["Person"] == 4) & (total_dataframe["Body_Position"] == "Torso")]
User4_Torso.reset_index(drop=True, inplace=True)
User4_Torso.to_csv (save_path+'User4_Torso.csv', index = None, header=True)

User4_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 4) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User4_Dominant_Leg.reset_index(drop=True, inplace=True)
User4_Dominant_Leg.to_csv (save_path+'User4_Dominant_Leg.csv', index = None, header=True)

##### User 5

In [31]:
User5_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 5) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User5_Dominant_Arm.reset_index(drop=True, inplace=True)
User5_Dominant_Arm.to_csv (save_path+'User5_Dominant_Arm.csv', index = None, header=True)

User5_Torso = total_dataframe[(total_dataframe["Person"] == 5) & (total_dataframe["Body_Position"] == "Torso")]
User5_Torso.reset_index(drop=True, inplace=True)
User5_Torso.to_csv (save_path+'User5_Torso.csv', index = None, header=True)

User5_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 5) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User5_Dominant_Leg.reset_index(drop=True, inplace=True)
User5_Dominant_Leg.to_csv (save_path+'User5_Dominant_Leg.csv', index = None, header=True)

##### User 6

In [32]:
User6_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 6) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User6_Dominant_Arm.reset_index(drop=True, inplace=True)
User6_Dominant_Arm.to_csv (save_path+'User6_Dominant_Arm.csv', index = None, header=True)

User6_Torso = total_dataframe[(total_dataframe["Person"] == 6) & (total_dataframe["Body_Position"] == "Torso")]
User6_Torso.reset_index(drop=True, inplace=True)
User6_Torso.to_csv (save_path+'User6_Torso.csv', index = None, header=True)

User6_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 6) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User6_Dominant_Leg.reset_index(drop=True, inplace=True)
User6_Dominant_Leg.to_csv (save_path+'User6_Dominant_Leg.csv', index = None, header=True)

##### User 7

In [33]:
User7_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 7) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User7_Dominant_Arm.reset_index(drop=True, inplace=True)
User7_Dominant_Arm.to_csv (save_path+'User7_Dominant_Arm.csv', index = None, header=True)

User7_Torso = total_dataframe[(total_dataframe["Person"] == 7) & (total_dataframe["Body_Position"] == "Torso")]
User7_Torso.reset_index(drop=True, inplace=True)
User7_Torso.to_csv (save_path+'User7_Torso.csv', index = None, header=True)

User7_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 7) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User7_Dominant_Leg.reset_index(drop=True, inplace=True)
User7_Dominant_Leg.to_csv (save_path+'User7_Dominant_Leg.csv', index = None, header=True)

##### User 8

In [34]:
User8_Dominant_Arm = total_dataframe[(total_dataframe["Person"] == 8) & (total_dataframe["Body_Position"] == "Dominant_Arm")]
User8_Dominant_Arm.reset_index(drop=True, inplace=True)
User8_Dominant_Arm.to_csv (save_path+'User8_Dominant_Arm.csv', index = None, header=True)

User8_Torso = total_dataframe[(total_dataframe["Person"] == 8) & (total_dataframe["Body_Position"] == "Torso")]
User8_Torso.reset_index(drop=True, inplace=True)
User8_Torso.to_csv (save_path+'User8_Torso.csv', index = None, header=True)

User8_Dominant_Leg = total_dataframe[(total_dataframe["Person"] == 8) & (total_dataframe["Body_Position"] == "Dominant_Leg")]
User8_Dominant_Leg.reset_index(drop=True, inplace=True)
User8_Dominant_Leg.to_csv (save_path+'User8_Dominant_Leg.csv', index = None, header=True)

# 4. Windowing

In [35]:
Torso_dataset = []
Dominant_Arm_dataset = []
Dominant_Leg_dataset = []

Torso_gt = []
Dominant_Arm_gt = []
Dominant_Leg_gt = []

In [36]:
position = ['Dominant_Arm', 'Torso', 'Dominant_Leg']

In [37]:
win_size=128
step_size=64
AXIS = 3
START = AXIS +1
END = AXIS +2

In [39]:
for position_index in tqdm(range(1,4)): #Back, RUA, RLA, LUA, LLA
    for person_index in range(1,9): # 4 Persons
        file_name = "User"+str(person_index) + "_"+position[position_index-1]
        
        df = pd.read_csv(save_path+file_name+'.csv', sep=",")   
        len_df = df.shape[0]
        narray = df.to_numpy()

        for i in range(0, len_df, step_size):
            window = narray[i:i+win_size,0:AXIS]
            
            if window.shape[0] != win_size:
                continue
            else:
                reshaped_window = window.reshape(1,win_size,1,AXIS)
                gt = np.bincount(narray[i:i+win_size,START:END].astype(int).ravel()).argmax()
#                 print(gt)
                
                if position_index == 1:
                    Dominant_Arm_dataset.append(reshaped_window)
                    Dominant_Arm_gt.append(gt)
                elif position_index == 2:
                    Torso_dataset.append(reshaped_window)
                    Torso_gt.append(gt)
                elif position_index == 3:
                    Dominant_Leg_dataset.append(reshaped_window)
                    Dominant_Leg_gt.append(gt)

100%|██████████| 3/3 [00:05<00:00,  1.95s/it]
