In [None]:
# Mount Google Drive at /content/drive; force_remount=True re-mounts even if
# a mount already exists.
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

### PAMAP2

In [None]:
cd "/content/drive/MyDrive/PAMAP2_Dataset"

In [None]:
def generate_three_IMU(name):
    """Return the per-axis column names of a three-axis channel.

    Args:
        name: Base column name (a string); an axis suffix is appended to it.

    Returns:
        A list of three strings: ``name`` followed by ``_x``, ``_y`` and
        ``_z``, in that order.
    """
    return [name + suffix for suffix in ('_x', '_y', '_z')]

def generate_four_IMU(name):
    """Return the per-component column names of a four-component channel.

    Args:
        name: Base column name (a string); a component suffix is appended.

    Returns:
        A list of four strings: ``name`` followed by ``_x``, ``_y``, ``_z``
        and ``_w``, in that order.
    """
    return [name + suffix for suffix in ('_x', '_y', '_z', '_w')]

def generate_cols_IMU(name):
    """Build the ordered list of 17 column names for one IMU.

    The order is: temperature, 3D acceleration (16), 3D acceleration (6),
    3D gyroscope, 3D magnetometer (three axes each) and finally the 4D
    orientation (four components).

    Args:
        name: Prefix identifying the IMU (e.g. ``'hand'``).

    Returns:
        A list of 17 column-name strings, all starting with ``name``.
    """
    columns = [name + '_temperature']

    # Three-axis channels, in the order they appear in the data files.
    three_axis_channels = (
        '_3D_acceleration_16',
        '_3D_acceleration_6',
        '_3D_gyroscope',
        '_3D_magnetometer',
    )
    for channel in three_axis_channels:
        columns.extend(generate_three_IMU(name + channel))

    columns.extend(generate_four_IMU(name + '_4D_orientation'))
    return columns

def load_IMU():
    """Return the complete, ordered list of column names of a subject file.

    The list starts with ``time_stamp``, ``activity_id`` and ``heart_rate``
    and is followed by the IMU columns of the hand, chest and ankle sensors
    (in that order), as produced by ``generate_cols_IMU``.

    Returns:
        A list of column-name strings.
    """
    columns = ['time_stamp', 'activity_id', 'heart_rate']
    for location in ('hand', 'chest', 'ankle'):
        columns.extend(generate_cols_IMU(location))
    return columns

def load_subjects(root='./Protocol/subject', subject_ids=range(101, 110)):
    """Load the per-subject ``.dat`` files into a single DataFrame.

    Each file ``<root><subject_id>.dat`` is read as whitespace-separated
    values without a header row, labelled with the column names returned by
    ``load_IMU()``, and tagged with an extra ``id`` column holding the
    subject id.

    Args:
        root: Path prefix of the subject files; the subject id and the
            ``.dat`` extension are appended to it.
        subject_ids: Iterable of subject ids to load. Defaults to 101..109.

    Returns:
        A DataFrame with all subjects stacked in the order given and a fresh
        0..n-1 index. If ``subject_ids`` is empty, an empty DataFrame with
        the expected columns is returned.
    """
    cols = load_IMU()

    # Collect the frames and concatenate once: concatenating inside the loop
    # re-copies all previously loaded rows on every iteration.
    frames = []
    for subject_id in subject_ids:
        path = root + str(subject_id) + '.dat'
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        subject = pd.read_table(path, header=None, sep=r'\s+')
        subject.columns = cols
        subject['id'] = subject_id
        frames.append(subject)

    if not frames:
        return pd.DataFrame(columns=cols + ['id'])

    # ignore_index=True already yields a clean RangeIndex.
    return pd.concat(frames, ignore_index=True)

# Load all subject files (default ids 101-109) into one DataFrame.
data = load_subjects()

In [None]:
# Discard the 4D orientation columns of all three IMUs.
orientation_cols = [
    f'{location}_4D_orientation_{component}'
    for location in ('hand', 'chest', 'ankle')
    for component in ('x', 'y', 'z', 'w')
]
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
data = data.drop(columns=orientation_cols, errors='ignore')

def fix_data(data):
    """Remove activity-0 rows and fill in missing values.

    Steps, in order:
      1. Rows whose ``activity_id`` equals 0 are removed (presumably the
         unlabeled/transient class -- confirm against the dataset readme).
      2. Missing values are interpolated with the pandas default (linear,
         forward direction).
      3. Values still missing afterwards (e.g. leading NaNs) are replaced by
         the mean of their column.

    NOTE(review): interpolation runs over the whole frame in row order, so a
    gap that spans the boundary between two subjects, or a removed activity-0
    stretch, is bridged using values from both sides.

    Args:
        data: DataFrame containing an ``activity_id`` column. It is not
            modified.

    Returns:
        A new DataFrame with the remaining rows (original index labels kept)
        and NaNs filled as described above.
    """
    # Boolean mask instead of dropping by index label: identical for a unique
    # index, and it cannot remove extra rows if labels are duplicated.
    data = data[data['activity_id'] != 0]
    data = data.interpolate()
    # One vectorised fill; fillna aligns the per-column means by column name.
    return data.fillna(data.mean(numeric_only=True))


data = fix_data(data)

# Quick sanity summary. nunique() counts the distinct ids directly instead of
# computing a mean over every column of the frame just to count groups.
print('Df shape', data.shape)
print('No of subjects', data['id'].nunique())
print('No of total activities', data['activity_id'].nunique())

In [None]:
# Save the cleaned frame as a pickle in the current working directory.
data.to_pickle('pamap2_for_lstm.pkl')

### MHEALTH

In [None]:
cd "/content/drive/MyDrive/MHEALTHDATASET/"

/content/drive/MyDrive/MHEALTHDATASET


In [None]:
# Activity code -> activity name lookup.
activity_mapping = {
    0: 'NO_ACTIVITY',
    1: 'STANDING',
    2: 'SITTING',
    3: 'LAYING',
    4: 'WALKING',
    5: 'WALKING_UPSTAIRS',
    6: 'WAIST_BEND_FORWARD',
    7: 'FRONTAL_ARMS',
    8: 'KNEE_BEND',
    9: 'CYCLING',
    10: 'JOGGING',
    11: 'RUNNING',
    12: 'JUMP',
}

In [None]:
# Names for the first 24 columns of each subject log, in file order.
mhealth_columns = [
    'acc_ch_x', 'acc_ch_y', 'acc_ch_z',
    'ecg_sig_1', 'ecg_sig_2',
    'acc_la_x', 'acc_la_y', 'acc_la_z',
    'gyr_la_x', 'gyr_la_y', 'gyr_la_z',
    'mag_la_x', 'mag_la_y', 'mag_la_z',
    'acc_rw_x', 'acc_rw_y', 'acc_rw_z',
    'gyr_rw_x', 'gyr_rw_y', 'gyr_rw_z',
    'mag_rw_x', 'mag_rw_y', 'mag_rw_z',
    'activity',
]

# Read the ten tab-separated subject logs, then concatenate once: growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on each
# iteration and starts from an empty frame.
subject_frames = []
for i in range(1, 11):
    df_s = pd.read_csv(f'./mHealth_subject{i}.log', header=None, sep='\t')
    # Keep positional columns 0..23 and give them their names.
    df_s = (
        df_s.loc[:, list(range(len(mhealth_columns)))]
        .rename(columns=dict(enumerate(mhealth_columns)))
    )
    df_s['subject'] = f'subject{i}'
    subject_frames.append(df_s)

# No ignore_index: each subject keeps its own 0..n-1 row labels, as before.
df = pd.concat(subject_frames)
df_mhealth = df

In [None]:
# Keep a copy of the frame as it is before the activity-0 rows are resampled.
df_before = df_mhealth.copy()

# Split the rows into activity 0 and everything else.
is_null_activity = df_mhealth['activity'] == 0
null_act = df_mhealth[is_null_activity]
other_act = df_mhealth[~is_null_activity]

# 30720 activity-0 rows in total, shared evenly between the subjects.
num_samples_per_subject = 30720 // len(df_mhealth['subject'].unique())

# For every subject draw the same number of activity-0 rows, with replacement
# and with the same seed for each subject.
per_subject_samples = []
for subject_id in df_mhealth['subject'].unique():
    subject_null = null_act[null_act['subject'] == subject_id]
    per_subject_samples.append(
        subject_null.sample(n=num_samples_per_subject, replace=True, random_state=1)
    )
resampled_null_act = pd.concat(per_subject_samples)

# Resampled activity-0 rows first, followed by all other rows unchanged.
df_mhealth = pd.concat([resampled_null_act, other_act])

In [None]:
# Replace missing values in numeric columns with that column's mean; the
# non-numeric columns are left out of the mean and therefore not filled.
df_mhealth = df_mhealth.fillna(
    value=df_mhealth.mean(numeric_only=True)
)

In [None]:
# Save the processed frame as a pickle in the current working directory.
df_mhealth.to_pickle('mhealth_for_lstm.pkl')