In [25]:
# Load IPython's autoreload extension and select autoreload mode 2.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
# Mount Google Drive at /content/drive; the data directory entered in the next
# cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
# Make the PAMAP2 folder the working directory: the relative `datadir` and the
# per-activity output folders created later are resolved against it.
# NOTE(review): hardcoded absolute Drive path -- consider a configurable variable.
cd '/content/drive/Shared drives/Rahul_Mohit/ECE202A/WALG/Data/PAMAP2'

/content/drive/Shared drives/Rahul_Mohit/ECE202A/WALG/Data/PAMAP2


In [28]:
import numpy as np
import pandas as pd
from os import listdir
import os.path
import zipfile
from keras.utils.np_utils import to_categorical
import json

In [29]:
## Function to split activities
def split_activities(labels, X, exclude_activities, borders=10 * 100):
    """Split one recording into contiguous segments that share a label.

    A new segment starts at every sample whose label differs from the
    previous sample's label. From each segment, `borders` samples are trimmed
    at both ends, and the segment is dropped when its label is listed in
    `exclude_activities` or when nothing is left after trimming.

    Parameters
    ----------
    labels : 1-D sequence
        Per-sample labels.
    X : 2-D array
        Per-sample data, indexed as X[sample, channel]; assumed to have one
        row per entry of `labels`.
    exclude_activities : container
        Labels whose segments are discarded.
    borders : int
        Number of samples removed from the start and from the end of every
        segment (default 10 * 100).

    Returns
    -------
    (Xlist, ylist) : tuple of lists
        Xlist holds the trimmed data slices, ylist the matching label of
        each slice. Both are empty when `labels` is empty.
    """
    tot_len = len(labels)
    if tot_len == 0:
        # Nothing to split; the change-point logic below assumes >= 1 sample.
        return [], []

    # Vectorised change detection: position i + 1 starts a new segment when
    # its label differs from the label at position i.
    label_arr = np.asarray(labels)
    change_points = np.flatnonzero(label_arr[1:] != label_arr[:-1]) + 1
    startpoints = np.concatenate(([0], change_points))
    endpoints = np.append(startpoints[1:] - 1, tot_len - 1)

    Xlist = []
    ylist = []
    for s, e in zip(startpoints, endpoints):
        activity = labels[s]
        if activity in exclude_activities:
            continue
        first = s + borders
        stop = e - borders + 1
        # A negative stop would wrap around to the end of X, so skip it.
        if stop < 0 or first >= tot_len:
            continue
        segment = X[first:stop, :]
        if len(segment) > 0:
            Xlist.append(segment)
            ylist.append(activity)
    return Xlist, ylist

## Function to split data into windows
def sliding_window(frame_length, step, Xsampleslist, ysampleslist, subsample=5):
    """Cut every segment into subsampled windows with appended statistics.

    For each segment, windows of `frame_length` rows are taken every `step`
    rows. Inside a window only every `subsample`-th row is kept; the
    per-channel mean and standard deviation of those kept rows are then
    appended as two extra rows.

    Parameters
    ----------
    frame_length : int
        Window length in rows of the segment.
    step : int
        Offset in rows between the starts of consecutive windows.
    Xsampleslist : list of 2-D arrays
        Segments, indexed as X[row, channel].
    ysampleslist : list
        One label object per segment; copied to every window of the segment.
    subsample : int
        Stride used when picking rows inside a window (default 5).

    Returns
    -------
    (Xsamples, ysamples) : tuple of lists
        Xsamples[i] is a list of the kept rows followed by the mean row and
        the std row; ysamples[i] is the label of the segment it came from.

    Notes
    -----
    The window start runs over range(0, rows - frame_length, step), whose stop
    is exclusive, so a segment with exactly `frame_length` rows yields no
    window.
    """
    Xsamples = []
    ysamples = []
    for j, X in enumerate(Xsampleslist):
        ybinary = ysampleslist[j]
        for i in range(0, X.shape[0] - frame_length, step):
            xsub = [X[i + k, :] for k in range(0, frame_length, subsample)]
            # Remember how many data rows there are so the statistics cover
            # exactly those rows and never the appended mean row.
            n_rows = len(xsub)
            xsub.append(np.mean(xsub[:n_rows], axis=0))
            xsub.append(np.std(xsub[:n_rows], axis=0, dtype=np.float64))
            Xsamples.append(xsub)
            ysamples.append(ybinary)
    return Xsamples, ysamples

## Function to transform labels
def transform_y(y, mapclasses, nr_classes):
    """Translate raw labels to class indices and encode them with to_categorical.

    Parameters
    ----------
    y : iterable
        Raw labels; every label must be a key of `mapclasses`.
    mapclasses : dict
        Maps a raw label to its integer class index.
    nr_classes : int
        Forwarded to `to_categorical` as the number of classes.

    Returns
    -------
    The result of `to_categorical` (presumably a one-hot array with one row
    per label -- confirm against the installed Keras version).
    """
    class_indices = [mapclasses[label] for label in y]
    ymapped = np.array(class_indices, dtype='int')
    return to_categorical(ymapped, nr_classes)

## Function to get headers
def get_header():
    """Return the list of column names applied to each loaded data file.

    The list starts with "timestamp", "activityID" and "heartrate", followed
    by the same 17 IMU channel names prefixed in turn with "hand_", "chest_"
    and "ankle_" (54 names in total).
    """
    # 17 channel names shared by every IMU: temperature, four 3-axis sensors
    # and a 4-component orientation.
    imu_channels = ['temperature']
    for sensor in ('acc_16g', 'acc_6g', 'gyroscope', 'magnometer'):
        imu_channels.extend(sensor + '_' + axis for axis in ('x', 'y', 'z'))
    imu_channels.extend('orientation_' + str(component) for component in range(4))

    header = ["timestamp", "activityID", "heartrate"]
    for location in ('hand', 'chest', 'ankle'):
        header.extend(location + '_' + channel for channel in imu_channels)
    return header

## Function to add header to the data
def addheader(datasets):
    """Assign the column names from get_header() to every frame in `datasets`.

    The frames are modified in place (their `columns` attribute is
    overwritten) and the same `datasets` object is returned.
    """
    column_names = get_header()
    for frame in datasets:
        frame.columns = column_names
    return datasets

## Function to store data as numpy files.
def numpify_and_store(X, y, X_name, y_name, outdatapath, shuffle=False):
    """Convert X and y to numpy arrays and save them with np.save.

    Parameters
    ----------
    X, y : array-like
        Samples and their labels. When shuffling, X is indexed with three
        axes and y with two.
    X_name, y_name : str
        File names (joined onto `outdatapath`) passed to np.save.
    outdatapath : str
        Directory the two files are written to.
    shuffle : bool
        Only the literal value True triggers shuffling. In that case the
        global numpy random generator is seeded with 123 and the same random
        permutation is applied along the first axis of X and y.

    Side effects: writes two files, prints the target paths, and (when
    shuffling) re-seeds numpy's global random state.
    """
    X_arr = np.array(X)
    y_arr = np.array(y)

    if shuffle is True:
        # Fixed seed so that the shuffled order is the same on every run.
        np.random.seed(123)
        order = np.random.permutation(X_arr.shape[0])
        X_arr, y_arr = X_arr[order, :, :], y_arr[order, :]

    xpath, ypath = (os.path.join(outdatapath, name) for name in (X_name, y_name))
    np.save(xpath, X_arr)
    np.save(ypath, y_arr)
    print('Storing ' + xpath, ypath)

## Function to map classes.
def map_class(datasets_filled, exclude_activities):
    """Collect the activity IDs present in the data and number them 0..n-1.

    Parameters
    ----------
    datasets_filled : iterable
        Objects exposing an `activityID` attribute (one value per sample).
    exclude_activities : iterable
        Activity IDs to leave out of the class list.

    Returns
    -------
    class_labels : list
        ACTIVITIES_MAP name of every remaining activity ID, in ascending ID
        order.
    nr_classes : int
        Number of remaining activity IDs.
    mapclasses : dict
        Maps each remaining activity ID to its class index.

    The name of every class is also printed, one per line. IDs are sorted so
    that the class indices do not depend on set iteration order. An empty
    `datasets_filled` yields ([], 0, {}).
    """
    excluded = set(exclude_activities)
    present = set()
    for data in datasets_filled:
        present.update(set(np.array(data.activityID)) - excluded)

    # Sorting makes the ID -> index assignment deterministic.
    class_ids = sorted(present)
    class_labels = [ACTIVITIES_MAP[i] for i in class_ids]
    for label in class_labels:
        print(label)
    nr_classes = len(class_ids)
    mapclasses = {class_id: index for index, class_id in enumerate(class_ids)}
    return class_labels, nr_classes, mapclasses

## Function to split data.
def split_data(Xlists, ybinarylists, indices):
    """Select the samples and labels of one or several recordings.

    Parameters
    ----------
    Xlists : sequence
        One collection of samples per recording.
    ybinarylists : sequence
        One collection of labels per recording, parallel to `Xlists`.
    indices : slice or index
        A slice selects several recordings, whose items are concatenated into
        one flat list. Any other value is used as a plain index selecting a
        single recording.

    Returns
    -------
    (x_setlist, y_setlist) : tuple of lists
        The selected samples and the selected labels.
    """
    if isinstance(indices, slice):
        # Flatten the items of every selected recording into one list.
        x_setlist = [X for Xlist in Xlists[indices] for X in Xlist]
        y_setlist = [y for ylist in ybinarylists[indices] for y in ylist]
    else:
        x_setlist = list(Xlists[indices])
        y_setlist = list(ybinarylists[indices])
    return x_setlist, y_setlist

In [30]:
## Variables
# Windowing parameters, in rows of a segment: window length and the offset
# between the starts of consecutive windows.
frame_length = 1000
step = 200

# Folder holding the raw data files, relative to the working directory.
datadir = os.path.join('..', 'PAMAP2_Dataset', 'Protocol')

# Channels fed to the model: x/y/z of three sensors at three body locations,
# ordered hand -> ankle -> chest.
columns_to_use = [
    location + '_' + sensor + '_' + axis
    for location in ('hand', 'ankle', 'chest')
    for sensor in ('acc_16g', 'gyroscope', 'magnometer')
    for axis in ('x', 'y', 'z')
]

# Activity ID -> activity name.
ACTIVITIES_MAP = dict([
    (0, 'no_activity'),
    (1, 'lying'),
    (2, 'sitting'),
    (3, 'standing'),
    (4, 'walking'),
    (5, 'running'),
    (6, 'cycling'),
    (7, 'nordic_walking'),
    (9, 'watching_tv'),
    (10, 'computer_work'),
    (11, 'car_driving'),
    (12, 'ascending_stairs'),
    (13, 'descending_stairs'),
    (16, 'vaccuum_cleaning'),
    (17, 'ironing'),
    (18, 'folding_laundry'),
    (19, 'house_cleaning'),
    (20, 'playing_soccer'),
    (24, 'rope_jumping'),
])

In [31]:
# Read every file found in `datadir` (in sorted name order) into its own
# pandas DataFrame and attach the column names.
filenames = sorted(os.listdir(datadir))
print('Start pre-processing all ' + str(len(filenames)) + ' files...')
datasets = addheader([
    pd.read_csv(os.path.join(datadir, fn), header=None, sep=' ')
    for fn in filenames
])

# Interpolate each frame so that channels with missing values are filled in.
datasets_filled = [frame.interpolate() for frame in datasets]
print('loaded the dataset')

# Number the activities present in the data, ignoring ID 0.
class_labels, nr_classes, mapclasses = map_class(datasets_filled, exclude_activities=[0])

# Per-file inputs (selected channels) and per-file activity IDs.
xall = [np.array(frame[columns_to_use]) for frame in datasets_filled]
yall = [np.array(frame.activityID) for frame in datasets_filled]

Start pre-processing all 9 files...
loaded the dataset
lying
sitting
standing
walking
running
cycling
nordic_walking
ascending_stairs
descending_stairs
vaccuum_cleaning
ironing
rope_jumping


In [32]:
# Build and store one train/val/test set per target activity
# (ACTIVITIES_MAP: 2 = sitting, 4 = walking).
for activity in [2, 4]:
    # Excluding every other known activity ID keeps only `activity`.
    exclude_activities = list(ACTIVITIES_MAP.keys() - [activity])

    # Output folder is named after the activity, relative to the working directory.
    outdatapath = ACTIVITIES_MAP[activity]
    if not os.path.exists(outdatapath):
        os.mkdir(outdatapath)

    # Per file: contiguous segments of the target activity and their labels.
    xylists = [split_activities(y, x, exclude_activities) for x, y in zip(xall, yall)]
    Xlists, ylists = zip(*xylists)
    ybinarylists = [transform_y(y, mapclasses, nr_classes) for y in ylists]

    # File 4 -> validation, file 5 -> test, files 0-3 and 6.. -> training.
    train_range_1 = slice(0, 4)
    train_range_2 = slice(6, len(datasets_filled))
    x_vallist, y_vallist = split_data(Xlists, ybinarylists, indices=4)
    x_testlist, y_testlist = split_data(Xlists, ybinarylists, indices=5)
    x_trainlist_1, y_trainlist_1 = split_data(Xlists, ybinarylists, indices=train_range_1)
    x_trainlist_2, y_trainlist_2 = split_data(Xlists, ybinarylists, indices=train_range_2)

    # Cut the segments into windows; every window carries the label of the
    # segment it was taken from.
    x_train_1, y_train_1 = sliding_window(frame_length, step, x_trainlist_1, y_trainlist_1)
    x_train_2, y_train_2 = sliding_window(frame_length, step, x_trainlist_2, y_trainlist_2)
    x_train = x_train_1 + x_train_2
    y_train = y_train_1 + y_train_2

    x_val, y_val = sliding_window(frame_length, step, x_vallist, y_vallist)
    x_test, y_test = sliding_window(frame_length, step, x_testlist, y_testlist)

    # Store the three splits as numpy files; only the training set is shuffled.
    numpify_and_store(x_train, y_train,
                      X_name='X_PAMAP2_train', y_name='y_PAMAP2_train',
                      outdatapath=outdatapath, shuffle=True)
    numpify_and_store(x_val, y_val,
                      X_name='X_PAMAP2_val', y_name='y_PAMAP2_val',
                      outdatapath=outdatapath, shuffle=False)
    numpify_and_store(x_test, y_test,
                      X_name='X_PAMAP2_test', y_name='y_PAMAP2_test',
                      outdatapath=outdatapath, shuffle=False)

    print('Processed data succesfully stored in ' + outdatapath)

Storing sitting/X_PAMAP2_train sitting/y_PAMAP2_train
Storing sitting/X_PAMAP2_val sitting/y_PAMAP2_val
Storing sitting/X_PAMAP2_test sitting/y_PAMAP2_test
Processed data succesfully stored in sitting
Storing walking/X_PAMAP2_train walking/y_PAMAP2_train
Storing walking/X_PAMAP2_val walking/y_PAMAP2_val
Storing walking/X_PAMAP2_test walking/y_PAMAP2_test
Processed data succesfully stored in walking
