In [2]:
import pandas as pd
import numpy as np
from scipy import signal
import hickle as hkl
import os

In [3]:
loadList = ['Phones_accelerometer.csv','Phones_gyroscope.csv','Watch_accelerometer.csv','Watch_gyroscope.csv']
classCounts = ['sit', 'stand', 'walk', 'stairsup', 'stairsdown', 'bike']
deviceCounts = ['nexus4', 'lgwatch','s3', 's3mini','gear','samsungold']
deviceSamplingRate = [200,200,150,100,100,50]
deviceWindowFrame = [512,512,384,256,256,128]
downSamplingRate = [4,4,3,2,2,1]
subDeviceCounts = ['nexus4_1', 'nexus4_2', 'lgwatch_1', 'lgwatch_2', 's3_1', 's3_2', 's3mini_1', 's3mini_2','gear_1', 'gear_2','samsungold_1', 'samsungold_2']
userCounts = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']

In [4]:
def load_data(filename):
    data = pd.read_csv(filename)
    data.columns = ['index', 'Arrival_Time', 'Creation_Time', 'x', 'y', 'z', 'User', 'Model', 'Device', 'gt']
    data = data.drop(['index', 'Arrival_Time', 'Creation_Time'], axis=1)
    return data

#load the data
#combine the dataframes

accel = [load_data('../datasets/extracted/Activity recognition exp/Phones_accelerometer.csv'), load_data('../datasets/extracted/Activity recognition exp/Watch_accelerometer.csv')]

gyro = [load_data('../datasets/extracted/Activity recognition exp/Phones_gyroscope.csv'), load_data('../datasets/extracted/Activity recognition exp/Watch_gyroscope.csv')]
gyro = pd.concat(gyro)
accel = pd.concat(accel)
accel.head()

Unnamed: 0,x,y,z,User,Model,Device,gt
0,-5.958191,0.688065,8.135345,a,nexus4,nexus4_1,stand
1,-5.95224,0.670212,8.136536,a,nexus4,nexus4_1,stand
2,-5.995087,0.653549,8.204376,a,nexus4,nexus4_1,stand
3,-5.942718,0.676163,8.128204,a,nexus4,nexus4_1,stand
4,-5.991516,0.641647,8.135345,a,nexus4,nexus4_1,stand


In [5]:
def segment_data(data, time_step, step:int):
    segmentAccData = []
    step = int(step)
    for i in range(0, data.shape[0] - time_step,step):
        segmentAccData.append(data[i:i+time_step,:])
    return np.array(segmentAccData)

In [6]:
def downSampleLowPass(motionData,factor):
    accX = signal.decimate(motionData[:,:,0],factor)
    accY = signal.decimate(motionData[:,:,1],factor)
    accZ = signal.decimate(motionData[:,:,2],factor)
    gyroX = signal.decimate(motionData[:,:,3],factor)
    gyroY = signal.decimate(motionData[:,:,4],factor)
    gyroZ = signal.decimate(motionData[:,:,5],factor)
    return np.dstack((accX,accY,accZ,gyroX,gyroY,gyroZ))

In [None]:
index_offset = 0
all_processed_data = {}
all_processed_label = {}
device_index = {}
device_indicies = {new_list: [] for new_list in range(len(deviceCounts))}
# now you want to go through all devices
for client_dev_index, device_name in enumerate(deviceCounts):
    device_data_acc = accel.loc[accel['Model'] == device_name]
    device_users = device_data_acc['User'].unique()
    device_data_gyro = gyro.loc[gyro['Model'] == device_name]
    device_users_gyro = device_data_gyro['User'].unique()

        # now you want to go through all users
        # if there is an empty dataframe then skip
    if device_data_acc.empty or device_data_gyro.empty:
        continue
        # there is one user that does not use accelerometer
    for user_index, user in enumerate(device_users_gyro):
        user_device_acc = device_data_acc.loc[device_data_acc['User'] == user]
        user_device_gyro = device_data_gyro.loc[device_data_gyro['User'] == user]

        processed_class_data = []
        processed_class_label = []
            # now you want to go through all classes
        for class_index, class_name in enumerate(classCounts):
            if user_device_acc.size <= user_device_gyro.size:  # if more entries for gyro then use acc (choose the smallest class data set)
                smallest_data = user_device_acc.loc[user_device_acc['gt'] == class_name]
                filtered_gyro_data_ind = smallest_data.index
            else:
                smallest_data = user_device_gyro.loc[user_device_gyro['gt'] == class_name]
                filtered_gyro_data_ind = smallest_data.index
                # print(smallest_class_data)
                # print(smallest_data)
            if(smallest_data.empty):
                continue
            window_frame = deviceWindowFrame[int(client_dev_index / 2)]

            split_res = np.split(smallest_data, np.where(np.diff(filtered_gyro_data_ind.to_numpy()) != 1)[0] + 1)
            split_res = [newArr for newArr in split_res if len(newArr) >= window_frame]
            indicies = (list(map(lambda x: x.index, split_res)))
                # now that I have the indicies I can segment each dasta
            for segmented_class_range in indicies:
                acc_loc = accel.iloc[segmented_class_range]
                acc_selected = acc_loc[['x', 'y', 'z']].to_numpy()
                gyro_loc = gyro.iloc[segmented_class_range]
                gyro_selected = gyro_loc[['x', 'y', 'z']].to_numpy()

                combined_data = np.dstack((segment_data(acc_selected, deviceWindowFrame[client_dev_index],
                                                            deviceWindowFrame[client_dev_index] / 2),
                                               segment_data(gyro_selected, deviceWindowFrame[client_dev_index],
                                                            deviceWindowFrame[client_dev_index] / 2)))

                processed_class_data.append(combined_data)
                processed_class_label.append(np.full(combined_data.shape[0], class_index, dtype=int))
                # print(len(processed_class_data), 'the len after run')
                    # print(processed_class_label[0].shape, processed_class_label)
            temp_processed = np.vstack((processed_class_data))  # TODO for some reason sometimes this is empty
            if client_dev_index < 5:
                temp_processed = downSampleLowPass(np.float32(temp_processed),
                                                       downSamplingRate[client_dev_index])
            data_index = (len(userCounts) * client_dev_index) + user_index - index_offset
            all_processed_data[data_index] = temp_processed
            all_processed_label[data_index] = np.hstack(processed_class_label)
            device_index[data_index] = np.full(all_processed_label[data_index].shape[0], client_dev_index)
            device_indicies[client_dev_index].append(data_index)
                # print(processed_class_data)
print(all_processed_data)

In [39]:
largest_client = max([client.shape[0] for client in all_processed_data.values()])
#pad the client data
all_processed_data_padded = [np.pad(client, ((0, largest_client - client.shape[0]), (0, 0), (0, 0)), 'constant', constant_values=np.nan)
                                 for client in all_processed_data.values()]


all_processed_label_padded = [np.pad(client, ((0, largest_client - client.shape[0])), 'constant', constant_values=-1)
                                 for client in all_processed_label.values()]



# print(all_processed_data_padded) # all labels have been padded with -1 and all data has been padded with nan
# print(all_processed_label_padded)
padded_arr = np.asarray(all_processed_data_padded)
padded_label = np.asarray(all_processed_label_padded)
# padded_label = np.asarray(all_processed_label)
# print(all_processed_label_pad.shape)
padded_arr.shape, padded_label.shape

((44, 2457, 128, 6), (44, 2457))

In [50]:
delete_items = []
# removing clients that do not have all classes
for index, i in enumerate(padded_label):
    filtered = list(filter(lambda x: x!=-1,np.unique(i)))
    if(len(filtered) < len(classCounts)):
        print('dropping client', index, filtered)
        delete_items.append(index)
        for k, v in device_indicies.items():
            if index in v:
                device_indicies[k].remove(index)
                
padded_dropped_data =  np.delete(padded_arr, delete_items, axis=0)
padded_dropped_label = np.delete(padded_label, delete_items, axis=0)
print(padded_dropped_data.shape, padded_dropped_label.shape)

dropping client 16 [1, 3, 5]
dropping client 43 [0, 2, 3, 4, 5]
(42, 2457, 128, 6) (42, 2457)


In [52]:
[client.shape for client in padded_dropped_data]

[(2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6),
 (2457, 128, 6)]

In [60]:
device_size = []
# print(device_indicies)
for k,v in device_indicies.items():
    device_indicies[k] = list(dict.fromkeys(v))
    
print(device_indicies)
for k, v in device_indicies.items():
    device_size.append(len(v))
    
print(device_size)

{0: [0, 1, 2, 3, 4, 5, 6, 7, 8], 1: [9, 10, 11, 12, 13, 14, 15, 16, 17], 2: [18, 19, 20, 21, 22, 23, 24, 25, 26], 3: [27, 28, 29, 30, 31, 32, 33, 34, 35], 4: [36, 37, 38, 39, 40, 41, 42, 43], 5: []}
[9, 9, 9, 9, 8, 0]


In [54]:
device_size

[54, 51, 54, 54, 46, 0]

In [55]:
end_index = 0
for i in device_size:
    print(i)

54
51
54
54
46
0
