In [1]:
import numpy as np
import pandas as pd
from pandas import datetime

import scipy.io

from matplotlib import pyplot

import os

  from pandas import datetime


In [2]:
!ls data/gordon/

exp_1_data.mat exp_3_data.mat exp_5_data.mat [34mpreprocessed[m[m
exp_2_data.mat exp_4_data.mat exp_6_data.mat readme.pdf


In [3]:
folder = "data/gordon"

## Process of Acceleration Data

In [8]:
save_folder = "data/gordon/preprocessed/acc"

In [9]:
exps = []
#Load experiments data
for i in range(1,7):
    exp = scipy.io.loadmat(os.path.join(folder, "exp_%d_data.mat"%i))
    exps.append(exp)
    
#compute total time
total_time = 0
for i in range(6):
    time_max = exps[i]["mAcc"][:,1].max()
    time_min = exps[i]["mAcc"][:,1].min()
    total_time += time_max-time_min
    
print("Total time: ", total_time)

#compute unit time(s)
#Total experiment time: 51min
timeunit = (51*60)/total_time
print("Unit time (s): ", timeunit)

Total time:  1886252.0
Unit time (s):  0.0016222646814953675


In [10]:
#Group configuration of agents
groups = [[[1,2,3,4,5,6,7,9,10,11]],
          [[1,2,3,4,5],[6,7,9,10,11]],
          [[1,2,3],[4,5,6],[7,9],[10,11]],
          [[1],[2],[3],[4],[5],[6],[7],[9],[10],[11]],
          [[1,2,3,5],[4,6,7],[9,10,11]],
          [[1,2,3,4,5,6],[7,9,10,11]]]

group_relations = []
mAcc_data = []
columns = ["ExpID","TS","userID","gID","isLeader", "xAcc", "yAcc","zAcc", "magACC"]

for exp_id in range(len(exps)-1):    
    print("Experiment %d"%(exp_id+1))
    extract_mAcc_expi = []
    expi_df = pd.DataFrame(exps[exp_id]["mAcc"], columns=columns)
    #extract userIDs
    userIDs = expi_df["userID"].unique()
    userIDs_map = {userID:idx for idx, userID in enumerate(userIDs)}
    
    #create group relations
    gr_i = np.zeros((10,10))
    groups_i = groups[exp_id]
    groups_i_converted = []
    for group in groups_i:
        group_converted = [userIDs_map[user] for user in group]
        groups_i_converted.append(group_converted)
    for group in groups_i_converted:
        for i in group:
            for j in group:
                gr_i[i,j]=1
    group_relations.append(gr_i)
    
    #mapping userIDs in pandas dataframe
    expi_df["userID_new"] = expi_df["userID"].apply(lambda x:userIDs_map[x])
    expi_df["TS_new"] = expi_df["TS"].apply(lambda x:timeunit*x)
    expi_df["Timestamp"] = expi_df["TS_new"].apply(lambda x: pd.Timestamp(x, unit='s'))
    expi_df = expi_df.set_index("Timestamp")
    
    #get common time interval
    max_start = expi_df[expi_df["userID_new"]==0].index.min()
    min_end = expi_df[expi_df["userID_new"]==0].index.max()
    for userID in range(10):        
        start = expi_df[expi_df["userID_new"]==userID].index.min()
        end = expi_df[expi_df["userID_new"]==userID].index.max()
        if start > max_start:
            max_start = start
        if end < min_end:
            min_end = end       
        
    for userID in range(10):    
        expi_df_userID = expi_df[expi_df.userID_new==userID]
        resampled = expi_df_userID[expi_df_userID.userID_new==userID].resample("200ms", origin=max_start).mean()
        resampled = resampled.interpolate()#interpolate nan values
        data = resampled[["xAcc", "yAcc", "zAcc"]].values
        extract_mAcc_expi.append(data)
    
    #compute length
    min_length = np.inf    
    for i in range(len(extract_mAcc_expi)):
        data = extract_mAcc_expi[i]
        length = data.shape[0]
        if length < min_length:
            min_length = length
            
    print("Min length of Exp%d:"%exp_id, min_length)    
    for i in range(len(extract_mAcc_expi)):
        data = extract_mAcc_expi[i]
        data = data[:min_length]
        extract_mAcc_expi[i] = data
        
    extract_mAcc_expi = np.array(extract_mAcc_expi)   
    mAcc_data.append(extract_mAcc_expi)

Experiment 1
Min length of Exp0: 2634
Experiment 2
Min length of Exp1: 2669
Experiment 3
Min length of Exp2: 2710
Experiment 4
Min length of Exp3: 2642
Experiment 5
Min length of Exp4: 2281


### Create Training, Validation and Test examples

In [11]:
examples = []
labels = []

window_length = 20

for expID in range(5):
    data_i = mAcc_data[expID]
    label_i = group_relations[expID]
    
    time_length = data_i.shape[1]
    time_indices = np.arange(time_length)
    window_starts = time_indices[0:time_length-window_length:int(0.5*window_length)]
    for t in window_starts:
        examples.append(data_i[:,t:t+window_length,:])
        labels.append(label_i)
        
examples = np.array(examples)
labels = np.array(labels)

assert examples.shape[0]==labels.shape[0]
print("Total Examples: ", examples.shape[0])

#shuffle the examples
indices = np.arange(examples.shape[0])
np.random.shuffle(indices)

train_idx = int(indices.shape[0]*0.6)
valid_idx = int(indices.shape[0]*0.8)

print("Train index: ", train_idx)
print("Valid index: ", valid_idx)

train_indices = indices[:train_idx]
valid_indices = indices[train_idx:valid_idx]
test_indices = indices[valid_idx:]

examples_train = examples[train_indices]
labels_train = labels[train_indices]
examples_valid = examples[valid_indices]
labels_valid = labels[valid_indices]
examples_test = examples[test_indices]
labels_test = labels[test_indices]

#save preprocessed data
with open(os.path.join(save_folder, "examples_train.npy"), 'wb') as f:
    np.save(f, examples_train)
with open(os.path.join(save_folder, "examples_valid.npy"), 'wb') as f:
    np.save(f, examples_valid)
with open(os.path.join(save_folder, "examples_test.npy"), 'wb') as f:
    np.save(f, examples_test)
with open(os.path.join(save_folder, "labels_train.npy"), 'wb') as f:
    np.save(f, labels_train)
with open(os.path.join(save_folder, "labels_valid.npy"),'wb') as f:
    np.save(f, labels_valid)
with open(os.path.join(save_folder, "labels_test.npy"),'wb') as f:
    np.save(f, labels_test)

Total Examples:  1286
Train index:  771
Valid index:  1028


## Process of Orientation Data

In [12]:
save_folder = "data/gordon/preprocessed/orien/"

In [13]:
exps = []
#Load experiments data
for i in range(1,7):
    exp = scipy.io.loadmat(os.path.join(folder, "exp_%d_data.mat"%i))
    exps.append(exp)
    
#compute total time
total_time = 0
for i in range(6):
    time_max = exps[i]["mOr"][:,1].max()
    time_min = exps[i]["mOr"][:,1].min()
    total_time += time_max-time_min
    
print("Total time: ", total_time)

#compute unit time(s)
#Total experiment time: 51min
timeunit = (51*60)/total_time
print("Unit time (s): ", timeunit)

Total time:  1886243
Unit time (s):  0.0016222724219519967


In [14]:
#Group configuration of agents
groups = [[[1,2,3,4,5,6,7,9,10,11]],
          [[1,2,3,4,5],[6,7,9,10,11]],
          [[1,2,3],[4,5,6],[7,9],[10,11]],
          [[1],[2],[3],[4],[5],[6],[7],[9],[10],[11]],
          [[1,2,3,5],[4,6,7],[9,10,11]],
          [[1,2,3,4,5,6],[7,9,10,11]]]

group_relations = []
mOr_data = []
columns = ["ExpID","TS","userID","gID","isLeader", "azimuth", "pitch","roll"]

for exp_id in range(len(exps)-1):    
    print("Experiment %d"%(exp_id+1))
    extract_mOr_expi = []
    expi_df = pd.DataFrame(exps[exp_id]["mOr"], columns=columns)
    #extract userIDs
    userIDs = expi_df["userID"].unique()
    userIDs_map = {userID:idx for idx, userID in enumerate(userIDs)}
        
    #create group relations
    gr_i = np.zeros((10,10))
    groups_i = groups[exp_id]
    groups_i_converted = []
    for group in groups_i:
        group_converted = [userIDs_map[user] for user in group]
        groups_i_converted.append(group_converted)
    for group in groups_i_converted:
        for i in group:
            for j in group:
                gr_i[i,j]=1
    group_relations.append(gr_i)
    
    #mapping userIDs in pandas dataframe
    expi_df["userID_new"] = expi_df["userID"].apply(lambda x:userIDs_map[x])
    expi_df["TS_new"] = expi_df["TS"].apply(lambda x:timeunit*x)
    expi_df["Timestamp"] = expi_df["TS_new"].apply(lambda x: pd.Timestamp(x, unit='s'))
    expi_df = expi_df.set_index("Timestamp")
    
    #get common time interval
    max_start = expi_df[expi_df["userID_new"]==0].index.min()
    min_end = expi_df[expi_df["userID_new"]==0].index.max()
    for userID in range(10):        
        start = expi_df[expi_df["userID_new"]==userID].index.min()
        end = expi_df[expi_df["userID_new"]==userID].index.max()
        if start > max_start:
            max_start = start
        if end < min_end:
            min_end = end  
            
    for userID in range(10):    
        expi_df_userID = expi_df[expi_df.userID_new==userID]
        resampled = expi_df_userID[expi_df_userID.userID_new==userID].resample("200ms", origin=max_start).mean()
        resampled = resampled.interpolate()#interpolate nan values
        data = resampled[["azimuth"]].values
        extract_mOr_expi.append(data)
        
    #compute length
    min_length = np.inf    
    for i in range(len(extract_mOr_expi)):
        data = extract_mOr_expi[i]
        length = data.shape[0]
        if length < min_length:
            min_length = length
            
    print("Min length of Exp%d:"%exp_id, min_length)    
    for i in range(len(extract_mOr_expi)):
        data = extract_mOr_expi[i]
        data = data[:min_length]
        extract_mOr_expi[i] = data
        
    extract_mOr_expi = np.array(extract_mOr_expi)   
    mOr_data.append(extract_mOr_expi)

Experiment 1
Min length of Exp0: 2633
Experiment 2
Min length of Exp1: 2667
Experiment 3
Min length of Exp2: 2710
Experiment 4
Min length of Exp3: 2641
Experiment 5
Min length of Exp4: 2281


In [16]:
examples = []
labels = []

window_length = 20

for expID in range(5):
    data_i = mOr_data[expID]
    label_i = group_relations[expID]
    
    time_length = data_i.shape[1]
    time_indices = np.arange(time_length)
    window_starts = time_indices[0:time_length-window_length:int(0.5*window_length)]
    for t in window_starts:
        examples.append(data_i[:,t:t+window_length,:])
        labels.append(label_i)
        
examples = np.array(examples)
labels = np.array(labels)

assert examples.shape[0]==labels.shape[0]
print("Total Examples: ", examples.shape[0])

#shuffle the examples
indices = np.arange(examples.shape[0])
np.random.shuffle(indices)

train_idx = int(indices.shape[0]*0.6)
valid_idx = int(indices.shape[0]*0.8)

print("Train index: ", train_idx)
print("Valid index: ", valid_idx)

train_indices = indices[:train_idx]
valid_indices = indices[train_idx:valid_idx]
test_indices = indices[valid_idx:]

examples_train = examples[train_indices]
labels_train = labels[train_indices]
examples_valid = examples[valid_indices]
labels_valid = labels[valid_indices]
examples_test = examples[test_indices]
labels_test = labels[test_indices]

#save preprocessed data
with open(os.path.join(save_folder, "examples_train.npy"), 'wb') as f:
    np.save(f, examples_train)
with open(os.path.join(save_folder, "examples_valid.npy"), 'wb') as f:
    np.save(f, examples_valid)
with open(os.path.join(save_folder, "examples_test.npy"), 'wb') as f:
    np.save(f, examples_test)
with open(os.path.join(save_folder, "labels_train.npy"), 'wb') as f:
    np.save(f, labels_train)
with open(os.path.join(save_folder, "labels_valid.npy"),'wb') as f:
    np.save(f, labels_valid)
with open(os.path.join(save_folder, "labels_test.npy"),'wb') as f:
    np.save(f, labels_test)

Total Examples:  1286
Train index:  771
Valid index:  1028


In [17]:
save_folder

'data/gordon/preprocessed/orien/'

In [20]:
examples_train.min()

0.0

## Test of DataLoader

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from data_utils import *

In [14]:
train_loader, valid_loader, test_loader = load_gordon(suffix="orien")