In [5]:
import numpy as np
import pandas as pd
from pandas.core.common import SettingWithCopyWarning
from pathlib import Path
from glob import glob
from pathlib import Path
from time import time
import warnings

from utils import *

# Silence pandas' SettingWithCopyWarning for the whole notebook session.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
# NOTE(review): set_pyplot_options is not defined in this notebook; presumably it
# comes from `from utils import *` -- confirm.
set_pyplot_options(figsize=10)

# Seed NumPy's global RNG so the np.random calls made later in the notebook are repeatable.
np.random.seed(42)

In [6]:
# Activity name -> integer class id. Each name is also used as the sub-directory
# of `dir_data` that holds the recordings for that activity.
LABEL = dict(
    jog_forward=0,
    jog_turn=1,
    walk_forward=2,
    walk_turn=3,
    run_forward=4,
    stationary=5,
)

# Number of rows in one sample window (treated as 1 sec of data by the code below).
n_sample = 100

# Root directory of the per-activity recordings and of the generated .npy files.
dir_data = Path('data') / 'datasets_fin'


# Get all train samples, 1sec for each sample
def divide_df(df, sample_num=100, overlap=0):
    """Cut a DataFrame into consecutive fixed-length windows.

    Args:
        df: DataFrame to split; rows are taken by position.
        sample_num: number of rows in every window.
        overlap: number of rows shared by two neighbouring windows, so each
            window starts ``sample_num - overlap`` rows after the previous one.

    Returns:
        A list of independent DataFrames, each with ``sample_num`` rows and a
        fresh 0-based index. Trailing rows that do not fill a whole window are
        dropped; the list is empty when not even one window fits.
    """
    stride = sample_num - overlap
    n_windows = (df.shape[0] - overlap) // stride
    window_starts = (k * stride for k in range(n_windows))
    # reset_index(drop=True) returns a new frame, so the windows do not alias `df`.
    return [
        df[start:start + sample_num].reset_index(drop=True)
        for start in window_starts
    ]


dfs_all = {}
for label in LABEL:
    # Read every data file of this label and cut it into windows of n_sample rows
    # plus a 10-row margin (0.1 sec if n_sample rows span 1 sec). The margin leaves
    # room to shift a window so that its GNSS rows land on every tenth row.
    windows = []
    for csv_path in (dir_data / label).iterdir():
        df = pd.read_csv(csv_path)
        windows.extend(
            divide_df(df, sample_num=n_sample + 10, overlap=(int(n_sample / 2) + 10))
        )

    # Keep only windows that
    #   * have no gap in the 'time' column,
    #   * contain more than 10 GNSS rows (rows where 'enu_x' is present),
    #   * have GNSS rows exactly 10 rows apart,
    # and trim each kept window to n_sample + 1 rows starting on a GNSS row.
    #
    # The kept windows are collected in a new list. The previous version deleted
    # entries from the list it was enumerating, which made the loop skip the window
    # right after every deleted one, leaving it unchecked and untrimmed.
    kept = []
    for df in windows:
        # Time discontinuity: consecutive rows must differ by exactly 1 in 'time'
        # (presumably one 10 ms IMU tick -- confirm the unit of the column).
        time_diff = np.diff(df['time'].to_numpy())
        if np.any(time_diff != 1):
            continue

        gnss_idx = df.index[df['enu_x'].notna()].to_numpy()
        # GNSS point loss
        if len(gnss_idx) <= 10:
            continue

        start = 0
        if np.any(gnss_idx % 10 != 0):
            # GNSS rows are not on multiples of 10: acceptable only when they are
            # evenly spaced, in which case the window is shifted to the first one.
            if np.any(np.diff(gnss_idx) != 10):
                continue
            start = gnss_idx[0]
        kept.append(df.iloc[start:start + n_sample + 1].reset_index(drop=True))

    dfs_all[label] = kept
    print("Duration of datasets(sec) ", label, len(dfs_all[label]))


# Randomly hold out `num_test` windows of every class as the test set; all
# remaining windows of that class form the training set.
dfs_test = {}
dfs_train = {}
num_test = 100
for label in LABEL:
    windows = dfs_all[label]
    # Sampling without replacement: np.random.choice raises ValueError when a
    # class has fewer than num_test windows.
    choice = np.random.choice(np.arange(len(windows)), size=num_test, replace=False)
    # A set gives O(1) membership tests instead of scanning the array per window.
    test_positions = set(choice.tolist())
    dfs_test[label] = [w for pos, w in enumerate(windows) if pos in test_positions]
    dfs_train[label] = [w for pos, w in enumerate(windows) if pos not in test_positions]
    

Duration of datasets(sec)  jog_forward 2019
Duration of datasets(sec)  jog_turn 987
Duration of datasets(sec)  walk_forward 2977
Duration of datasets(sec)  walk_turn 1036
Duration of datasets(sec)  run_forward 1029
Duration of datasets(sec)  stationary 496


In [7]:
# Get signal images from train samples
def build_signal_image(df_org, sr_imu=100):
    """Arrange the IMU channels of one window into a 16-row signal image.

    The first ``sr_imu`` rows of the six channels gyrX, gyrY, gyrZ, eulR, eulP
    and eulY are stacked as rows in a fixed, repeating channel order.

    Args:
        df_org: DataFrame holding at least ``sr_imu`` rows and the six channel
            columns listed above. It is not modified.
        sr_imu: number of leading rows (time steps) to use.

    Returns:
        float ndarray of shape ``(1, 16, sr_imu)``.

    Raises:
        KeyError: if a channel column is missing.
        ValueError: if the window has fewer than ``sr_imu`` rows (reshape fails).
    """
    channels = ['gyrX', 'gyrY', 'gyrZ', 'eulR', 'eulP', 'eulY']
    # Cast to float explicitly: the in-place scaling below raises a casting error
    # on an integer array, which happens when the CSV columns parse as ints.
    raw_signal = df_org[:sr_imu][channels].to_numpy(dtype=float, copy=True).T
    # Scale the three gyro rows by the radian-to-degree factor
    # (assumes the gyro columns are recorded in radians -- TODO confirm).
    raw_signal[[0, 1, 2]] *= 180 / np.pi
    # 1-based channel numbers, in the row order of the output image.
    signal_index_sequence = [1, 2, 3, 4, 5, 6, 1, 3,
                             5, 1, 4, 6, 2, 4, 5, 2]
    signal_image = np.array([raw_signal[idx - 1] for idx in signal_index_sequence], dtype=float)
    return np.reshape(signal_image, (1, 16, sr_imu))


def _collect_split(dfs_by_label):
    """Turn one split (label name -> list of windows) into stacked arrays.

    Returns a tuple ``(images, labels, gnss)``:
        images: signal images of all windows, concatenated along axis 0.
        labels: column vector with the integer class id of every window.
        gnss: per window, the transposed GNSS rows (rows where 'enu_x' is
            present) restricted to time, enu_x, enu_y, speed and label.
    """
    images, labels, gnss = [], [], []
    for label, label_id in LABEL.items():
        for df in dfs_by_label[label]:
            images.append(build_signal_image(df))
            labels.append(label_id)
            # Kept from the original code: the class id is written into the window
            # itself, so the frames stored in `dfs_by_label` gain a 'label' column.
            df['label'] = np.ones(len(df)) * label_id
            gnss_rows = df.dropna(subset=['enu_x']).loc[:, ['time', 'enu_x', 'enu_y', 'speed', 'label']]
            gnss.append(gnss_rows.to_numpy().T)
    # Every image has a leading axis of length 1, so concatenating along axis 0
    # yields one row per window without hard-coding the image size.
    return (
        np.concatenate(images, axis=0),
        np.array(labels).reshape([-1, 1]),
        np.array(gnss),
    )


train_images, train_labels, train_gnss = _collect_split(dfs_train)
test_images, test_labels, test_gnss = _collect_split(dfs_test)

#print("Done constructing signal images", end='')
# Report the array shapes. The previous version passed `end=''` to str.format()
# instead of print(), where it was silently ignored; dropping it leaves the
# printed output unchanged.
print("TRAIN / imu_signal_img:{}, labels:{}, gnss:{}".format(train_images.shape, train_labels.shape, train_gnss.shape))
print("TEST /imu_signal_img:{}, labels:{}, gnss:{}".format(test_images.shape, test_labels.shape, test_gnss.shape))
print()


# Persist the arrays next to the raw data.
np.save(dir_data / "train_gnss.npy", train_gnss)
np.save(dir_data / 'test_gnss.npy', test_gnss)
np.save(dir_data / "train_images.npy", train_images)
np.save(dir_data / 'train_labels.npy', train_labels)
np.save(dir_data / "test_images.npy", test_images)
np.save(dir_data / 'test_labels.npy', test_labels)

TRAIN / imu_signal_img:(7944, 16, 100), labels:(7944, 1), gnss:(7944, 5, 11)
TEST /imu_signal_img:(600, 16, 100), labels:(600, 1), gnss:(600, 5, 11)



# Various IMU rates
This cell generates a dataset that mixes several IMU sampling rates.

In [8]:
def downsample_imu(images, from_f, to_f):
    """Reduce the sampling rate by keeping every k-th sample along axis 2.

    Args:
        images: array indexed as [sample, channel, time].
        from_f: sampling rate of ``images``.
        to_f: target sampling rate; ``from_f / to_f`` must be a whole number.

    Returns:
        A strided slice of ``images`` holding every ``from_f / to_f``-th time step.
    """
    exact_ratio = float(from_f) / to_f
    step = int(from_f / to_f)
    assert step == exact_ratio, "The sampling rate ratio must be a integer"
    every_kth = slice(None, None, step)
    return images[:, :, every_kth]


def interpolate_imu(a, from_f, to_f):
    """Raise the sampling rate along axis 2 by linear interpolation.

    Args:
        a: array indexed as [sample, channel, time].
        from_f: sampling rate of ``a``.
        to_f: target sampling rate; ``to_f / from_f`` must be a whole number.

    Returns:
        Array of the same dtype as ``a`` whose time axis is ``to_f / from_f``
        times longer. The original samples sit at every ``to_f / from_f``-th
        position and the points in between lie on straight lines. The points
        after the last original sample continue with the slope of the last
        interval.
    """
    factor = int(to_f / from_f)
    assert factor == float(to_f) / from_f, "The sampling rate ratio must be a integer"

    n_items, n_channels, n_steps = a.shape[0], a.shape[1], a.shape[2]
    upsampled = np.zeros([n_items, n_channels, n_steps * factor], dtype=a.dtype)
    upsampled[:, :, ::factor] = a

    # Increment per output step between two neighbouring input samples.
    step_inc = np.zeros(a.shape)
    step_inc[:, :, :-1] = np.diff(a, axis=2) / factor
    # No sample follows the last one, so reuse the previous slope.
    step_inc[:, :, -1] = step_inc[:, :, -2]

    for offset in range(1, factor):
        upsampled[:, :, offset::factor] = upsampled[:, :, ::factor] + step_inc * offset

    return upsampled

# IMU sampling rates to simulate. Every rate is produced by downsampling the
# n_sample-rate images and interpolating them back to n_sample points.
SR_IMU = [100, 50, 20, 10]

train_images_ds = [
    interpolate_imu(downsample_imu(train_images, n_sample, rate), rate, n_sample)
    for rate in SR_IMU
]
test_images_ds = [
    interpolate_imu(downsample_imu(test_images, n_sample, rate), rate, n_sample)
    for rate in SR_IMU
]

# Labels and GNSS data do not depend on the IMU rate: repeat them once per rate.
train_labels_ds = [train_labels for _ in SR_IMU]
test_labels_ds = [test_labels for _ in SR_IMU]
train_gnss_ds = [train_gnss for _ in SR_IMU]
test_gnss_ds = [test_gnss for _ in SR_IMU]

# NOTE: the names below are rebound to the mixed-rate arrays, which are also this
# cell's inputs. Re-running the cell without first re-running the cell that
# builds the base arrays would therefore mix already-mixed data.
train_images = np.concatenate(train_images_ds)
train_labels = np.concatenate(train_labels_ds)
test_images = np.concatenate(test_images_ds)
test_labels = np.concatenate(test_labels_ds)
train_gnss = np.concatenate(train_gnss_ds)
test_gnss = np.concatenate(test_gnss_ds)

# Report the shapes of the mixed-rate arrays. The previous version passed
# `end=''` to str.format() instead of print(), where it was silently ignored;
# dropping it leaves the printed output unchanged.
print("TRAIN / imu_signal_img:{}, labels:{}, gnss:{}".format(train_images.shape, train_labels.shape, train_gnss.shape))
print("TEST /imu_signal_img:{}, labels:{}, gnss:{}".format(test_images.shape, test_labels.shape, test_gnss.shape))

# Persist the mixed-rate dataset under separate "*_imu_mix.npy" file names.
np.save(dir_data / "train_images_imu_mix.npy", train_images)
np.save(dir_data / 'train_labels_imu_mix.npy', train_labels)
np.save(dir_data / "test_images_imu_mix.npy", test_images)
np.save(dir_data / 'test_labels_imu_mix.npy', test_labels)
np.save(dir_data / "test_gnss_imu_mix.npy", test_gnss)
np.save(dir_data / 'train_gnss_imu_mix.npy', train_gnss)
print("Done saving npy files")

TRAIN / imu_signal_img:(31776, 16, 100), labels:(31776, 1), gnss:(31776, 5, 11)
TEST /imu_signal_img:(2400, 16, 100), labels:(2400, 1), gnss:(2400, 5, 11)
Done saving npy files


# Model training

In [10]:
!python train_harcnn.py --n_epochs 200 --lr 1e-4 --n_classes 6 --n_samples 100 --output_dir "./models/" --input_dir "./data/datasets_fin"

2020-09-10 12:36:05.176464: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.2
2020-09-10 12:36:06.055205: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer.so.7
2020-09-10 12:36:06.056226: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer_plugin.so.7
==> Training..
Epoch: 1
Train Loss: 2.449 | Acc: 56.514%
Test Loss: 0.166 | Acc: 55.125%

Epoch: 2
Train Loss: 1.530 | Acc: 79.167%
Test Loss: 0.128 | Acc: 69.125%

Epoch: 3
Train Loss: 1.091 | Acc: 84.998%
Test Loss: 0.105 | Acc: 72.333%

Epoch: 4
Train Loss: 0.821 | Acc: 87.531%
Test Loss: 0.083 | Acc: 75.125%

Epoch: 5
Train Loss: 0.662 | Acc: 88.466%
Test Loss: 0.068 | Acc: 75.208%

Epoch: 6
Train Loss: 0.551 | Acc: 91.380%
Test Loss: 0.049 | Acc: 92.375%

Epoch: 7
Train Loss: 0.472 | Acc: 94.276%
Test Loss: 0.046 | Acc: 91.958%

Epoch: 8
Train Loss