In [7]:
import csv
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
import pandas as pd
import random
import os
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import random
import math
from torch.nn.utils import weight_norm
from torch.utils.tensorboard import SummaryWriter
import os
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


import matplotlib.pyplot as plt
import shutil
from ipywidgets import interact, fixed
from ipywidgets import widgets
from ipywidgets import interactive, widgets
from IPython.display import display
from ipywidgets import interactive, widgets, HBox, VBox

from datetime import datetime
import socket


In [8]:
# Dataset location and selection used by the loading cells below.
root_directory = '../data/Oxford Inertial Odometry Dataset'  # dataset root; joined with `kind` and "data{k}" when loading
kind = 'handheld'  # sub-directory of root_directory that is read
floor = 'data1'  # NOTE(review): not referenced by any cell visible here — confirm it is still needed
num_sequences = [7,3,5,5,4]  # sequences per folder; entry k-1 is used for folder "data{k}"
num_folders = 1  # only folders data1..data{num_folders} are loaded
gt_type = 'vi'  # ground-truth file prefix: files are read as syn/{gt_type}{i}.csv

In [9]:
# Windowing / batching hyper-parameters.
WINDOW_SIZE = 120  # number of consecutive samples in one input window
STRIDE = 0  # 0 means prepare_sequences keeps a window at every start index
BATCH_SIZE = 32  # not used in the cells visible here; presumably for a later DataLoader — confirm

In [10]:
def process_gt_file(gt_csv_path):
    """Read a ground-truth CSV file and return its position columns.

    Columns 2, 3 and 4 of every row are parsed as floats and stored under
    'x', 'y' and 'z'. The x value is negated on read (kept from the original
    code; presumably a reference-frame flip — confirm against the dataset docs).

    Completely empty rows (for example a trailing blank line) are skipped
    instead of aborting the whole read with an IndexError.

    Args:
        gt_csv_path: Path of the CSV file to read.

    Returns:
        dict with keys 'x', 'y' and 'z'; each value is a list of floats with
        one entry per non-empty row.

    Raises:
        ValueError: if a position field cannot be parsed as a float
            (e.g. the file starts with a header row).
        IndexError: if a non-empty row has fewer than five columns.
    """
    path_data = {'x': [], 'y': [], "z": []}

    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to csv.reader.
    with open(gt_csv_path, 'r', newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue

            # Parse all three fields before appending so a bad row never
            # leaves the three lists with different lengths.
            x = -float(row[2])
            y = float(row[3])
            z = float(row[4])

            path_data["x"].append(x)
            path_data["y"].append(y)
            path_data["z"].append(z)

    return path_data


In [11]:
# Load the ground-truth trajectories.
# Result layout: gt_data[folder_idx][sequence_idx] -> dict returned by process_gt_file.
gt_data = []
for k in range(1, num_folders+1):  # k is the 1-based folder number ("data1", "data2", ...)
  folder_path = os.path.join(root_directory, kind, f"data{k}")
  print(folder_path)

  gt_infolder = []  # all sequences of the current folder
  for i in range(1,num_sequences[k-1]+1):  # i is the 1-based sequence number inside the folder
    gt_csv_path = os.path.join(folder_path, f"syn/{gt_type}{i}.csv")
    print(gt_csv_path)
    gt_infolder.append(process_gt_file(gt_csv_path))

  gt_data.append(gt_infolder)

../data/Oxford Inertial Odometry Dataset/handheld/data1
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi1.csv
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi2.csv
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi3.csv
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi4.csv
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi5.csv
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi6.csv
../data/Oxford Inertial Odometry Dataset/handheld/data1/syn/vi7.csv


In [12]:
def calMagFeature(mag, grav):
    """Split a magnetic-field vector into parts along and across gravity.

    Args:
        mag: Iterable with the components of the magnetic-field vector.
        grav: Iterable with the components of the gravity vector; it must
            not have zero length (division by its norm).

    Returns:
        A list [along, across, total] where
        - along is the norm of the projection of `mag` onto the gravity
          direction, made negative when `mag` points against gravity,
        - across is the norm of the remainder of `mag` after removing that
          projection,
        - total is the norm of `mag` itself.
    """
    def _norm(vec):
        # Euclidean length, accumulated in component order.
        return math.sqrt(sum(c**2 for c in vec))

    grav_len = _norm(grav)
    unit_grav = [g / grav_len for g in grav]

    # Scalar projection of mag onto the unit gravity direction.
    projection = sum(m * g for m, g in zip(mag, unit_grav))

    parallel_part = [g * projection for g in unit_grav]
    orthogonal_part = [m - p for m, p in zip(mag, parallel_part)]

    along = _norm(parallel_part)
    if projection < 0:
        # Keep the sign of the projection on the (unsigned) norm.
        along = -along
    across = _norm(orthogonal_part)
    total = _norm(mag)

    return [along, across, total]


In [13]:
def process_imu_file(imu_csv_path):
    """Read an IMU CSV file and derive magnetic features for every row.

    For each row the last three columns are taken as the magnetic-field
    vector and columns -9, -8, -7 as the gravity vector (column meaning is
    taken from the variable names in the original code — confirm against the
    dataset description). Both are passed to calMagFeature and its three
    return values are stored under 'Bv', 'Bh' and 'Bp'.

    Completely empty rows (for example a trailing blank line) are skipped
    instead of aborting the whole read with an IndexError.

    Args:
        imu_csv_path: Path of the CSV file to read.

    Returns:
        dict with keys 'Bv', 'Bh' and 'Bp'; each value is a list with one
        entry per non-empty row.

    Raises:
        ValueError: if a used field cannot be parsed as a float.
        IndexError: if a non-empty row has fewer than nine columns.
    """
    path_data = {'Bv':[], 'Bh':[], "Bp":[]}

    # newline='' lets the csv module do its own newline handling, as its
    # documentation recommends for files passed to csv.reader.
    with open(imu_csv_path, 'r', newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue

            PAA_mag = [float(row[-3]), float(row[-2]), float(row[-1])]
            PAA_grav = [float(row[-9]), float(row[-8]), float(row[-7])]
            Bv, Bh, Bp = calMagFeature(PAA_mag, PAA_grav)

            path_data["Bv"].append(Bv)
            path_data["Bh"].append(Bh)
            path_data["Bp"].append(Bp)

    return path_data


In [14]:
# Load the magnetic features derived from the IMU files.
# Result layout: mag_data[folder_idx][sequence_idx] -> dict returned by process_imu_file.
mag_data = []
for k in range(1, num_folders+1):  # k is the 1-based folder number ("data1", "data2", ...)
  folder_path = os.path.join(root_directory, kind, f"data{k}")
  mag_infolder = []  # all sequences of the current folder
  for i in range(1,num_sequences[k-1]+1):  # i is the 1-based sequence number inside the folder
    mag_csv_path = os.path.join(folder_path, f"syn/imu{i}.csv")
    mag_infolder.append(process_imu_file(mag_csv_path))

  mag_data.append(mag_infolder)

In [15]:
# Sanity check: ground truth and magnetic data must cover the same folders
# and contain the same number of samples in every sequence.
print(len(gt_data) == len(mag_data))
for k in range(num_folders):
  for i in range(num_sequences[k]):
    # Compare sample counts; comparing the dicts themselves would only
    # compare their number of keys.
    n_gt_samples = len(gt_data[k][i]['x'])
    n_mag_samples = len(mag_data[k][i]['Bv'])
    if n_gt_samples != n_mag_samples:
      # Report 1-based folder and sequence numbers, matching the file names.
      print(f'Error!folder{k+1}, sequences{i+1}')


True


# **Split**

In [16]:
# 1-based sequence numbers (within one folder) assigned to each split.
train_seq = [1,2,3,4,5]
val_seq = [6]
test_seq = [7]

In [17]:
# Folder-level split.
# NOTE(review): neither list is referenced by the cells visible here, and they name
# folders 1-5 while num_folders is 1 — confirm whether they are still used.
train_folder = [1,2,3,4]
test_folder = [5]

In [18]:
def prepare_sequences(mag_data, gt_data, window_size=None, stride=None):
  """Cut per-sequence data into fixed-length windows with next-step targets.

  Each window holds `window_size` consecutive rows of (Bv, Bh, Bp); its
  target is the (x, y) ground-truth row immediately after the window.
  Windows never span two sequences because every sequence is windowed
  on its own.

  Args:
    mag_data: Sequence of dicts with equally long 'Bv', 'Bh', 'Bp' lists,
      one dict per recording (indexed as sequence_idx -> key -> values).
    gt_data: Sequence of dicts with 'x' and 'y' lists, aligned with
      `mag_data` (same order, same number of samples per recording).
    window_size: Number of samples per window. None (default) uses the
      current value of the notebook constant WINDOW_SIZE.
    stride: Integer distance between the start indices of kept windows;
      0 keeps every start index. None (default) uses the current value
      of the notebook constant STRIDE.

  Returns:
    (X, y) as numpy arrays. X has shape (num_samples, window_size, 3) and
    y has shape (num_samples, 2). If no window could be built both arrays
    are empty. If a recording has a different number of magnetic and
    ground-truth samples, an error is printed and the windows collected
    so far are returned.
  """
  # Resolve defaults at call time: defaults written in the signature are
  # frozen when the cell defining the function runs, so editing the config
  # cell afterwards would otherwise be silently ignored.
  if window_size is None:
    window_size = WINDOW_SIZE
  if stride is None:
    stride = STRIDE

  # A start index is kept when it is a multiple of the stride; stride 0
  # keeps all of them.
  step = abs(stride) if stride != 0 else 1

  X, y = [], []
  for mag, gt in zip(mag_data, gt_data):
    features = np.column_stack((mag['Bv'], mag['Bh'], mag['Bp']))
    targets = np.column_stack((gt['x'], gt['y']))

    if features.shape[0] != targets.shape[0]:
      print(f"Error: input shape is {features.shape[0]}, output shape is {targets.shape[0]}")
      # Return arrays here as well so callers always get the same type.
      return np.array(X), np.array(y)

    # The last usable start is len - window_size - 1 because the target
    # row at start + window_size must still exist.
    for start in range(0, len(features) - window_size, step):
      X.append(features[start:start + window_size])
      y.append(targets[start + window_size])

  return np.array(X), np.array(y)

In [19]:
# Per-split lists of sequences (magnetic features and ground truth); populated in a later cell.
mag_train_data  = []
gt_train_data   = []
mag_val_data  = []
gt_val_data   = []
mag_test_data  = []
gt_test_data   = []

In [27]:
# Quick structural check: number of folders and number of sequences in the first folder.
print(len(mag_data), len(mag_data[0]) if mag_data else 0)
print(len(gt_data), len(gt_data[0]) if gt_data else 0)
# Show only the sample count of each sequence; printing mag_data[0] itself
# would dump every sample of every sequence into the notebook output.
print([len(seq['Bv']) for seq in mag_data[0]] if mag_data else [])

1 7
1 7


In [38]:
# Assign the sequences of folder `k` to the train / validation / test splits.
# The lists are rebuilt from scratch (not appended to) so that re-running this
# cell cannot duplicate sequences in a split.
k = 1  # 1-based folder number; sequence numbers in *_seq are 1-based as well

mag_train_data = [mag_data[k-1][i-1] for i in train_seq]
gt_train_data = [gt_data[k-1][i-1] for i in train_seq]

mag_val_data = [mag_data[k-1][i-1] for i in val_seq]
gt_val_data = [gt_data[k-1][i-1] for i in val_seq]

mag_test_data = [mag_data[k-1][i-1] for i in test_seq]
gt_test_data = [gt_data[k-1][i-1] for i in test_seq]

In [39]:
# Window every split separately (default window size and stride of prepare_sequences).
X_train_raw, y_train_raw = prepare_sequences(mag_train_data, gt_train_data)
X_val_raw, y_val_raw = prepare_sequences(mag_val_data, gt_val_data)
X_test_raw, y_test_raw = prepare_sequences(mag_test_data, gt_test_data)

In [40]:
# Report the (inputs, targets) array shapes of each split before scaling.
print("Training raw data shape:", X_train_raw.shape, y_train_raw.shape)
print("Validation raw data shape:", X_val_raw.shape, y_val_raw.shape)
print("Testing raw data shape:", X_test_raw.shape, y_test_raw.shape)

Training raw data shape: (133099, 120, 3) (133099, 2)
Validation raw data shape: (32417, 120, 3) (32417, 2)
Testing raw data shape: (13978, 120, 3) (13978, 2)


In [41]:
# Normalize input data
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train_raw.reshape(-1, X_train_raw.shape[-1])).reshape(X_train_raw.shape)
X_val_scaled = scaler_X.transform(X_val_raw.reshape(-1, X_val_raw.shape[-1])).reshape(X_val_raw.shape)
X_test_scaled = scaler_X.transform(X_test_raw.reshape(-1, X_test_raw.shape[-1])).reshape(X_test_raw.shape)

# Normalize output data
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train_raw)
y_val_scaled = scaler_y.transform(y_val_raw)
y_test_scaled = scaler_y.transform(y_test_raw)

print("Training normalized data shape:", X_train_scaled.shape, y_train_scaled.shape)
print("Validation normalized data shape:", X_val_scaled.shape, y_val_scaled.shape)
print("Testing normalized data shape:", X_test_scaled.shape, y_test_scaled.shape)

Training normalized data shape: (133099, 120, 3) (133099, 2)
Validation normalized data shape: (32417, 120, 3) (32417, 2)
Testing normalized data shape: (13978, 120, 3) (13978, 2)


In [42]:
import h5py

In [43]:
# Create a folder to store your data
folder_name = "data_hdf5"
folder_path = os.path.join(root_directory, folder_name)
print(folder_path)
if not os.path.exists(folder_path):
  os.makedirs(folder_path, exist_ok=True)
else:
  print('Folder Exists')

data/Oxford Inertial Odometry Dataset/data_hdf5


In [44]:
# Full path of the HDF5 file; the file name encodes only the window size ('data1' is hard-coded).
dataset_name = os.path.join(root_directory, folder_name, f'dataset_reframe_data1_{WINDOW_SIZE}.hdf5')

In [45]:
# Create and save the HDF5 file
if not os.path.exists(dataset_name):
  with h5py.File(dataset_name, 'w') as f:
      # Save training data
      f.create_dataset('train_data', data=X_train_scaled)
      f.create_dataset('train_labels', data=y_train_scaled)

      # Save validation data
      f.create_dataset('val_data', data=X_val_scaled)
      f.create_dataset('val_labels', data=y_val_scaled)

      # Save testing data
      f.create_dataset('test_data', data=X_test_scaled)
      f.create_dataset('test_labels', data=y_test_scaled)
else:
  print(f'Dataset {dataset_name} Exists!')

In [46]:
# Read the six stored arrays back and wrap each one as a torch tensor.
# The tensors keep the dtype stored in the file.
# NOTE(review): if that is float64, a cast may be needed before feeding a float32 model — confirm.
with h5py.File(dataset_name, 'r') as f:
    (train_data, train_labels,
     val_data, val_labels,
     test_data, test_labels) = (
        torch.from_numpy(f[stored_name][:])  # [:] loads the whole dataset into memory
        for stored_name in (
            'train_data', 'train_labels',
            'val_data', 'val_labels',
            'test_data', 'test_labels',
        )
    )

In [47]:
# Report the tensor shapes of each split as loaded from the HDF5 file.
print("Training normalized data shape:", train_data.shape, train_labels.shape)
print("Validation normalized data shape:", val_data.shape, val_labels.shape)
print("Testing normalized data shape:", test_data.shape, test_labels.shape)

Training normalized data shape: torch.Size([133099, 120, 3]) torch.Size([133099, 2])
Validation normalized data shape: torch.Size([32417, 120, 3]) torch.Size([32417, 2])
Testing normalized data shape: torch.Size([13978, 120, 3]) torch.Size([13978, 2])
