In [1]:
import os

import numpy as np
import pandas as pd

import torch
import torchvision

import matplotlib.pyplot as plt
import lightning as L

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from data_module.data_module import DefaultDataModule
from model.classifier_model import Classifier1DRaw



In [5]:
# Not referenced by any cell shown in this notebook — presumably a debug /
# smoke-test switch; TODO confirm or remove.
TEST = False

# Seed for the notebook's random number generators.
random_seed = 42
# Apply the seed to the global NumPy and PyTorch generators right away so a
# "Restart Kernel -> Run All" starts from the same random state every time.
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Directory holding the stacked sensor array, its labels and the data splits.
raw_lin_gyr_dataset_path = "dataset/raw_data_lin_gyr"

In [8]:
# Build the stacked sensor array and the label array from the per-axis text
# files, and cache both as .npy files.
#
# `flag` is read by the split cell: 1 means the raw arrays were (re)generated
# in this session, 0 means the cached files were reused.
flag = 0

stacked_data_file = os.path.join(raw_lin_gyr_dataset_path, "torso_lin_gyr.npy")
stacked_label_file = os.path.join(raw_lin_gyr_dataset_path, "torso_label.npy")

# Key the cache on the output files, not on the directory: the directory is
# created before the slow text parsing, so an interrupted run would otherwise
# leave an empty directory behind and every later run would skip this step.
if not (os.path.isfile(stacked_data_file) and os.path.isfile(stacked_label_file)):
    os.makedirs(raw_lin_gyr_dataset_path, exist_ok=True)

    # Channel order of the stacked array: linear acceleration, then gyroscope.
    data_key = ["LAcc_x", "LAcc_y", "LAcc_z", "Gyr_x", "Gyr_y", "Gyr_z"]

    # Every channel lives in dataset/Torso/<channel name>.txt
    data_path_dict = {
        data_name: os.path.join("dataset", "Torso", f"{data_name}.txt")
        for data_name in data_key
    }

    data_stacked_list = [np.loadtxt(data_path_dict[data_name]) for data_name in data_key]

    # Stacking puts the channel axis first: (channel, row, column).
    data_stacked = np.stack(data_stacked_list)
    print("data_stacked.shape", data_stacked.shape)

    # Move the channel axis to the middle: (row, channel, column).
    # Each file row is presumably one window of samples — TODO confirm.
    data_stacked = data_stacked.transpose(1, 0, 2)
    print("data_stacked.shape", data_stacked.shape)

    np.save(stacked_data_file, data_stacked)

    label_path = os.path.join("dataset", "Torso", "Label.txt")
    label = np.loadtxt(label_path)
    np.save(stacked_label_file, label)

    # Signal the split cell that the splits have to be rebuilt.
    flag = 1


data_stacked.shape (6, 196072, 500)
data_stacked.shape (196072, 6, 500)


In [10]:
# Split the stacked data into train / validation / test sets and cache them.

# Inputs written by the stacking cell.
raw_data_file = os.path.join(raw_lin_gyr_dataset_path, "torso_lin_gyr.npy")
label_file = os.path.join(raw_lin_gyr_dataset_path, "torso_label.npy")

# Outputs of this cell.
train_file = os.path.join(raw_lin_gyr_dataset_path, "torso_train.npy")
val_file = os.path.join(raw_lin_gyr_dataset_path, "torso_val.npy")
test_file = os.path.join(raw_lin_gyr_dataset_path, "torso_test.npy")

label_train_file = os.path.join(raw_lin_gyr_dataset_path, "torso_train_label.npy")
label_val_file = os.path.join(raw_lin_gyr_dataset_path, "torso_val_label.npy")
label_test_file = os.path.join(raw_lin_gyr_dataset_path, "torso_test_label.npy")

split_files = [
    train_file, val_file, test_file,
    label_train_file, label_val_file, label_test_file,
]

# Rebuild when the raw arrays were regenerated in this session (flag == 1) or
# when any split file is missing, e.g. because an earlier run of this cell
# failed half-way through.
if flag == 1 or not all(os.path.isfile(path) for path in split_files):

    from sklearn.model_selection import train_test_split
    TRAIN_SIZE, VAL_SIZE, TEST_SIZE = 0.8, 0.1, 0.1

    raw_data = np.load(raw_data_file)
    label_data = np.load(label_file)

    # Not used further in this cell; kept for any later cell that reads it.
    activity_range = list(range(1, 8+1))

    # Keep only rows whose label is constant across the whole row, i.e. every
    # entry equals the first entry of that row (vectorised row-wise check).
    label_idx = (label_data == label_data[:, :1]).all(axis=1)
    print("label_idx", label_idx)

    data_filtered = raw_data[label_idx]
    label_filtered = label_data[label_idx]

    # The label is constant along each kept row, so one column is enough.
    label = label_filtered[:, 0]

    # First carve off the test set, then split the remainder into train/val.
    # random_state makes both stratified splits reproducible across runs.
    train_val_data, test_data, train_val_label, test_label = train_test_split(
        data_filtered,
        label,
        test_size=TEST_SIZE,
        stratify=label,
        shuffle=True,
        random_state=random_seed,
    )

    # VAL_SIZE is a fraction of the full data set, so rescale it to the
    # fraction of the remaining train+val portion.
    train_data, val_data, train_label, val_label = train_test_split(
        train_val_data,
        train_val_label,
        test_size=VAL_SIZE / (TRAIN_SIZE + VAL_SIZE),
        stratify=train_val_label,
        shuffle=True,
        random_state=random_seed,
    )

    print("train_data.shape, train_label.shape", train_data.shape, train_label.shape)
    print("val_data.shape, val_label.shape", val_data.shape, val_label.shape)
    print("test_data.shape, test_label.shape", test_data.shape, test_label.shape)

    np.save(train_file, train_data)
    np.save(label_train_file, train_label)
    np.save(val_file, val_data)
    np.save(label_val_file, val_label)
    np.save(test_file, test_data)
    np.save(label_test_file, test_label)

label_idx [ True  True  True ...  True  True  True]
train_data.shape, train_label.shape (156392, 6, 500) (156392,)
val_data.shape, val_label.shape (19549, 6, 500) (19549,)
test_data.shape, test_label.shape (19550, 6, 500) (19550,)


In [3]:
from lightning.pytorch.utilities.model_summary import ModelSummary

# Instantiate the classifier on the CPU just to print its layer-by-layer
# summary (the training cell builds its own `net`).
#
# Each cnn_channel_param tuple appears to be
# (in_channels, out_channels, kernel_size, padding, stride), judging by the
# printed summary — confirm against Classifier1DRaw.
#
# NOTE(review): the optimizer key here is "learning_rate", while the training
# configuration uses "lr" — confirm which name Classifier1DRaw expects.
net = Classifier1DRaw(
    optimizer=optim.SGD,
    optimizer_param=dict(learning_rate=0.01, momentum=0.5),
    cnn_channel_param=[(6, 32, 8, 0, 3), (32, 64, 8, 0, 3)],
    linear_channel_param=[256, 128],
)
net = net.to("cpu")

model_summary = ModelSummary(net, max_depth=6)
print(model_summary)

   | Name     | Type        | Params | In sizes      | Out sizes    
--------------------------------------------------------------------------
0  | cnn      | Sequential  | 18.0 K | [10, 6, 500]  | [10, 64, 53] 
1  | cnn.0    | Conv1d      | 1.6 K  | [10, 6, 500]  | [10, 32, 165]
2  | cnn.1    | ReLU        | 0      | [10, 32, 165] | [10, 32, 165]
3  | cnn.2    | Dropout     | 0      | [10, 32, 165] | [10, 32, 165]
4  | cnn.3    | Conv1d      | 16.4 K | [10, 32, 165] | [10, 64, 53] 
5  | cnn.4    | ReLU        | 0      | [10, 64, 53]  | [10, 64, 53] 
6  | cnn.5    | Dropout     | 0      | [10, 64, 53]  | [10, 64, 53] 
7  | linear   | Sequential  | 903 K  | [10, 3392]    | [10, 8]      
8  | linear.0 | Linear      | 868 K  | [10, 3392]    | [10, 256]    
9  | linear.1 | BatchNorm1d | 512    | [10, 256]     | [10, 256]    
10 | linear.2 | ReLU        | 0      | [10, 256]     | [10, 256]    
11 | linear.3 | Linear      | 32.9 K | [10, 256]     | [10, 128]    
12 | linear.4 | BatchNorm1d 

In [None]:
# Training budget: upper bound on epochs, and early-stopping patience set to
# 1% of that bound.
n_epochs = 20000
patience = n_epochs // 100

# Available optimizers: name -> (optimizer class, keyword arguments).
optimizer_param_dict = dict(
    Adam=(optim.Adam, dict(lr=0.001)),
    SGD=(optim.SGD, dict(lr=0.001, momentum=0.5)),
)

# Optimizer used for this run.
optimizer, optimizer_param = optimizer_param_dict["Adam"]

# Data set consumed by the training run.
dataset_path = raw_lin_gyr_dataset_path

# Logger output location: <log_save_dir>/<log_save_name>/
log_save_dir = "lightning_logs"
log_save_name = "09_raw_lin_gyr"

In [None]:
# Train the classifier with early stopping and best-model checkpointing, then
# evaluate it on the test split.
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import CSVLogger, TensorBoardLogger

print(" ----------------------start training---------------------------")

# Log the same run to TensorBoard and to CSV.
tensorboard_logger = TensorBoardLogger(save_dir=log_save_dir, name=log_save_name)
csv_logger = CSVLogger(save_dir=log_save_dir, name=log_save_name)

# Keep only the single checkpoint with the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    dirpath=None,
    save_top_k=1,
    monitor="val_loss",
    mode="min",
    filename="sample_{epoch:02d}-{step:02d}-{val_loss:02f}",
)

# Early-stopping patience is counted in validation checks, and validation only
# runs every 10th epoch, so training stops after `patience` * 10 epochs
# without a val_loss improvement.
early_stopping_callback = EarlyStopping(monitor="val_loss", patience=patience)

# NOTE(review): Lightning documents strategy="ddp" for script launches; inside
# an interactive kernel "ddp_notebook" is the compatible variant — confirm how
# this cell is executed.
trainer = L.Trainer(
    logger=[tensorboard_logger, csv_logger],
    callbacks=[early_stopping_callback, checkpoint_callback],
    max_epochs=n_epochs,
    check_val_every_n_epoch=10,
    accelerator="gpu",
    devices=4,
    strategy="ddp",
)

net = Classifier1DRaw(
    optimizer=optimizer,
    optimizer_param=optimizer_param,
    cnn_channel_param=[
        (6, 32, 8, 0, 3),
        (32, 64, 8, 0, 3),
    ],
    linear_channel_param=[
        256, 128,
    ],
)

# Use the data set selected in the configuration cell.
data_module = DefaultDataModule(dataset_path=dataset_path, batch_size=8192, prefix="torso_")

trainer.fit(model=net, datamodule=data_module)

# Evaluate the checkpoint with the best val_loss rather than the weights from
# the last epoch, which early stopping lets run `patience` checks past the best.
trainer.test(model=net, datamodule=data_module, ckpt_path="best")