In [4]:
import torch
from torch import nn

import pandas as pd
import numpy  as np
import os
import glob

In [5]:
# Pattern matching every CSV file in ../data/jikken1/raw.
_data_file = os.path.join("..", "data", "jikken1", "raw", "*.csv")

# glob gives no ordering guarantee, so sort the matches to process the
# files in the same order on every run.
data_files = glob.glob(_data_file)
data_files.sort()

data_files

['../data/jikken1/raw/Conv-sensorA-1.csv',
 '../data/jikken1/raw/Conv-sensorB-1.csv',
 '../data/jikken1/raw/Conv-sensorC-1.csv',
 '../data/jikken1/raw/Conv-sensorD-1.csv',
 '../data/jikken1/raw/Conv-sensorE-1.csv']

In [6]:
# Read every sensor CSV, trim it to its labelled span and collect the
# per-sensor feature columns so they can be merged side by side.
data_list = []

for data_file in data_files:
    # Seventh character from the end of the file name, e.g. "A" in
    # "Conv-sensorA-1.csv"; used as the column suffix for this file.
    file_name = os.path.basename(data_file)
    identifier = file_name[-7]

    df = pd.read_csv(data_file, encoding="shift-jis")

    # Index values of the first and last rows that carry a label.
    df_nona = df.dropna(subset="label")
    first_labelled, last_labelled = df_nona.index[0], df_nona.index[-1]
    print("***df_nona first", first_labelled)

    # Positional slice with an exclusive end bound: the last labelled row is
    # not part of the result. Rows are renumbered from 0 afterwards.
    df_filter_start_na = df.iloc[first_labelled:last_labelled, :].reset_index(drop=True)

    # Keep the label column aside before it is removed from the features.
    label = df_filter_start_na["label"]

    # Drop the non-feature columns and tag the remaining ones with the
    # sensor identifier so columns from different files do not collide.
    df_filter_start_na = (
        df_filter_start_na
        .drop(columns=["ts", "label"])
        .rename(columns=lambda column: column + f"_{identifier}")
    )

    print("len(df_filter_start_na)", len(df_filter_start_na))

    data_list.append(df_filter_start_na)

# Column-wise merge of all sensors. The label column attached here is the one
# left over from the last file processed by the loop above.
data_df = pd.concat(data_list, axis=1)
data_df["label"] = label

data_df

***df_nona first 25422
len(df_filter_start_na) 90432
***df_nona first 24679
len(df_filter_start_na) 90432
***df_nona first 24396
len(df_filter_start_na) 90432
***df_nona first 24161
len(df_filter_start_na) 90432
***df_nona first 23883
len(df_filter_start_na) 90432


Unnamed: 0,ax_A,ay_A,az_A,gx_A,gy_A,gz_A,ax_B,ay_B,az_B,gx_B,...,gx_D,gy_D,gz_D,ax_E,ay_E,az_E,gx_E,gy_E,gz_E,label
0,-7935,1954,3465,-2788,323,-3586,-11585,-2828,1490,-6382,...,-4613,10980,-530,-8155,1505,3740,120,-1570,3440,階段降り
1,-7847,1998,3445,-2855,158,-3629,-11443,-2667,1373,-7248,...,-5308,11516,-280,-7842,1520,3598,241,-1710,3196,階段降り
2,-7647,2051,3387,-3008,-48,-3592,-11199,-2462,1299,-8083,...,-5363,11230,-129,-7432,1407,3462,223,-1685,2915,階段降り
3,-7495,2100,3309,-3124,-310,-3531,-10965,-2296,1021,-8907,...,-6266,10797,194,-7208,1354,3354,34,-1649,2635,階段降り
4,-7315,1954,3245,-3191,-548,-3421,-10652,-2233,782,-9650,...,-7906,11059,322,-7017,1158,3261,-264,-1673,2366,階段降り
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90427,-6387,6016,4700,199,-353,-147,-5169,-6183,5640,-16,...,69,-92,-189,-8550,1476,4765,205,76,-66,座っている
90428,-6358,6031,4749,278,-487,-196,-5145,-6197,5645,20,...,44,-99,-183,-8536,1476,4800,217,45,-108,座っている
90429,-6280,5997,4847,351,-554,-202,-5174,-6188,5577,130,...,-4,-129,-183,-8497,1529,4721,205,45,-120,座っている
90430,-6265,6011,4837,400,-652,-214,-5193,-6144,5543,160,...,-29,-123,-219,-8511,1515,4707,205,15,-169,座っている


## Segmentation

In [7]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Sliding-window dataset over a labelled table.

    The table is scanned from top to bottom. Rows whose label is missing are
    skipped. When a labelled row is met, `margin` rows are skipped and
    fixed-length windows are then laid out, each one starting
    `Window_size - int(Window_size * over_lap)` rows after the previous one,
    for as long as the row just past the window (position
    `start + Window_size`) carries the same label as the row reached right
    after the margin.

    Rows are addressed by position throughout, so the frame's index values do
    not matter.

    Args:
        data_df: DataFrame with a "label" column; every other column is
            returned as a feature column.
        Window_size: number of rows per window.
        over_lap: fraction of a window shared with the next window.
        margin: number of rows skipped each time the scan resumes on a
            labelled row.

    Raises:
        ValueError: if the window size or the resulting step between windows
            is not positive (the scan could never advance).
    """

    def __init__(self, data_df, Window_size = 200, over_lap = 0.5, margin = 300):

        self.data_df = data_df
        self.Window_size = Window_size
        self.over_lap = over_lap
        self.margin = margin

        # A class id is the position of the label in this list; value_counts
        # orders the labels from most to least frequent.
        self.labels = list(self.data_df["label"].value_counts().keys())

        step = Window_size - int(Window_size * over_lap)
        if Window_size <= 0 or step <= 0:
            raise ValueError(
                "Window_size must be positive and over_lap must leave a positive "
                f"step between windows (Window_size={Window_size}, over_lap={over_lap})"
            )

        # Start position of every window. Sample i covers rows
        # [start, start + Window_size) of data_df and is labelled with the row
        # at position start + Window_size.
        self.data_df_index_list = self._find_window_starts(
            data_df["label"].to_numpy(), Window_size, step, margin
        )

    @staticmethod
    def _find_window_starts(label_values, window_size, step, margin):
        """Return the start positions of all windows found in `label_values`."""
        n_rows = len(label_values)

        # Smallest non-negative integer >= margin, i.e. the number of rows a
        # "count up to margin" loop would skip.
        margin_rows = max(0, -int(-margin // 1))

        starts = []
        index = 0
        while index < n_rows:
            if pd.isna(label_values[index]):
                index += 1
                continue

            resumed_at = index
            index += margin_rows
            if index >= n_rows:
                break

            current_label = label_values[index]
            if pd.isna(current_label):
                # The margin ended on an unlabelled row; resume scanning there.
                continue

            # The bounds check comes first so the look-ahead row always
            # exists, also for the last windows of the table.
            while index + window_size < n_rows:
                following = label_values[index + window_size]
                if pd.isna(following) or following != current_label:
                    break
                starts.append(index)
                index += step

            # With margin == 0 and no window emitted the position would not
            # move; step one row forward so the scan always terminates.
            if index == resumed_at:
                index += 1

        return starts

    def __len__(self):
        """Return the number of windows."""
        return len(self.data_df_index_list)

    def __getitem__(self, idx):
        """Return `(x, label)` for window `idx`.

        `x` holds the feature columns (all columns except "label") of the
        window's rows as returned by `DataFrame.values`; `label` is the
        integer position of the window's label in `self.labels`.
        """
        start = self.data_df_index_list[idx]
        stop = start + self.Window_size

        # Use the frame stored on the instance, not a notebook global.
        window = self.data_df.iloc[start:stop, :]
        x = window.drop(["label"], axis=1).values

        # The row just past the window was verified in __init__ to carry the
        # window's label and to exist.
        label = self.labels.index(self.data_df["label"].iloc[stop])

        return x, label

In [8]:
from torch.utils.data import DataLoader
# Build the windowed dataset with its default settings and wrap it in a
# DataLoader that also uses its defaults.
dataset = CustomDataset(data_df=data_df)
loader = DataLoader(dataset=dataset)

In [9]:
# Fetch the first batch from the loader to inspect what it yields.
next(iter(loader))

[tensor([[[-14698,   5987,    872,  ...,   2180,  -4161,  -2169],
          [-14551,   6133,    921,  ...,   2241,  -3874,  -2797],
          [-14263,   5709,    735,  ...,   2387,  -3362,  -3303],
          ...,
          [ -7188,   2042,   1180,  ...,  -3149,  -2454,   1690],
          [ -7251,   2266,   1165,  ...,  -3679,  -2271,   1641],
          [ -7261,   2437,   1302,  ...,  -4149,  -1978,   1531]]]),
 tensor([4])]

In [11]:
# Number of windows the dataset exposes.
len(dataset)

687

## Model definition

In [10]:
class LSTMModel(nn.Module):
    """Two-layer LSTM followed by two parallel fully connected branches.

    The LSTM output at the last time step is fed to both branches; that
    output and the two branch outputs are concatenated and mapped to class
    probabilities by a linear layer and a softmax.

    Args:
        hidden_size: LSTM hidden size, also the width of both branches.
        input_size: number of features per time step of the input.
        output_size: number of classes.

    NOTE(review): `forward` returns softmax probabilities. A loss that expects
    raw logits (e.g. `nn.CrossEntropyLoss`) would apply softmax a second
    time - confirm against the training code.
    """

    def __init__(self, hidden_size=128, input_size=9, output_size=6):
        super().__init__()
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=2,
                           batch_first=True)

        self.seq_1 = self._make_branch(hidden_size)
        self.seq_2 = self._make_branch(hidden_size)

        # The classifier sees the LSTM output plus both branch outputs.
        self.classifier = nn.Linear(in_features=3 * hidden_size, out_features=output_size)
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _make_branch(hidden_size):
        """Build one (Linear -> BatchNorm -> Dropout -> ReLU) x 2 branch."""
        # nn.Dropout drops individual features. nn.Dropout1d would read a
        # (batch, features) tensor as an unbatched (channels, length) input
        # and zero out whole samples instead.
        return nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout(p=0.2),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.BatchNorm1d(num_features=hidden_size),
            nn.Dropout(p=0.2),
            nn.ReLU(),
        )

    def forward(self, x):
        """Return class probabilities of shape (batch, output_size).

        Args:
            x: float tensor of shape (batch, sequence, input_size); the LSTM
                is created with `batch_first=True`.
        """
        activation, _ = self.rnn(x)

        # activation is (batch, sequence, hidden): take the last time step of
        # every sample, not the last sample of the batch.
        lstm_output = activation[:, -1, :]
        seq_1_output = self.seq_1(lstm_output)
        seq_2_output = self.seq_2(lstm_output)

        output = torch.cat([lstm_output, seq_1_output, seq_2_output], dim=1)
        output = self.classifier(output)
        output = self.softmax(output)

        return output
