## 0. 학습 세팅

### 1) 메모리 정리

In [45]:
import gc
import torch

gc.collect()
torch.cuda.empty_cache()

### 2) 수정된 코드 자동 업데이트

In [46]:
%load_ext autoreload
%autoreload 2
import foolbox as fb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Load library

In [47]:
# 라이브러리 호출
import os
import time
from collections import Counter, defaultdict

import argparse
import easydict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

from tqdm import tqdm_notebook

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

## 2. Variable Declaration

In [48]:
# # Jupyter 외 환경
# parser = argparse.ArgumentParser()
# parser.add_argument("--title", type=str, default="baseline")
# parser.add_argument("--device", type=str, default="cuda")
# parser.add_argument("--root", type=str, default="data")
# parser.add_argument("--batch_size", type=int, default=64)
# parser.add_argument("--num_workers", type=int, default=2)
# parser.add_argument("--epochs", type=int, default=100)
# parser.add_argument("--lr", type=float, default=0.001)
# parser.add_argument("--logs", type=str, default='logs')
# parser.add_argument("--checkpoints", type=str, default='checkpoints')
# parser.add_argument("--resume", type=bool, default=False)
# args = parser.parse_args()

# Jupyter 환경
args = easydict.EasyDict({
        "title" : "VEEM_Test",
        "device" : "cuda",
        "batch_size" : 32, # !!!
        "num_workers" : 2,
        "epochs" : 62, # !!!### 2) 모델 + 옵티마이저 + 손실함수 + 스케쥴러 + 메트릭 함수 정의
        "lr" : 0.01, # !!!
        "logs" : "logs",
        "checkpoints" : "checkpoints",
        "resume" : False,
        "test_ratio" : 0.1,
        "input_size" : 11,
        "hidden_size" : 2,
        "num_layers" : 1,
        "output_size" : 5
    })

## 3. Model Define

### 1) 모델 정의

In [49]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNN, self).__init__() # 상속한 nn.Module에서 RNN에 해당하는 init 실행
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # input x : (BATCH, LENGTH, INPUT_SIZE) 입니다 (다양한 length를 다룰 수 있습니다.).
        # 최초의 hidden state와 cell state를 초기화시켜주어야 합니다.
        # 배치 사이즈는 가변적이므로 클래스 내에선 표현하지 않습니다.
        # 만약 Bi-directional LSTM이라면 아래의 hidden and cell states의 첫번째 차원은 2*self.num_layers 입니다. 
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(args.device) # (BATCH SIZE, SEQ_LENGTH, HIDDEN_SIZE)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(args.device) # (BATCH SIZE, SEQ_LENGTH, HIDDEN_SIZE)

        # LSTM 순전파
        out, _ = self.lstm(x, (h0, c0)) # output : (BATCH_SIZE, SEQ_LENGTH, HIDDEN_SIZE) tensors. (hn, cn)은 필요 없으므로 받지 않고 _로 처리합니다. 

        # 마지막 time step(sequence length)의 hidden state를 사용해 Class들의 logit을 반환합니다(hidden_size -> output_size). 
        out = self.fc(out[:, -1, :])
        return out

### 2) 모델 + 옵티마이저 + 손실함수 + 스케쥴러 + 메트릭 함수 정의

In [50]:
# Build model
model = RNN(args.input_size, args.hidden_size, args.num_layers, args.output_size).to(args.device)

# Build loss function
criterion = nn.MSELoss()

# Build optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

## 4. Data Preprocessing

### 2) 데이터셋 불러오기

In [51]:
# 데이터셋 불러오기 + 출력
Person_path = f'C:\\Users\\Bang\\JupyterProjects\\VEEM_Project\\data\\Person_SNSB\\Person.csv'
SNSB_path = f'C:\\Users\\Bang\\JupyterProjects\\VEEM_Project\\data\\Person_SNSB\\SNSB.csv'

Person_dataset=pd.read_csv(Person_path)
SNSB_dataset=pd.read_csv(SNSB_path)

print("ㅡㅡㅡㅡㅡ[Person_dataset.dtypes]ㅡㅡㅡㅡㅡ")
print(Person_dataset.dtypes)
print("ㅡㅡㅡㅡㅡ[SNSB_dataset.dtypes]ㅡㅡㅡㅡㅡ")
print(SNSB_dataset.dtypes)

ㅡㅡㅡㅡㅡ[Person_dataset.dtypes]ㅡㅡㅡㅡㅡ
번호        int64
성명 코드    object
집단       object
성별       object
나이        int64
교육연한      int64
dtype: object
ㅡㅡㅡㅡㅡ[SNSB_dataset.dtypes]ㅡㅡㅡㅡㅡ
번호                      int64
성명 코드                  object
집단                     object
DST_F+B                 int64
S-K-BNT                 int64
RCFT_copyscore        float64
SVLT_delayedrecall      int64
K-TMT-E_B               int64
dtype: object


### 3) 데이터 자료형 변환

In [52]:
Person_dataset['나이'] = Person_dataset['나이'].astype(float)
Person_dataset['교육연한'] = Person_dataset['교육연한'].astype(float)

SNSB_dataset['DST_F+B'] = SNSB_dataset['DST_F+B'].astype(float)
SNSB_dataset['S-K-BNT'] = SNSB_dataset['S-K-BNT'].astype(float)
SNSB_dataset['SVLT_delayedrecall'] = SNSB_dataset['SVLT_delayedrecall'].astype(float)
SNSB_dataset['K-TMT-E_B'] = SNSB_dataset['K-TMT-E_B'].astype(float)

print("ㅡㅡㅡㅡㅡ[Person_dataset.dtypes]ㅡㅡㅡㅡㅡ")
print(Person_dataset.dtypes)
print("ㅡㅡㅡㅡㅡ[SNSB_dataset.dtypes]ㅡㅡㅡㅡㅡ")
print(SNSB_dataset.dtypes)

ㅡㅡㅡㅡㅡ[Person_dataset.dtypes]ㅡㅡㅡㅡㅡ
번호         int64
성명 코드     object
집단        object
성별        object
나이       float64
교육연한     float64
dtype: object
ㅡㅡㅡㅡㅡ[SNSB_dataset.dtypes]ㅡㅡㅡㅡㅡ
번호                      int64
성명 코드                  object
집단                     object
DST_F+B               float64
S-K-BNT               float64
RCFT_copyscore        float64
SVLT_delayedrecall    float64
K-TMT-E_B             float64
dtype: object


### 4) 인덱스 지정

In [53]:
Person_dataset.set_index('번호', inplace=True)
SNSB_dataset.set_index('번호', inplace=True)

In [54]:
print("ㅡㅡㅡㅡㅡ[Person_dataset]ㅡㅡㅡㅡㅡ")
Person_dataset

ㅡㅡㅡㅡㅡ[Person_dataset]ㅡㅡㅡㅡㅡ


Unnamed: 0_level_0,성명 코드,집단,성별,나이,교육연한
번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,NSU1,HC,여성,71.0,16.0
3,CSR1,HC,남성,73.0,16.0
4,KMO1,MCI,여성,59.0,12.0
6,LCH1,MCI,남성,83.0,14.0
7,LYH1,MCI,여성,67.0,15.0
8,MYG1,HC,남성,60.0,16.0
9,YHR1,HC,남성,79.0,16.0
10,LJG1,MCI,남성,69.0,12.0
11,PHG1,MCI,남성,86.0,12.0
12,SGS1,MCI,남성,57.0,9.0


In [55]:
print("ㅡㅡㅡㅡㅡ[SNSB_dataset]ㅡㅡㅡㅡㅡ")
SNSB_dataset

ㅡㅡㅡㅡㅡ[SNSB_dataset]ㅡㅡㅡㅡㅡ


Unnamed: 0_level_0,성명 코드,집단,DST_F+B,S-K-BNT,RCFT_copyscore,SVLT_delayedrecall,K-TMT-E_B
번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,NSU1,HC,13.0,11.0,35.0,11.0,16.0
3,CSR1,HC,10.0,15.0,30.0,5.0,26.0
4,KMO1,MCI,4.0,11.0,35.0,1.0,50.0
6,LCH1,MCI,5.0,8.0,18.5,0.0,300.0
7,LYH1,MCI,12.0,10.0,36.0,7.0,36.0
8,MYG1,HC,11.0,13.0,36.0,6.0,20.0
9,YHR1,HC,11.0,13.0,34.0,7.0,32.0
10,LJG1,MCI,9.0,12.0,27.0,0.0,43.0
11,PHG1,MCI,8.0,9.0,28.0,0.0,90.0
12,SGS1,MCI,7.0,11.0,26.0,0.0,121.0


### 5) 필요한 피쳐 추출

In [56]:
# 데이터셋 나누기
Person_all_dataset=Person_dataset.iloc[:, 2:]
SNSB_all_dataset=SNSB_dataset.iloc[:, 2:]

In [57]:
print("ㅡㅡㅡㅡㅡ[Person_all_dataset]ㅡㅡㅡㅡㅡ")
Person_all_dataset

ㅡㅡㅡㅡㅡ[Person_all_dataset]ㅡㅡㅡㅡㅡ


Unnamed: 0_level_0,성별,나이,교육연한
번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,여성,71.0,16.0
3,남성,73.0,16.0
4,여성,59.0,12.0
6,남성,83.0,14.0
7,여성,67.0,15.0
8,남성,60.0,16.0
9,남성,79.0,16.0
10,남성,69.0,12.0
11,남성,86.0,12.0
12,남성,57.0,9.0


In [58]:
print("ㅡㅡㅡㅡㅡ[SNSB_all_dataset]ㅡㅡㅡㅡㅡ")
SNSB_all_dataset

ㅡㅡㅡㅡㅡ[SNSB_all_dataset]ㅡㅡㅡㅡㅡ


Unnamed: 0_level_0,DST_F+B,S-K-BNT,RCFT_copyscore,SVLT_delayedrecall,K-TMT-E_B
번호,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,13.0,11.0,35.0,11.0,16.0
3,10.0,15.0,30.0,5.0,26.0
4,4.0,11.0,35.0,1.0,50.0
6,5.0,8.0,18.5,0.0,300.0
7,12.0,10.0,36.0,7.0,36.0
8,11.0,13.0,36.0,6.0,20.0
9,11.0,13.0,34.0,7.0,32.0
10,9.0,12.0,27.0,0.0,43.0
11,8.0,9.0,28.0,0.0,90.0
12,7.0,11.0,26.0,0.0,121.0


### 6) pandas -> numpy -> torch 변환

In [59]:
# Person_all_dataset_np = Person_all_dataset.to_numpy()
# Person_all_dataset_torch = torch.from_numpy(Person_all_dataset_np).float()

# print("ㅡㅡㅡㅡㅡ[Person_all_dataset_torch.shape]ㅡㅡㅡㅡㅡ")
# print(Person_all_dataset_torch.shape)

# print("ㅡㅡㅡㅡㅡ[Person_all_dataset_torch]ㅡㅡㅡㅡㅡ")
# print(Person_all_dataset_torch)

In [60]:
SNSB_all_dataset_np = SNSB_all_dataset.to_numpy()
SNSB_all_dataset_torch = torch.from_numpy(SNSB_all_dataset_np).float()
"""
print("ㅡㅡㅡㅡㅡ[SNSB_all_dataset_torch.shape]ㅡㅡㅡㅡㅡ")
print(SNSB_all_dataset_torch.shape)

print("ㅡㅡㅡㅡㅡ[SNSB_all_dataset_torch]ㅡㅡㅡㅡㅡ")
print(SNSB_all_dataset_torch)
"""

'\nprint("ㅡㅡㅡㅡㅡ[SNSB_all_dataset_torch.shape]ㅡㅡㅡㅡㅡ")\nprint(SNSB_all_dataset_torch.shape)\n\nprint("ㅡㅡㅡㅡㅡ[SNSB_all_dataset_torch]ㅡㅡㅡㅡㅡ")\nprint(SNSB_all_dataset_torch)\n'

### 2) 데이터셋 불러오기

In [63]:
# 전체 데이터셋 정보
eyerpt_all_dataset = defaultdict(list)
rpt_all_dataset = defaultdict(list)

# 전체 경로
eyerpt_rpt_path = f'C:\\Users\\Bang\\JupyterProjects\\VEEM_Project\\data\\rpt\\'

# 전체 폴더 내 파일 리스트 추출
eyerpt_rpt_files_name = os.listdir(eyerpt_rpt_path)
eyerpt_rpt_files_name = sorted(eyerpt_rpt_files_name)

print("eyerpt_rpt_files_name : ", eyerpt_rpt_files_name)

# eyerpt, rpt 파일 리스트 추출
eyerpt_files_name = [eyerpt_rpt_file_name for eyerpt_rpt_file_name in eyerpt_rpt_files_name if "eye" in eyerpt_rpt_file_name]
rpt_files_name = [eyerpt_rpt_file_name for eyerpt_rpt_file_name in eyerpt_rpt_files_name if not "eye" in eyerpt_rpt_file_name]

print("eyerpt_files_name : ", eyerpt_files_name)
print("rpt_files_name : ", rpt_files_name)

### 3) train test split

In [None]:
# 전체 데이터 개수 -> 비율 기반 split
all_eyerpt_count = len(eyerpt_files_name)
test_eyerpt_count = int(all_eyerpt_count * args.test_ratio)
train_eyerpt_count = all_eyerpt_count - test_eyerpt_count

train_eyerpt_files_name = eyerpt_files_name[:train_eyerpt_count]
test_eyerpt_files_name = eyerpt_files_name[train_eyerpt_count:]

print("train_eyerpt_files_name : ", train_eyerpt_files_name)
print("len(train_eyerpt_files_name) : ", len(train_eyerpt_files_name))
print("test_eyerpt_files_name : ", test_eyerpt_files_name)
print("len(test_eyerpt_files_name) : ", len(test_eyerpt_files_name))

all_rpt_count = len(rpt_files_name)
test_rpt_count = int(all_rpt_count * args.test_ratio)
train_rpt_count = all_rpt_count - test_rpt_count

train_rpt_files_name = rpt_files_name[:train_rpt_count]
test_rpt_files_name = rpt_files_name[train_rpt_count:]

print("train_rpt_files_name : ", train_rpt_files_name)
print("len(train_rpt_files_name) : ", len(train_rpt_files_name))
print("test_rpt_files_name : ", test_rpt_files_name)
print("len(test_rpt_files_name) : ", len(test_rpt_files_name))

### 4) 전처리

In [None]:
# eyerpt 파일 전처리
def eyerpt_rpt_preprocessing(files_name):
    for i, file_name in enumerate(files_name):
        # =====================================================
        # 2) 데이터 불러오기
        # =====================================================
        eyerpt_path = eyerpt_rpt_path + eyerpt_file_name
        """
        print('eyerpt_path : ', eyerpt_path) # 확인용 코드
        """

        eyerpt_dataset=pd.read_csv(eyerpt_path)
        """
        print("ㅡㅡㅡㅡㅡ[eyerpt_dataset.dtypes]ㅡㅡㅡㅡㅡ")
        print(eyerpt_dataset.dtypes)
        """

        # =====================================================
        # 3) 데이터 자료형 변환
        # =====================================================

        # =====================================================
        # 4) 인덱스 지정
        # =====================================================
        # eyerpt_dataset['time stamp'] = pd.to_datetime(eyerpt_dataset['time stamp'])
        eyerpt_dataset.set_index('time stamp', inplace=True)
        """
        print("ㅡㅡㅡㅡㅡ[eyerpt_dataset]ㅡㅡㅡㅡㅡ")
        print(eyerpt_dataset)
        """

        # =====================================================
        # 5) 데이터 프레임 변환 + 필요한 피쳐 추출
        # =====================================================
        df_eyerpt = eyerpt_dataset.loc[:, :]
        df_eyerpt = df_eyerpt.drop(['time', 'beforeOBJ', 'presentOBJ', 'Obeject_name'], axis = 1) # 1 = columns
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt)
        """

        # =====================================================
        # 6) 결측치 행 제거
        # =====================================================
        df_eyerpt.dropna(axis=0, inplace = True)
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt)
        print(df_eyerpt.Panel_num)
        """

        # =====================================================
        # 7) 실험 종료 이후 데이터 제거
        # =====================================================
        df_eyerpt_drop8 = df_eyerpt[df_eyerpt.Panel_num < 8]
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt_drop8]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt_drop8)
        """

        # =====================================================
        # 8) 데이터 프레임 -> numpy 변환 -> torch 변환
        # =====================================================
        df_eyerpt_drop8_np = df_eyerpt_drop8
        df_eyerpt_drop8_torch = torch.from_numpy(df_eyerpt_drop8_np)
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt_drop8_torch.shape]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt_drop8_torch.shape)

        print("ㅡㅡㅡㅡㅡ[df_eyerpt_drop8_torch]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt_drop8_torch)
        """

        # =====================================================
        # 9) 데이터셋 길이 추출
        # =====================================================
        df_eyerpt_sequence_length = len(df_eyerpt_drop8_torch)
        """
        print("df_eyerpt_sequence_length : ", df_eyerpt_sequence_length)
        """

        # =====================================================
        # 10) SNSB 데이터셋 라벨 행 추출(float형 사용)
        # =====================================================
        SNSB_label = SNSB_all_dataset_torch[i,:]
        """
        print("SNSB_label : ", SNSB_label)
        """
        SNSB_label = SNSB_label.reshape(-1, len(SNSB_label))
        """
        print("SNSB_label : ", SNSB_label)
        """

        # =====================================================
        # 11) 전체 데이터셋 구성
        # =====================================================
        # 파일 정보 + 파일 sequence 길이 리스트화
        df_eyerpt_infor = [df_eyerpt_drop8_torch, df_eyerpt_sequence_length, SNSB_label]
        """
        print("df_eyerpt_infor : ", df_eyerpt_infor)
        """

        # 모든 정보 딕셔너리화
        eyerpt_all_dataset[eyerpt_file_name[0:2]].append(df_eyerpt_infor)
        # eyerpt_all_dataset = dict(zip([eyerpt_file_name[0:2]], df_eyerpt_infor))
        """
        print("eyerpt_all_dataset['0'] : ", eyerpt_all_dataset['02'])
        print("eyerpt_all_dataset : ", eyerpt_all_dataset)
        """
    
    
    
    
    
    
    
    
    
    for i, eyerpt_file_name in enumerate(eyerpt_files_name):
        # =====================================================
        # 2) 데이터 불러오기
        # =====================================================
        eyerpt_path = eyerpt_rpt_path + eyerpt_file_name
        """
        print('eyerpt_path : ', eyerpt_path) # 확인용 코드
        """

        eyerpt_dataset=pd.read_csv(eyerpt_path)
        """
        print("ㅡㅡㅡㅡㅡ[eyerpt_dataset.dtypes]ㅡㅡㅡㅡㅡ")
        print(eyerpt_dataset.dtypes)
        """

        # =====================================================
        # 3) 데이터 자료형 변환
        # =====================================================

        # =====================================================
        # 4) 인덱스 지정
        # =====================================================
        # eyerpt_dataset['time stamp'] = pd.to_datetime(eyerpt_dataset['time stamp'])
        eyerpt_dataset.set_index('time stamp', inplace=True)
        """
        print("ㅡㅡㅡㅡㅡ[eyerpt_dataset]ㅡㅡㅡㅡㅡ")
        print(eyerpt_dataset)
        """

        # =====================================================
        # 5) 데이터 프레임 변환 + 필요한 피쳐 추출
        # =====================================================
        df_eyerpt = eyerpt_dataset.loc[:, :]
        df_eyerpt = df_eyerpt.drop(['time', 'beforeOBJ', 'presentOBJ', 'Obeject_name'], axis = 1) # 1 = columns
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt)
        """

        # =====================================================
        # 6) 결측치 행 제거
        # =====================================================
        df_eyerpt.dropna(axis=0, inplace = True)
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt)
        print(df_eyerpt.Panel_num)
        """

        # =====================================================
        # 7) 실험 종료 이후 데이터 제거
        # =====================================================
        df_eyerpt_drop8 = df_eyerpt[df_eyerpt.Panel_num < 8]
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt_drop8]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt_drop8)
        """

        # =====================================================
        # 8) 데이터 프레임 -> numpy 변환 -> torch 변환
        # =====================================================
        df_eyerpt_drop8_np = df_eyerpt_drop8
        df_eyerpt_drop8_torch = torch.from_numpy(df_eyerpt_drop8_np)
        """
        print("ㅡㅡㅡㅡㅡ[df_eyerpt_drop8_torch.shape]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt_drop8_torch.shape)

        print("ㅡㅡㅡㅡㅡ[df_eyerpt_drop8_torch]ㅡㅡㅡㅡㅡ")
        print(df_eyerpt_drop8_torch)
        """

        # =====================================================
        # 9) 데이터셋 길이 추출
        # =====================================================
        df_eyerpt_sequence_length = len(df_eyerpt_drop8_torch)
        """
        print("df_eyerpt_sequence_length : ", df_eyerpt_sequence_length)
        """

        # =====================================================
        # 10) SNSB 데이터셋 라벨 행 추출(float형 사용)
        # =====================================================
        SNSB_label = SNSB_all_dataset_torch[i,:]
        """
        print("SNSB_label : ", SNSB_label)
        """
        SNSB_label = SNSB_label.reshape(-1, len(SNSB_label))
        """
        print("SNSB_label : ", SNSB_label)
        """

        # =====================================================
        # 11) 전체 데이터셋 구성
        # =====================================================
        # 파일 정보 + 파일 sequence 길이 리스트화
        df_eyerpt_infor = [df_eyerpt_drop8_torch, df_eyerpt_sequence_length, SNSB_label]
        """
        print("df_eyerpt_infor : ", df_eyerpt_infor)
        """

        # 모든 정보 딕셔너리화
        eyerpt_all_dataset[eyerpt_file_name[0:2]].append(df_eyerpt_infor)
        # eyerpt_all_dataset = dict(zip([eyerpt_file_name[0:2]], df_eyerpt_infor))
        """
        print("eyerpt_all_dataset['0'] : ", eyerpt_all_dataset['02'])
        print("eyerpt_all_dataset : ", eyerpt_all_dataset)
        """

    # rpt 파일 전처리
    for i, rpt_file_name in enumerate(rpt_files_name):
        # =====================================================
        # 2) 데이터 불러오기
        # =====================================================
        rpt_path = eyerpt_rpt_path + rpt_file_name
        """
        print('rpt_path : ', rpt_path) # 확인용 코드
        """

        rpt_dataset=pd.read_csv(rpt_path)
        """
        print("ㅡㅡㅡㅡㅡ[rpt_dataset.dtypes]ㅡㅡㅡㅡㅡ")
        print(rpt_dataset.dtypes)
        """

        # =====================================================
        # 3) 데이터 자료형 변환
        # =====================================================

        # =====================================================
        # 4) 인덱스 지정
        # =====================================================
        # rpt_dataset['time stamp'] = pd.to_datetime(rpt_dataset['time stamp'])
        rpt_dataset.set_index('time stamp', inplace=True)
        """
        print("ㅡㅡㅡㅡㅡ[rpt_dataset]ㅡㅡㅡㅡㅡ")
        print(rpt_dataset)
        """

        # =====================================================
        # 5) 데이터 프레임 변환 + 필요한 피쳐 추출
        # =====================================================
        df_rpt = rpt_dataset.loc[:, :]
        df_rpt = df_rpt.drop(['total_task_time_s', 'hand_x_rotation_deg', 'hand_y_rotation_deg', 'hand_z_rotation_deg'], axis = 1) # 1= columns
        """
        print("ㅡㅡㅡㅡㅡ[df_rpt]ㅡㅡㅡㅡㅡ")
        print(df_rpt)
        """

        # =====================================================
        # 6) 결측치 행 제거
        # =====================================================
        df_rpt.dropna(axis=0, inplace = True)
        """
        print("ㅡㅡㅡㅡㅡ[df_rpt]ㅡㅡㅡㅡㅡ")
        print(df_rpt)
        print(df_eyerpt.Panel_num)
        """

        # =====================================================
        # 7) 실험 종료 이후 데이터 제거
        # =====================================================
        df_rpt_drop8 = df_rpt[df_rpt.panel_num < 8]
        """
        print("ㅡㅡㅡㅡㅡ[df_rpt_drop8]ㅡㅡㅡㅡㅡ")
        print(df_rpt_drop8)
        """

        # =====================================================
        # 8) 데이터 프레임 -> numpy 변환 -> torch 변환(float형 사용)
        # =====================================================
        df_rpt_drop8_np = df_rpt_drop8.to_numpy()
        df_rpt_drop8_torch = torch.from_numpy(df_rpt_drop8_np).float()
        """
        print("ㅡㅡㅡㅡㅡ[df_rpt_drop8_torch.shape]ㅡㅡㅡㅡㅡ")
        print(df_rpt_drop8_torch.shape)

        print("ㅡㅡㅡㅡㅡ[df_rpt_drop8_torch]ㅡㅡㅡㅡㅡ")
        print(df_rpt_drop8_torch)
        """

        # =====================================================
        # 9) 데이터셋 길이 추출
        # =====================================================
        df_rpt_sequence_length = len(df_rpt_drop8_torch)
        """
        print("df_rpt_sequence_length : ", df_rpt_sequence_length)
        """

        # =====================================================
        # 10) SNSB 데이터셋 라벨 행 추출(float형 사용)
        # =====================================================
        SNSB_label = SNSB_all_dataset_torch[i,:]
        """
        print("SNSB_label : ", SNSB_label)
        """
        SNSB_label = SNSB_label.reshape(-1, len(SNSB_label))
        """
        print("SNSB_label : ", SNSB_label)
        """

        # =====================================================
        # 11) 전체 데이터셋 구성
        # =====================================================
        # 파일 정보 + 파일 sequence 길이 리스트화
        df_rpt_infor = [df_rpt_drop8_torch, df_rpt_sequence_length, SNSB_label]
        """
        print("df_rpt_infor : ", df_rpt_infor)
        """

        # 모든 정보 딕셔너리화
        rpt_all_dataset[rpt_file_name[0:2]].append(df_rpt_infor)
        # rpt_all_dataset = dict(zip(rpt_file_name[0:2], df_rpt_infor))
        """
        print("rpt_all_dataset : ", rpt_all_dataset)
        print("rpt_all_dataset['02'] : ", rpt_all_dataset['02'])
        """

    # 전체 데이터셋 정보 출력
    print("len(eyerpt_all_dataset) : ", len(eyerpt_all_dataset))
    print("len(rpt_all_dataset) : ", len(rpt_all_dataset))

In [64]:
make_dataset()

eyerpt_rpt_files_name :  ['02_eyerpt.csv', '02_rpt.csv', '03_eyerpt.csv', '03_rpt.csv', '04_eyerpt.csv', '04_rpt.csv', '06_eyerpt.csv', '06_rpt.csv', '07_eyerpt.csv', '07_rpt.csv', '08_eyerpt.csv', '08_rpt.csv', '09_eyerpt.csv', '09_rpt.csv', '10_eyerpt.csv', '10_rpt.csv', '11_eyerpt.csv', '11_rpt.csv', '12_eyerpt.csv', '12_rpt.csv', '13_eyerpt.csv', '13_rpt.csv', '14_eyerpt.csv', '14_rpt.csv', '15_eyerpt.csv', '15_rpt.csv', '16_eyerpt.csv', '16_rpt.csv', '17_eyerpt.csv', '17_rpt.csv', '18_eyerpt.csv', '18_rpt.csv', '19_eyerpt.csv', '19_rpt.csv', '20_eyerpt.csv', '20_rpt.csv', '21_eyerpt.csv', '21_rpt.csv', '22_eyerpt.csv', '22_rpt.csv', '23_eyerpt.csv', '23_rpt.csv', '24_eyerpt.csv', '24_rpt.csv', '25_eyerpt.csv', '25_rpt.csv', '27_eyerpt.csv', '27_rpt.csv', '28_eyerpt.csv', '28_rpt.csv', '29_eyerpt.csv', '29_rpt.csv', '30_eyerpt.csv', '30_rpt.csv', '31_eyerpt.csv', '31_rpt.csv', '32_eyerpt.csv', '32_rpt.csv', '33_eyerpt.csv', '33_rpt.csv', '34_eyerpt.csv', '34_rpt.csv', '35_eyerpt.cs

### 3) 데이터 자료형 변환

rpt_dataset['panel_num'] = rpt_dataset['panel_num'].astype(float)
rpt_dataset['error'] = rpt_dataset['error'].astype(float)

print("ㅡㅡㅡㅡㅡ[eyerpt_dataset.dtypes]ㅡㅡㅡㅡㅡ")
print(eyerpt_dataset.dtypes)
print("ㅡㅡㅡㅡㅡ[rpt_dataset.dtypes]ㅡㅡㅡㅡㅡ")
print(rpt_dataset.dtypes)

## 5. Model Train

### 1) Load model epoch

### 2) Train model

In [39]:
for epoch in range(args.epochs):
    for key, value in rpt_all_dataset.items():
        """
        print("key : ", key)
        print("value : ", value)
        print("value[0] : ", value[0])
        print("value[0][0] : ", value[0][0])
        print("value[0][1] : ", value[0][1])
        print("value[0][2] : ", value[0][2])
        """
        df_rpt_drop8_torch = value[0][0]
        df_rpt_sequence_length = value[0][1]
        SNSB_label = value[0][2]
        """
        print("SNSB_label : ", SNSB_label)
        print("SNSB_label.shape : ", SNSB_label.shape)
        """
        
        df_rpt_drop8_torch = df_rpt_drop8_torch.reshape(-1, df_rpt_sequence_length, args.input_size).to(args.device) # (BATCH(100), 1, 28, 28) -> (BATCH(100), 28, 28)
        SNSB_label = SNSB_label.to(args.device) # Size : (100)

        # 순전파
        output = model(df_rpt_drop8_torch)
        """
        print("output : ", output)
        print("output.shape : ", output.shape)
        """
        loss = criterion(output, SNSB_label)

        # 역전파 & 최적화
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, args.epochs, loss.item()))

Epoch [1/62], Loss: 412.3363
Epoch [2/62], Loss: 391.1198
Epoch [3/62], Loss: 371.1953
Epoch [4/62], Loss: 352.4972
Epoch [5/62], Loss: 334.9730
Epoch [6/62], Loss: 318.4557
Epoch [7/62], Loss: 302.9002
Epoch [8/62], Loss: 288.1965
Epoch [9/62], Loss: 274.3073
Epoch [10/62], Loss: 261.1738
Epoch [11/62], Loss: 248.7141
Epoch [12/62], Loss: 236.9281
Epoch [13/62], Loss: 225.7026
Epoch [14/62], Loss: 214.9879
Epoch [15/62], Loss: 204.8313
Epoch [16/62], Loss: 195.1491
Epoch [17/62], Loss: 185.8935
Epoch [18/62], Loss: 177.0897
Epoch [19/62], Loss: 168.6597
Epoch [20/62], Loss: 160.6149
Epoch [21/62], Loss: 86.8034
Epoch [22/62], Loss: 79.3702
Epoch [23/62], Loss: 72.4106
Epoch [24/62], Loss: 65.9212
Epoch [25/62], Loss: 59.8555
Epoch [26/62], Loss: 54.1992
Epoch [27/62], Loss: 48.9477
Epoch [28/62], Loss: 44.0676
Epoch [29/62], Loss: 39.5406
Epoch [30/62], Loss: 35.3655
Epoch [31/62], Loss: 31.5127
Epoch [32/62], Loss: 27.9390
Epoch [33/62], Loss: 24.7011
Epoch [34/62], Loss: 21.7397
Epo

In [40]:
# 모델 평가
model.eval() # Dropout, Batchnorm 등 실행 x
with torch.no_grad():
    correct = 0
    total = 0
    for key, value in rpt_all_dataset.items():
        """
        print("key : ", key)
        print("value : ", value)
        print("value[0] : ", value[0])
        print("value[0][0] : ", value[0][0])
        print("value[0][1] : ", value[0][1])
        print("value[0][2] : ", value[0][2])
        """
        df_rpt_drop8_torch = value[0][0]
        df_rpt_sequence_length = value[0][1]
        SNSB_label = value[0][2]
        """
        print("SNSB_label : ", SNSB_label)
        print("SNSB_label.shape : ", SNSB_label.shape)
        """

        df_rpt_drop8_torch = df_rpt_drop8_torch.reshape(-1, df_rpt_sequence_length, args.input_size).to(args.device) # (BATCH(100), 1, 28, 28) -> (BATCH(100), 28, 28)
        SNSB_label = SNSB_label.to(args.device) # Size : (100)

        # 순전파
        output = model(df_rpt_drop8_torch)
        """
        print("output : ", output)
        print("output.shape : ", output.shape)
        """

        print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# 모델 저장
torch.save(model.state_dict(), 'VEMM_RNN_model.ckpt')

ZeroDivisionError: division by zero