In [1]:
import torch
if torch.cuda.is_available() == True:
    device = 'cuda:0'
    print('GPU 사용 가능')
else:
    device = 'cpu'
    print('GPU 사용 불가')

GPU 사용 가능


In [2]:
import matplotlib.pyplot as plt
# 학습 추이 기록
def init_log():
    plt.rc('font', size = 10)
    # 모든 log를 초기화
    global log_stack, iter_log, tloss_log, vloss_log, time_log
    iter_log, tloss_log, vloss_log = [], [], []
    time_log, log_stack = [], []

def record_train_log(_tloss, _time):
    # Train Log 기록용
    time_log.append(_time)
    tloss_log.append(_tloss)
    iter_log.append(epoch_cnt)

def record_valid_log(_vloss):
    # Validation Log 기록용
    vloss_log.append(_vloss)

def last(log_list):
    # 리스트 안의 마지막 숫자를 반환(print_log 함수에서 사용)
    if len(log_list) > 0:
        return log_list[len(log_list) - 1]
    else:
        return -1

from IPython.display import clear_output
def print_log():
    # 학습 추이 출력

    # 소숫점 3자리 수까지 조절
    train_loss = round(float(last(tloss_log)), 3)
    val_loss = round(float(last(vloss_log)), 3)
    time_spent = round(float(last(time_log)), 3)

    log_str = f'Epoch: {last(iter_log):3} | T_Loss {train_loss:5} | V_Loss {val_loss:5} | Time {time_spent:5}'

    log_stack.append(log_str)

    # 학습 추이 그래프 출력
    hist_fig, loss_axis = plt.subplots(figsize=(10, 3), dpi=99) # 그래프 사이즈 설정
    hist_fig.patch.set_facecolor('white') # 그래프 배경색 설정
    
    # Loss Line 구성
    loss_t_line = plt.plot(iter_log, tloss_log, label='Train Loss', color='red', marker='o')
    loss_v_line = plt.plot(iter_log, vloss_log, label='Valid Loss', color='blue', marker='s')
    loss_axis.set_xlabel('epoch')
    loss_axis.set_ylabel('loss')
    
    # 그래프 출력
    hist_lines = loss_t_line + loss_v_line # 위에서 선언한 plt정보들 통합
    loss_axis.legend(hist_lines, [l.get_label() for l in hist_lines]) # 순서대로 그려주기
    loss_axis.grid() # 격자 설정
    plt.title('Learning history until epoch {}'.format(last(iter_log)))
    plt.draw()
    
    # 텍스트 로그 출력
    clear_output(wait=True)
    plt.show()
    for idx in reversed(range(len(log_stack))): # 반대로 sort 시켜서 출력
        print(log_stack[idx])

In [3]:
import numpy as np
import random

# train dataset 집어넣기
dataset = []

def load_dataset(name):
    forehand = np.load(f'../variable_dataset/training_data/{name}/{name}_B_Forehand_sampling_train_dataset.npy')
    backhand = np.load(f'../variable_dataset/training_data/{name}/{name}_B_Backhand_sampling_train_dataset.npy')
    backslice = np.load(f'../variable_dataset/training_data/{name}/{name}_B_BackSlice_sampling_train_dataset.npy')
    forevolley = np.load(f'../variable_dataset/training_data/{name}/{name}_B_ForeVolley_sampling_train_dataset.npy')
    backvolley = np.load(f'../variable_dataset/training_data/{name}/{name}_B_BackVolley_sampling_train_dataset.npy')
    smash = np.load(f'../variable_dataset/training_data/{name}/{name}_B_Smash_sampling_train_dataset.npy')
    serve = np.load(f'../variable_dataset/training_data/{name}/{name}_B_Serve_sampling_train_dataset.npy')

    for i in forehand:
        dataset.append(i)
    for i in backhand:
        dataset.append(i)
    for i in backslice:
        dataset.append(i)
    for i in forevolley:
        dataset.append(i)
    for i in backvolley:
        dataset.append(i)
    for i in smash:
        dataset.append(i)
    for i in serve:
        dataset.append(i)

load_dataset('Federer')
load_dataset('Nadal_inv')
load_dataset('Djokovic')
load_dataset('Sinner')
load_dataset('Tsitsipas')
load_dataset('Zverev')
load_dataset('Murray')
load_dataset('Alcaraz')
load_dataset('Rune')
load_dataset('Shapovalov_inv')

random.shuffle(dataset)
print(len(dataset))

8529


In [4]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader # Pytorch에서 데이터를 불러오고, 전처리하는 클래스

class MyDataset(Dataset):
    def __init__(self, seq_data):
        self.dataset = []
        for data in seq_data:
            self.dataset.append(data)

    def __getitem__(self, index):
        data = self.dataset[index]
        return torch.Tensor(np.array(data))
        
    def __len__(self):
            return len(self.dataset)

In [5]:
split_ratio = [0.8, 0.2]
train_len = round(len(dataset) * split_ratio[0])
val_len = round(len(dataset) * split_ratio[1])
print(f'{train_len}, {val_len}')

6823, 1706


In [6]:
from torch.utils.data import random_split
train_dataset = MyDataset(dataset)
train_data, valid_data = random_split(train_dataset, [train_len, val_len])

train_loader = DataLoader(train_dataset, batch_size=16)
val_loader = DataLoader(valid_data, batch_size=16)

In [7]:
class Encoder(nn.Module):

    def __init__(self, input_size=100, hidden_size=50, num_layers=2):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,
                            dropout=0.3, bidirectional=False)

    def forward(self, x):
        outputs, (hidden, cell) = self.lstm(x)  # out: tensor of shape (batch_size, seq_length, hidden_size)

        return (hidden, cell)
    
class Decoder(nn.Module):

    def __init__(self, input_size=100, hidden_size=50, output_size=100, num_layers=2):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True,
                            dropout=0.3, bidirectional=False)

        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x, hidden):
        output, (hidden, cell) = self.lstm(x, hidden)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        prediction = self.fc(output)

        return prediction, (hidden, cell)
    
## LSTM Auto Encoder
class LSTMAutoEncoder(nn.Module):

    def __init__(self,
                 input_dim: int,
                 latent_dim: int,
                 sequence_length: int=1,
                 **kwargs) -> None:
        """
        :param input_dim: 변수 Tag 갯수
        :param latent_dim: 최종 압축할 차원 크기
        :param sequence length: sequence 길이
        :param kwargs:
        """

        super(LSTMAutoEncoder, self).__init__()

        self.latent_dim = latent_dim
        self.input_dim = input_dim
        self.sequence_length = sequence_length

        if "num_layers" in kwargs:
            num_layers = kwargs.pop("num_layers")
        else:
            num_layers = 1

        self.encoder = Encoder(
            input_size=input_dim,
            hidden_size=latent_dim,
            num_layers=num_layers,
        )
        self.reconstruct_decoder = Decoder(
            input_size=input_dim,
            output_size=input_dim,
            hidden_size=latent_dim,
            num_layers=num_layers,
        )

    def forward(self, src:torch.Tensor, **kwargs):
        batch_size, sequence_length, var_length = src.size()

        ## Encoder 넣기
        encoder_hidden = self.encoder(src)
        
        inv_idx = torch.arange(sequence_length - 1, -1, -1).long()
        reconstruct_output = []
        temp_input = torch.zeros((batch_size, 1, var_length), dtype=torch.float).to(src.device)
        hidden = encoder_hidden
        for t in range(sequence_length):
            temp_input, hidden = self.reconstruct_decoder(temp_input, hidden)
            reconstruct_output.append(temp_input)
        reconstruct_output = torch.cat(reconstruct_output, dim=1)[:, inv_idx, :]
        
        return [reconstruct_output, src]

    def loss_function(self,
                      *args,
                      **kwargs) -> dict:
        recons = args[0]
        input = args[1]
        
        ## MSE loss(Mean squared Error)
        loss =F.mse_loss(recons, input)
        return loss

In [8]:
# 모델 초기화
import matplotlib.pyplot as plt
from torch.optim import Adam
def init_model():
    plt.rc('font', size = 10)
    global model, loss_fn, optim
    model = LSTMAutoEncoder(input_dim=100, latent_dim=50, sequence_length=60, num_layers=6).to(device)
    optim = Adam(model.parameters(), lr=0.0001)

# epoch count 초기화
def init_epoch():
    global epoch_cnt
    epoch_cnt = 0

In [9]:
import gc
from torch.cuda import memory_allocated, empty_cache
def clear_memory():
    if device != 'cpu':
        empty_cache()
    gc.collect()

# 학습 알고리즘
import numpy as np
from tqdm import tqdm
def epoch(data_loader, mode = 'train'):
    global epoch_cnt

    # 사용되는 변수 초기화
    iter_loss, last_grad_performed = [], False

    # 1 iteration 학습 알고리즘(for 문을 나오면 1 epoch 완료)
    for _data in tqdm(data_loader, desc='Training'):
        data = _data.to(device)

        # 1. Feed-forward
        if mode == 'train':
            model.train()
        else:
            # 학습때만 쓰이는 Dropout, Batch Mormalization을 미사용
            model.eval()
    
        result = model(data) # 1 Batch에 대한 결과가 모든 Class에 대한 확률값으로 
        
        # 2. Loss 계산
        loss = model.loss_function(*result) # GT 와 Label 비교하여 Loss 산정
        iter_loss.append(loss.item()) # 학습 추이를 위하여 Loss를 기록
    
        # 3. 역전파 학습 후 Gradient Descent
        if mode == 'train':
            optim.zero_grad() # 미분을 통해 얻은 기울기로 초기화 for 다음 epoch
            loss.backward() # 역전파 학습
            optim.step() # Gradient Descent 수행
            last_grad_performed = True # for문 나가면 epoch 카운터 += 1

    if last_grad_performed:
        epoch_cnt += 1

    clear_memory()

    # loss와 acc의 평균값 for 학습 추이 그래프, 모든 GT와 Label 값 for 컨퓨전 행렬
    return np.average(iter_loss)

def epoch_not_finished():
    # 에폭이 끝남을 알림
    return epoch_cnt < maximum_epoch

In [10]:
# Training Initialization
init_model()
init_epoch()
init_log()
maximum_epoch = 2000

In [None]:
# Training Iteration
import time
loss_result = []
while epoch_not_finished():
    start_time = time.time()
    tloss = epoch(train_loader, mode = 'train')
    end_time = time.time()
    time_taken = end_time - start_time
    record_train_log(tloss, time_taken)
    with torch.no_grad():
        vloss = epoch(val_loader, mode = 'val')
        record_valid_log(vloss)
    print_log()
    loss_result.append(f'tloss : {tloss:.5} | vloss : {vloss:.5}')
    if tloss < 0.001 and vloss < 0.001:
        torch.save(model.state_dict(), f'./model_info/anomaly_detection_model_{tloss:.5}_{vloss:.5}')
    if epoch_cnt == 1500:
        torch.save(model.state_dict(), f'./model_info/anomaly_detection_model_{tloss:.5}_{vloss:.5}_layer6_1500')

print('\n Training completed!')

Training:  10%|█████▋                                                   | 53/534 [00:03<00:31, 15.51it/s]

In [None]:
# 모델 저장하기
torch.save(model.state_dict(), f'./model_info/anomaly_detection_model_{tloss:.5}_{vloss:.5}_layer6_2000')

In [None]:
cnt = 0
with open("anomaly_detection_train_result_layer6.txt", "w") as f:
    for i in loss_result:
        cnt += 1
        f.write(str(cnt))
        f.write(" ")
        f.write(str(i))
        f.write(" \n")