In [1]:
import psycopg2

import copy
import random
import numpy as np
import pandas as pd

import joblib
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import torch
from torch import nn
from torch.utils.data import Subset, DataLoader

from Dataset.Embedding_Dataset import Embedding_Dataset
from Model.Embedding import Embedding

from Dataset.Apartment_Complex_Dataset import Apartment_Complex_Dataset
from Model.LSTM import LSTM
from Model.GRU import GRU
from Model.Transformer import Transformer

from Dataset.Dong_Dataset import Dong_Dataset
from Model.LSTM_Attention import LSTMAttention
from Model.GRU_Attention import GRUAttention
from Model.Transformer_Attention import TransformerAttention

from utils import RMSE, save_train_val_losses

SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

table_1 = pd.read_csv('../데이터/Table/table_1.csv') 
table_2 = pd.read_csv('../데이터/Table/table_2.csv') 
table_3 = pd.read_csv('../데이터/Table/table_3.csv') 

In [2]:
epochs = 10000

embedding_dim = 1024

transformer_lr = 1e-4
transformer_batch = 128
transformer_window_size = 30 #

transformer_att_lr = 1e-4
transformer_att_batch = 1
transformer_att_hidden_dim = 1024
transformer_att_window_size = 30 #

In [3]:
embedding_model = torch.load('../데이터/Checkpoint/emb/embedding_lr_0.01_batch_128_epochs_9.pth', map_location=torch.device('cpu'))
dataset = Apartment_Complex_Dataset(embedding_model, table_1, table_2, table_3, embedding_dim, transformer_window_size, 'DL', DEVICE)

train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1
dataset_length = len(dataset)
train_size = int(train_ratio * dataset_length)
train_indices = range(0, train_size)
val_size = int(val_ratio * dataset_length)
val_indices = range(train_size, train_size + val_size)
test_size = int(test_ratio * dataset_length)
test_indices = range(train_size + val_size, dataset_length)

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

In [5]:
train_dataloader = DataLoader(train_dataset, batch_size=transformer_batch, shuffle=False, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=transformer_batch, shuffle=False, drop_last=True)

transformer_model = Transformer(embedding_dim, transformer_window_size, 1, 2, 2).to(DEVICE)
criterion = RMSE()
optimizer = torch.optim.Adam(transformer_model.parameters(), lr=transformer_lr)

transformer_train_losses = []
transformer_val_losses = []

max_early_stop_count = 3
early_stop_count = 0
transformer_best_val_loss = float('inf')
transformer_best_model_weights = None

for epoch in range(epochs):
    transformer_model.train()
    transformer_total_train_loss = 0
    for data in train_dataloader:
        src = data[0].to(DEVICE)
        trg = data[1].to(DEVICE)

        if (trg[0] != 0):
            src_mask = transformer_model.generate_square_subsequent_mask(src.shape[1]).to(src.device)
            output = transformer_model(src, src_mask)

            transformer_train_loss = criterion(output[0], trg)
            transformer_total_train_loss += transformer_train_loss.item()

            optimizer.zero_grad()
            transformer_train_loss.backward()
            optimizer.step()
            
    transformer_avg_train_loss = transformer_total_train_loss / len(train_dataloader)
    transformer_train_losses.append(transformer_avg_train_loss)

    transformer_model.eval()
    transformer_total_val_loss = 0
    with torch.no_grad():
        for data in val_dataloader:
            src = data[0].to(DEVICE)
            trg = data[1].to(DEVICE)

            if (trg[0] != 0):
                src_mask = transformer_model.generate_square_subsequent_mask(src.shape[1]).to(src.device)
                output = transformer_model(src, src_mask)

                transformer_val_loss = criterion(output[0], trg)
                transformer_total_val_loss += transformer_val_loss.item()

    transformer_avg_val_loss = transformer_total_val_loss / len(val_dataloader)
    transformer_val_losses.append(transformer_avg_val_loss)

    if  transformer_best_val_loss > transformer_avg_val_loss:
        transformer_best_val_loss = transformer_avg_val_loss
        transformer_best_model_weights = copy.deepcopy(transformer_model.state_dict())
        early_stop_count = 0
    else:
        early_stop_count += 1
        
    if early_stop_count >= max_early_stop_count:
        print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {transformer_avg_train_loss:.6f}, Val Loss: {transformer_avg_val_loss:.6f} \nEarly Stop Triggered!')
        transformer_model.load_state_dict(transformer_best_model_weights)
        torch.save(transformer_model, f'../데이터/Checkpoint/transformer/transformer_emb_{embedding_dim}_lr_{transformer_lr}_batch_{transformer_batch}_epochs_{epoch+1}_ws_{transformer_window_size}.pth')
        break

    print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {transformer_avg_train_loss:.6f}, Val Loss: {transformer_avg_val_loss:.6f}')



Epoch [1/10000], Train Loss: 1.673023, Val Loss: 1.986840


KeyboardInterrupt: 

In [19]:
embedding_model = torch.load('../데이터/Checkpoint/emb/embedding_lr_0.01_batch_128_epochs_9.pth', map_location=torch.device('cpu'))
dataset = Dong_Dataset(embedding_model, table_1, table_2, table_3, embedding_dim, transformer_att_window_size, 'TRAIN', DEVICE)

train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1
dataset_length = len(dataset)
train_size = int(train_ratio * dataset_length)
train_indices = range(0, train_size)
val_size = int(val_ratio * dataset_length)
val_indices = range(train_size, train_size + val_size)
test_size = int(test_ratio * dataset_length)
test_indices = range(train_size + val_size, dataset_length)

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

In [20]:
# transformer_model = torch.load('../데이터/Checkpoint/transformer_tr_0.8_lr_0.0001_wd_0_batch_128_epochs_5_ws_3.pth', map_location=torch.device('cpu'))
transformer_att_model = TransformerAttention(transformer_model, transformer_att_hidden_dim, 1, DEVICE).to(DEVICE)
criterion = RMSE()
optimizer = torch.optim.Adam(transformer_att_model.parameters(), lr=transformer_att_lr)

transformer_att_train_losses = []
transformer_att_val_losses = []

max_early_stop_count = 3
early_stop_count = 0
transformer_att_best_val_loss = float('inf')
transformer_att_best_model_weights = None

for epoch in range(epoch):
    transformer_att_model.train()
    transformer_att_total_train_loss = 0
    for data in train_dataloader:
        src = data[0][0].to(DEVICE)
        max_len = data[1][0].to(DEVICE)
        anw = data[2][0].to(DEVICE)
        trg = data[3][0].to(DEVICE)
        
        if len(anw)==0:
            continue

        for index in anw:
            output = transformer_att_model(src, index, max_len)
            
            transformer_att_train_loss = criterion(output, trg[index])
            transformer_att_total_train_loss += transformer_att_train_loss.item()
            
            optimizer.zero_grad()
            transformer_att_train_loss.backward()
            optimizer.step() 
            
    transformer_att_avg_train_loss = transformer_att_total_train_loss / len(train_dataloader)
    transformer_att_train_losses.append(transformer_att_avg_train_loss)

    transformer_att_model.eval()
    transformer_att_total_val_loss = 0
    with torch.no_grad():
        for data in val_dataloader:
            src = data[0][0].to(DEVICE)
            max_len = data[1][0].to(DEVICE)
            anw = data[2][0].to(DEVICE)
            trg = data[3][0].to(DEVICE)

            if len(anw)==0:
                continue

            for index in anw:
                output = transformer_att_model(src, index, max_len)

                transformer_att_val_loss = criterion(output, trg[index])
                transformer_att_total_val_loss += transformer_att_val_loss.item()
                
    transformer_att_avg_val_loss = transformer_att_total_val_loss / len(val_dataloader)
    transformer_att_val_losses.append(transformer_att_avg_val_loss)
            
    if  transformer_att_best_val_loss > transformer_att_avg_val_loss:
        transformer_att_best_val_loss = transformer_att_avg_val_loss
        transformer_att_best_model_weights = copy.deepcopy(transformer_att_model.state_dict())
        early_stop_count = 0
    else:
        early_stop_count += 1

    if early_stop_count >= max_early_stop_count:
        print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {transformer_att_avg_train_loss:.6f}, Val Loss: {transformer_att_avg_val_loss:.6f} \nEarly Stop Triggered!')
        transformer_att_model.load_state_dict(transformer_att_best_model_weights)
        torch.save(transformer_att_model, f'../데이터/Checkpoint/transformer_att_lr_{transformer_att_lr}_batch_{transformer_att_batch}_epochs_{epoch+1}_hdim_{transformer_att_hidden_dim}_ws_{transformer_att_window_size}.pth')
        break

    print(f'Epoch [{epoch+1}/{epochs}], Train Loss: {transformer_att_avg_train_loss:.6f}, Val Loss: {transformer_att_avg_val_loss:.6f}')

Epoch [1/1000], Train Loss: 3.826740, Val Loss: 12.337122


In [None]:
transformer_att_lr = 1e-4
transformer_att_batch = 1
transformer_att_hidden_dim = 1024
transformer_att_window_size = 30