In [87]:
import psycopg2

import copy
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

import joblib
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import torch
from torch import nn
from torch.utils.data import Subset, DataLoader

from Dataset.Embedding_Dataset import Embedding_Dataset
from Model.Embedding import Embedding

from Dataset.Apartment_Complex_Dataset import Apartment_Complex_Dataset
from Model.LSTM import LSTM
from Model.GRU import GRU
from Model.Transformer import Transformer

from Dataset.District_Dataset import District_Dataset
from Model.LSTM_Attention import LSTMAttention
from Model.GRU_Attention import GRUAttention
from Model.Transformer_Attention import TransformerAttention

from utils import RMSE, rmse, mse, mae, save_train_val_losses

SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# connection_info = "host=localhost dbname=postgres user=postgres password=hd219833 port=5432"
# conn = psycopg2.connect(connection_info)
# table_1_query = '''
#     SELECT * FROM building
#     '''
# table_2_query = '''
#     SELECT * FROM economy
#     '''
# table_3_query = '''
#     SELECT * FROM building_price
#     '''
# table_1 = pd.read_sql(table_1_query,conn) 
# table_2 = pd.read_sql(table_2_query,conn)
# table_3 = pd.read_sql(table_3_query,conn) 

table_1 = pd.read_csv('../데이터/Table/table_1.csv') 
table_2 = pd.read_csv('../데이터/Table/table_2.csv') 
table_3 = pd.read_csv('../데이터/Table/table_3.csv') 

In [89]:
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

lr = 1e-4
batch = 64
hidden_dim = 1024
sub = True # True
embedding_dim = 512 # 1024
window_size = 12 # 12

### DL

In [90]:
embedding_model = torch.load("../데이터/Checkpoint/embedding/emb_512/embedding_lr_0.0001_batch_64_sub_True_emb_512_ws_12_epochs_27.pth", map_location=DEVICE)
dataset = District_Dataset('None', table_1, table_2, table_3, 'None', window_size, sub, DEVICE)
dataset_length = len(dataset)
train_size = int(train_ratio * dataset_length)
# train_indices = range(0, train_size)
val_size = int(val_ratio * dataset_length)
# val_indices = range(train_size, train_size + val_size)
test_size = int(test_ratio * dataset_length)
test_indices = range(train_size + val_size, dataset_length)
# train_dataset = Subset(dataset, train_indices)
# val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)
# train_dataloader = DataLoader(train_dataset, batch_size=batch, shuffle=False, drop_last=True)
# val_dataloader = DataLoader(val_dataset, batch_size=batch, shuffle=False, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch, shuffle=False, drop_last=True)

In [93]:
for data in test_dataloader:
      print(data[0])
      break

tensor([[[[-0.3762, -1.4653, -0.0327,  ...,  0.3705, -2.0001,  3.9697],
          [-0.3762, -1.4653, -0.0327,  ...,  0.3705, -2.0001,  4.0764],
          [-0.3762, -1.4653, -0.0327,  ...,  0.3705, -2.0085,  4.1932],
          ...,
          [-0.3762, -1.4653, -0.0327,  ...,  0.3705, -1.7427,  4.7958],
          [-0.3762, -1.4653, -0.0327,  ...,  0.3705, -1.5610,  4.8605],
          [-0.3762, -1.4653, -0.0327,  ...,  0.3705, -1.4129,  4.8970]],

         [[-0.4197, -1.4586, -0.6874,  ...,  0.3705, -2.0001,  3.9697],
          [-0.4197, -1.4586, -0.6874,  ...,  0.3705, -2.0001,  4.0764],
          [-0.4197, -1.4586, -0.6874,  ...,  0.3705, -2.0085,  4.1932],
          ...,
          [-0.4197, -1.4586, -0.6874,  ...,  0.3705, -1.7427,  4.7958],
          [-0.4197, -1.4586, -0.6874,  ...,  0.3705, -1.5610,  4.8605],
          [-0.4197, -1.4586, -0.6874,  ...,  0.3705, -1.4129,  4.8970]],

         [[-0.4317, -1.4287, -0.2946,  ...,  1.9567, -2.0001,  3.9697],
          [-0.4317, -1.4287, -

In [97]:
# LSTM
# model = torch.load("../데이터/Checkpoint/lstm/default/lstm_lr_0.0001_batch_64_hid_1024_sub_True_emb_1024_ws_12_epochs_10.pth", map_location=DEVICE)

# GRU
# model = torch.load("../데이터/Checkpoint/gru/default/gru_lr_0.0001_batch_64_hid_1024_sub_True_emb_1024_ws_12_epochs_9.pth", map_location=DEVICE)

# transformer
# model = torch.load("../데이터/Checkpoint/transformer/default/transformer_lr_0.0001_batch_64_sub_True_emb_1024_ws_12_epochs_15.pth", map_location=DEVICE)

# LSTM attention
# model = torch.load("../데이터/Checkpoint/lstm/attention/lstm_attention_lr_0.0001_batch_64_sub_True_emb_1024_ws_12_epochs_8.pth", map_location=DEVICE)

# GRU attention
# model = torch.load("../데이터/Checkpoint/gru/attention/gru_attention_lr_0.0001_batch_64_sub_True_emb_1024_ws_12_epochs_4.pth", map_location=DEVICE)

# transformer attention
# model = torch.load("../데이터/Checkpoint/transformer/attention/default/transformer_attention_lr_0.0001_batch_64_sub_True_emb_1024_ws_12_epochs_5.pth", map_location=DEVICE)

In [92]:
model.eval()
test_rmses = []
test_mses = []
test_maes = []

pred_data = []
trg_data = []

with torch.no_grad():
    for data in test_dataloader:
        src = data[0][0].to(DEVICE)
        max_len = data[1][0].to(DEVICE)
        try:
            anw = torch.nonzero(data[2][0]).to(DEVICE)[0]
        except:
            continue
        trg = data[3][0].to(DEVICE)

        for index in anw:
            # LSTM
            # output, _, _ = model(src)
            
            # GRU
            # output, _ = model(src)
            
            # nlinear
            # output, _ = model(src)
            
            # transformer
            # src_mask = model.generate_square_subsequent_mask(src.shape[1]).to(src.device)
            # output, _ = model(src, src_mask)
            
            # test_rmse = rmse(output[index], trg[index])
            # test_mse = mse(output[index], trg[index])
            # test_mae = mae(output[index], trg[index])
            
            # pred_data.append(output[index])
            # trg_data.append(trg[index])

            # attention
            output = model(src, index, max_len)

            pred_data.append(output)
            trg_data.append(trg[index])

# save_path = f'../데이터/Checkpoint/transformer/attention/default/transformer_attention_lr_{lr}_batch_{batch}_sub_{sub}_emb_{embedding_dim}_ws_{window_size}_epochs_{5}'
# with open(f'{save_path}_test_rmses.txt', 'w') as f:
#     for item in test_rmses:
#         f.write("%s\n" % item)
# with open(f'{save_path}_test_mses.txt', 'w') as f:
#     for item in test_mses:
#         f.write("%s\n" % item)
# with open(f'{save_path}_test_maes.txt', 'w') as f:
#     for item in test_maes:
#         f.write("%s\n" % item)

pred_data = torch.FloatTensor(pred_data)
trg_data = torch.FloatTensor(trg_data)

# pred_data = pred_data[0]
# trg_data = trg_data[0]

avg_test_rmse = rmse(pred_data, trg_data)
avg_test_mse = mse(pred_data, trg_data)
avg_test_mae = mae(pred_data, trg_data)

print(f'Test RMSE: {avg_test_rmse:.4f}')
print(f'Test MSE: {avg_test_mse:.4f}')
print(f'Test MAE: {avg_test_mae:.4f}')

RuntimeError: The size of tensor a (12) must match the size of tensor b (1024) at non-singleton dimension 2

### ML

In [6]:
embedding_model = torch.load('../데이터/Checkpoint/embedding/default/embedding_lr_0.0001_batch_64_sub_True_emb_1024_ws_12_epochs_13.pth', map_location=DEVICE)
dataset = Apartment_Complex_Dataset(embedding_model, table_1, table_2, table_3, embedding_dim, window_size, 'ML', DEVICE)
dataset_length = len(dataset)
train_size = int(train_ratio * dataset_length)
# train_indices = range(0, train_size)
val_size = int(val_ratio * dataset_length)
# val_indices = range(train_size, train_size + val_size)
test_size = int(test_ratio * dataset_length)
test_indices = range(train_size + val_size, dataset_length)
# train_dataset = Subset(dataset, train_indices)
# val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)
# train_dataloader = DataLoader(train_dataset, batch_size=batch, shuffle=False, drop_last=True)
# val_dataloader = DataLoader(val_dataset, batch_size=batch, shuffle=False, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch, shuffle=False, drop_last=True)

KeyboardInterrupt: 

In [None]:
# lightgbm
# model = joblib.load(f'../데이터/Checkpoint/lightgbm/lightgbm_batch_64_ws_12.pkl')

# catboost
model = joblib.load(f'../데이터/Checkpoint/catboost/catboost_batch_64_ws_12.pkl')

In [35]:
def rmse(y_pred, y_true):
      mse = np.mean((y_true - y_pred) ** 2)
      return np.sqrt(mse)

def mse(y_pred, y_true):
      return np.mean((y_true - y_pred) ** 2)

def nmse(y_pred, y_true):
    mse = np.mean((y_true - y_pred) ** 2)
    var = np.var(y_true)
    return mse / var


In [38]:
test_rmses = []
test_mses = []
test_maes = []

for data in test_dataloader:
    X, y = data[0].squeeze().cpu().numpy(), data[1].squeeze().cpu().numpy()
    y_pred = model.predict(X)

    test_rmse = rmse(y_pred, y)
    test_mse = mse(y_pred, y)
    test_mae = mae(y_pred, y)

    test_rmses.append(test_rmse)
    test_mses.append(test_mse)
    test_maes.append(test_mae)

save_path = f'../데이터/Checkpoint/catboost/catboost_batch_{batch}_ws_{window_size}'
with open(f'{save_path}_test_rmses.txt', 'w') as f:
    for item in test_rmses:
        f.write("%s\n" % item)
with open(f'{save_path}_test_mses.txt', 'w') as f:
    for item in test_mses:
        f.write("%s\n" % item)
with open(f'{save_path}_test_maes.txt', 'w') as f:
    for item in test_maes:
        f.write("%s\n" % item)

avg_test_rmse = sum(test_rmses) / len(test_rmses)
avg_test_mse = sum(test_mses) / len(test_mses)
avg_test_mae = sum(test_maes) / len(test_maes)

print(f'Test RMSE: {avg_test_rmse:.4f}')
print(f'Test MSE: {avg_test_mse:.4f}')
print(f'Test MAE: {avg_test_mae:.4f}')

Test RMSE: 18.5903
Test MSE: 367.7499
Test MAE: 15.3865
