In [2]:
import random
import numpy as np
import pandas as pd

import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

import joblib
from sklearn.metrics import mean_squared_error

import torch
from torch import nn
from torch.utils.data import DataLoader

from Dataset.Apartment_Complex_Dataset import Apartment_Complex_Dataset
from Dataset.Dong_Dataset import Dong_Dataset

from Model.Embedding import Embedding
from Model.LSTM import LSTM
from Model.NLinear import NLinear
from Model.Transformer import Transformer
from Model.Attention import LSTMSeq2Seq
from Model.LSTM_Attention import LSTMAttention

from utils import *

# Reproducibility: push one fixed seed into every RNG this notebook touches
# (Python stdlib, NumPy, PyTorch CPU, current CUDA device, all CUDA devices).
SEED = 1234
for seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(SEED)

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

## Hyperparameters

In [5]:
# Shared by both dataset constructors in this notebook.
embedding_dim = 1024  # passed to Dong_Dataset and Apartment_Complex_Dataset

# Deep-learning evaluation settings.
window_size = 10      # window argument given to Dong_Dataset
batch_size = 1        # DataLoader batch size for the DL test loader

# Gradient-boosting ("ML") evaluation settings.
ml_window_size = 10   # window argument given to Apartment_Complex_Dataset
ml_batch = 128        # DataLoader batch size for the ML test loader
ml_estimators = 150   # not read by the visible cells; matches "estimators_150" in the checkpoint file names

## Data

In [6]:
# Read the three test tables in one pass; the order of the paths fixes which
# frame lands in table_1 / table_2 / table_3.
table_paths = (
    '../데이터/Table/test_table_1.csv',
    '../데이터/Table/test_table_2.csv',
    '../데이터/Table/test_table_3.csv',
)
table_1, table_2, table_3 = map(pd.read_csv, table_paths)

## DL Dataset & Dataloader

In [8]:
# Checkpoint of the trained embedding network; it is handed to the dataset below.
embedding_ckpt = '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1_512_d2_256_d3_128.pth'

# NOTE(review): if this file stores a whole pickled module rather than a
# state_dict, newer PyTorch releases (weights_only defaults to True) will need
# an explicit weights_only=False here -- confirm against the installed version.
model = torch.load(
    embedding_ckpt,
    map_location=torch.device('cpu'),
)

# Test-split dataset for the deep-learning models.
dataset = Dong_Dataset(
    model,
    table_1, table_2, table_3,
    embedding_dim,
    window_size,
    'TEST',
    DEVICE,
)

# Keep the dataset's own ordering; with batch_size == 1 drop_last never drops anything.
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)

In [5]:
# Sanity check: show the shapes of the first four tensors of the first batch only.
for data in dataloader:
    for field_idx in range(4):
        print(data[field_idx].shape)
    break

torch.Size([1, 24, 10, 1024])
torch.Size([1])
torch.Size([1, 1])
torch.Size([1, 24, 1])


## ML Dataset & Dataloader

In [10]:
# Same embedding checkpoint as the DL section, reloaded so this cell does not
# depend on whatever `model` currently holds in the kernel.
embedding_ckpt = '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1_512_d2_256_d3_128.pth'
model = torch.load(
    embedding_ckpt,
    map_location=torch.device('cpu'),
)

# Test-split dataset in 'ML' mode for the gradient-boosting models.
dataset = Apartment_Complex_Dataset(
    model,
    table_1, table_2, table_3,
    embedding_dim,
    ml_window_size,
    'ML',
    'TEST',
    DEVICE,
)

# NOTE(review): drop_last=True discards a trailing incomplete batch, so up to
# ml_batch - 1 test samples are left out of the scores computed from this loader.
dataloader = DataLoader(
    dataset,
    batch_size=ml_batch,
    shuffle=False,
    drop_last=True,
)

### XGBoost

In [9]:
# Score the persisted XGBoost regressor on the ML test loader.
# NOTE(review): the output saved with this cell (~1.08e13) is many orders of
# magnitude above the LightGBM / CatBoost scores on the same loader -- verify
# that this checkpoint was trained on the same features and target scale.
model = joblib.load('../데이터/Checkpoint/xgboost_tr0.8_batch_128_estimators_150_ws10.pkl')

rmses = []
for data in dataloader:
    # squeeze() removes every size-1 axis; this relies on the loader always
    # yielding more than one sample per batch so the batch axis survives.
    X, y = data[0].squeeze().cpu().numpy(), data[1].squeeze().cpu().numpy()
    y_pred = model.predict(X)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the square root of the MSE gives the same RMSE for a
    # single-target y and works on every scikit-learn version.
    rmse = np.sqrt(mean_squared_error(y, y_pred))
    rmses.append(rmse)

# Reported value is the mean of the per-batch RMSEs, not the RMSE over all
# samples pooled together.
print("Average RMSE:", np.mean(rmses))

Average RMSE: 10782874000000.0


### LightGBM

In [7]:
# Score the persisted LightGBM regressor on the ML test loader.
model = joblib.load('../데이터/Checkpoint/lightgbm_tr0.8_batch_128_estimators_150_ws10.pkl')

rmses = []
for data in dataloader:
    # squeeze() removes every size-1 axis; this relies on the loader always
    # yielding more than one sample per batch so the batch axis survives.
    X, y = data[0].squeeze().cpu().numpy(), data[1].squeeze().cpu().numpy()
    y_pred = model.predict(X)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the square root of the MSE gives the same RMSE for a
    # single-target y and works on every scikit-learn version.
    rmse = np.sqrt(mean_squared_error(y, y_pred))
    rmses.append(rmse)

# Reported value is the mean of the per-batch RMSEs, not the RMSE over all
# samples pooled together.
print("Average RMSE:", np.mean(rmses))

Average RMSE: 24.733025994709617


### CatBoost

In [8]:
# Score the persisted CatBoost regressor on the ML test loader.
model = joblib.load('../데이터/Checkpoint/catboost_tr0.8_batch_128_estimators_150_ws10.pkl')

rmses = []
for data in dataloader:
    # squeeze() removes every size-1 axis; this relies on the loader always
    # yielding more than one sample per batch so the batch axis survives.
    X, y = data[0].squeeze().cpu().numpy(), data[1].squeeze().cpu().numpy()
    y_pred = model.predict(X)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the square root of the MSE gives the same RMSE for a
    # single-target y and works on every scikit-learn version.
    rmse = np.sqrt(mean_squared_error(y, y_pred))
    rmses.append(rmse)

# Reported value is the mean of the per-batch RMSEs, not the RMSE over all
# samples pooled together.
print("Average RMSE:", np.mean(rmses))

Average RMSE: 15.491045798972632
