In [1]:
import psycopg2

import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor

import joblib
from sklearn.metrics import mean_squared_error

import torch
from torch import nn
from torch.utils.data import Subset, DataLoader

from Dataset.Embedding_Dataset import Embedding_Dataset
from Model.Embedding import Embedding

from Dataset.Apartment_Complex_Dataset import Apartment_Complex_Dataset
from Model.LSTM import LSTM
from Model.NLinear import NLinear
from Model.Transformer import Transformer

from Dataset.Dong_Dataset import Dong_Dataset
from Model.Attention import LSTMSeq2Seq
from Model.LSTM_Attention import LSTMAttention

from Dataset.Test_Dataset import Test_Dataset

from utils import *

# Reproducibility: push one fixed seed into every RNG this notebook relies on.
SEED = 1234
for seed_fn in (
    random.seed,                 # Python stdlib RNG
    np.random.seed,              # NumPy legacy global RNG
    torch.manual_seed,           # PyTorch default generator
    torch.cuda.manual_seed,      # current CUDA device
    torch.cuda.manual_seed_all,  # every CUDA device
):
    seed_fn(SEED)

# Ask cuDNN for deterministic kernels and turn off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Train

In [2]:
# ---------------------------------------------------------------------------
# Training hyperparameters, grouped by the model family they configure.
# ---------------------------------------------------------------------------

# Fraction of the data used for training.
train_ratio = 0.8

# --- Embedding model (encoder -> embedding -> decoder layer widths) ---
embedding_lr = 1e-5
embedding_weight_decay = 0
embedding_batch = 128
embedding_epochs = 150
encoder_dim_1, encoder_dim_2, encoder_dim_3 = 128, 256, 512
embedding_dim = 1024
decoder_dim_1, decoder_dim_2, decoder_dim_3 = 512, 256, 128

# --- Classical ML regressors ---
ml_batch = 128
ml_estimators = 150
ml_window_size = 10

# --- LSTM ---
lstm_lr = 1e-4
lstm_weight_decay = 0
lstm_batch = 128
lstm_epochs = 150
lstm_hidden_dim = 256
lstm_window_size = 10

# --- NLinear ---
nlinear_lr = 1e-6
nlinear_weight_decay = 0
nlinear_batch = 128
nlinear_epochs = 500
nlinear_window_size = 10

# --- Attention (seq2seq) ---
attention_lr = 1e-6
attention_weight_decay = 0
attention_batch = 1  # fixed
attention_epochs = 500
attention_hidden_dim = 256
attention_window_size = 10

# --- LSTM + attention ---
lstm_att_lr = 1e-4
lstm_att_batch = 1
lstm_att_epochs = 50
lstm_att_hidden_dim = 256
lstm_att_output_dim = 1
lstm_att_window_size = 5

# --- Transformer ---
transformer_lr = 1e-5
transformer_weight_decay = 0
transformer_batch = 1  # fixed
transformer_epochs = 500
transformer_window_size = 10

## Data

In [3]:
# Load the three source tables from PostgreSQL into DataFrames.
#
# The password is intentionally NOT part of the connection string: libpq (the
# client library underneath psycopg2) resolves it from the PGPASSWORD
# environment variable or from ~/.pgpass. The password that used to be
# hard-coded in this cell should be treated as leaked and rotated.
connection_info = "host=localhost dbname=postgres user=postgres port=5432"

table_1_query = '''
    SELECT * FROM building
    '''
table_2_query = '''
    SELECT * FROM economy
    '''
table_3_query = '''
    SELECT * FROM building_price
    '''

conn = psycopg2.connect(connection_info)
try:
    table_1 = pd.read_sql(table_1_query, conn)
    table_2 = pd.read_sql(table_2_query, conn)
    table_3 = pd.read_sql(table_3_query, conn)
finally:
    # The results are fully materialized as DataFrames, so release the
    # connection even if one of the reads fails.
    conn.close()

# Offline alternative: read the same tables from CSV exports instead of the DB.
# table_1 = pd.read_csv('../데이터/Table/table_1.csv')
# table_2 = pd.read_csv('../데이터/Table/table_2.csv')
# table_3 = pd.read_csv('../데이터/Table/table_3.csv')



## Embedding

In [15]:
# Left-join table_1 with table_3 on 'aid', then attach table_2 on 'did'.
table_merge = (
    table_1
    .merge(table_3, how='left', on='aid')
    .merge(table_2, how='left', on='did')
)

# Identifier, date, economy and target columns are excluded from the
# apartment features; every remaining column is kept in its original order.
non_feature_columns = ['aid','location','name','did','year','month','call_rate','m2','price']
apartment = table_merge[[column for column in table_merge.columns if column not in non_feature_columns]]
economy = table_merge[['call_rate','m2']]
price = table_merge[['price']] * 0.0001  # target, rescaled by a factor of 0.0001

apartment_values = apartment.to_numpy()
economy_values = economy.to_numpy()
price_values = price.to_numpy()

# Model input = apartment features followed by the economy columns, standardized column-wise.
input_values = StandardScaler().fit_transform(
    np.hstack((apartment_values, economy_values))
)
output_values = price_values

# float32 tensors for the embedding model.
input_tensor = torch.FloatTensor(input_values)
output_tensor = torch.FloatTensor(output_values)

## Apartment Complex

### ML

In [33]:
# Load the trained embedding model from its checkpoint (deserialized onto the CPU).
# NOTE(review): torch.load unpickles a complete model object here; only load
# checkpoints you trust, and confirm the installed PyTorch version still
# permits full-object loading by default.
model = torch.load('../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1512_d2_256_d3_128.pth', map_location=torch.device('cpu'))

# Apartment-complex dataset in 'ML' mode, windowed with the ML window size.
dataset = Apartment_Complex_Dataset(model, table_1, table_2, table_3, embedding_dim, ml_window_size, 'ML', DEVICE)

# Batch with the ML batch size. This cell previously borrowed lstm_batch; both
# are 128 in the config cell, so the produced batches are unchanged, but the ML
# pipeline no longer silently follows the LSTM setting.
dataloader = DataLoader(dataset, batch_size=ml_batch, shuffle=False, drop_last=True)

In [34]:
# Sanity check: print the shapes of the first two fields of the first batch only.
for data in dataloader:
    for field_index in (0, 1):
        print(data[field_index].shape)
    break

torch.Size([128, 1, 10240])
torch.Size([128, 1])


### DL

In [35]:
# Reload the trained embedding model from its checkpoint (deserialized onto the CPU).
model = torch.load(
    '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1512_d2_256_d3_128.pth',
    map_location=torch.device('cpu'),
)

# Apartment-complex dataset in 'DL' mode, windowed with the LSTM window size.
dataset = Apartment_Complex_Dataset(
    model,
    table_1,
    table_2,
    table_3,
    embedding_dim,
    lstm_window_size,
    'DL',
    DEVICE,
)

# Fixed-order batches; an incomplete final batch is dropped.
dataloader = DataLoader(
    dataset,
    batch_size=lstm_batch,
    shuffle=False,
    drop_last=True,
)

In [36]:
# Sanity check: print the shapes of the first two fields of the first batch only.
for data in dataloader:
    for field_index in (0, 1):
        print(data[field_index].shape)
    break

torch.Size([128, 10, 1024])
torch.Size([128, 1])


## Dong

In [4]:
# Load the trained embedding model from its checkpoint directly onto DEVICE.
model = torch.load(
    '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1512_d2_256_d3_128.pth',
    map_location=DEVICE,
)

# Dong-level dataset, windowed with the attention window size.
dataset = Dong_Dataset(
    model,
    table_1,
    table_2,
    table_3,
    embedding_dim,
    attention_window_size,
    DEVICE,
)

# One sample per batch (attention_batch), in dataset order.
dataloader = DataLoader(
    dataset,
    batch_size=attention_batch,
    shuffle=False,
    drop_last=True,
)

In [10]:
# Sanity check: print the shapes of the four fields of the first batch only.
for data in dataloader:
    for field_index in range(4):
        print(data[field_index].shape)
    break

torch.Size([1, 38, 10, 1024])
torch.Size([1])
torch.Size([1, 3])
torch.Size([1, 38, 1])


# Test

In [23]:
# Test-time settings: embedding width, look-back window length, samples per batch.
embedding_dim, window_size, batch_size = 1024, 10, 1

## Data

In [3]:
# Read the three held-out test tables from CSV.
# Note: this rebinds table_1 / table_2 / table_3, replacing the training tables.
test_table_paths = (
    '../데이터/Table/test_table_1.csv',
    '../데이터/Table/test_table_2.csv',
    '../데이터/Table/test_table_3.csv',
)
table_1, table_2, table_3 = map(pd.read_csv, test_table_paths)

## Dataset & Dataloader

In [27]:
# Load the trained embedding model from its checkpoint (deserialized onto the CPU).
model = torch.load(
    '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1512_d2_256_d3_128.pth',
    map_location=torch.device('cpu'),
)

# Test dataset built from the test tables with the test-time window size.
dataset = Test_Dataset(
    model,
    table_1,
    table_2,
    table_3,
    embedding_dim,
    window_size,
    DEVICE,
)

# Fixed-order batches of batch_size samples.
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)

In [28]:
# Sanity check: print the shapes of the four fields of the first batch only.
# NOTE(review): the last recorded run of this cell failed with
# "RuntimeError: stack expects each tensor to be equal size" while fetching a
# batch. Presumably a Test_Dataset field has a variable length; confirm in the
# dataset / collate logic before relying on this loader.
for data in dataloader:
    for field_index in range(4):
        print(data[field_index].shape)
    break

RuntimeError: stack expects each tensor to be equal size, but got [1] at entry 0 and [2] at entry 1