In [2]:
import psycopg2

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import Subset, DataLoader

from Dataset.Embedding_Dataset import Embedding_Dataset
from Model.Embedding import Embedding

from Dataset.LSTM_Dataset import LSTM_Dataset
from Model.LSTM import LSTM

from Dataset.Attention_Dataset import Attention_Dataset
from Model.Attention import LSTMSeq2Seq

from sklearn.preprocessing import StandardScaler

# Seed every random number generator this notebook relies on (Python, NumPy,
# PyTorch CPU and CUDA) with the same value.
SEED = 1234
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

# cuDNN: request deterministic kernels and switch the autotuner off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Device used when loading checkpoints and creating tensors below.
DEVICE = torch.device('cpu')  # Mac
# DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')  # Windows
# DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') # 윈도우

In [3]:
# ---- Data split ----
train_ratio = 0.8

# ---- Embedding model (settings prefixed `embedding_` / encoder / decoder) ----
embedding_lr = 1e-5
embedding_weight_decay = 0
embedding_batch = 128
embedding_epochs = 150
encoder_dim_1 = 128
encoder_dim_2 = 256
encoder_dim_3 = 512
embedding_dim = 1024
decoder_dim_1 = 512
decoder_dim_2 = 256
decoder_dim_3 = 128

# ---- ML baseline ----
ml_batch = 1  # fixed
ml_estimators = 150
ml_window_size = 10

# ---- LSTM ----
lstm_lr = 1e-4
lstm_weight_decay = 0
lstm_batch = 128
lstm_epochs = 150
lstm_hidden_dim = 256
lstm_window_size = 10

# ---- NLinear ----
nlinear_lr = 1e-4
nlinear_weight_decay = 0
nlinear_batch = 128
nlinear_epochs = 150
nlinear_window_size = 10

# ---- Attention ----
attention_lr = 1e-4
attention_weight_decay = 0
attention_batch = 1  # fixed
attention_epochs = 150
attention_hidden_dim = 256
attention_window_size = 10

# ---- Transformer ----
transformer_lr = 1e-5
transformer_weight_decay = 0
transformer_batch = 1  # fixed
transformer_epochs = 150
transformer_window_size = 10

## Data

In [4]:
# Connection settings for the local PostgreSQL instance.
# The password is deliberately NOT stored in the notebook. When the DSN carries
# no password, libpq (which psycopg2 wraps) takes it from the PGPASSWORD
# environment variable or from the ~/.pgpass file, so set one of those before
# running this cell.
connection_info = "host=localhost dbname=postgres user=postgres port=5432"

# One full-table read per source table.
table_1_query = '''
    SELECT * FROM building
    '''
table_2_query = '''
    SELECT * FROM economy
    '''
table_3_query = '''
    SELECT * FROM building_price
    '''

conn = psycopg2.connect(connection_info)
try:
    table_1 = pd.read_sql(table_1_query, conn)  # building
    table_2 = pd.read_sql(table_2_query, conn)  # economy
    table_3 = pd.read_sql(table_3_query, conn)  # building_price
finally:
    # psycopg2's `with conn:` only scopes a transaction and does not close the
    # connection, so release it explicitly even if a read fails.
    conn.close()

# table_1 = pd.read_csv('../데이터/Table/table_1.csv') 
# table_2 = pd.read_csv('../데이터/Table/table_2.csv') 
# table_3 = pd.read_csv('../데이터/Table/table_3.csv') 



## Embedding

In [15]:
# Left-join the price records onto the buildings (key `aid`), then the economy
# indicators onto the result (key `did`).
table_merge = (
    table_1
    .merge(table_3, how='left', on='aid')
    .merge(table_2, how='left', on='did')
)

# Columns that are identifiers, date fields, economy indicators or the target;
# every remaining column is treated as a building feature.
non_feature_columns = ['aid','location','name','did','year','month','call_rate','m2','price']
apartment = table_merge[[col for col in table_merge.columns if col not in non_feature_columns]]
economy = table_merge[['call_rate','m2']]
price = table_merge[['price']].mul(0.0001)  # rescale the target by 1e-4

apartment_values = apartment.to_numpy()
economy_values = economy.to_numpy()
price_values = price.to_numpy()

# Model input = building features followed by economy indicators, standardised
# column-wise; the target is left as the rescaled price.
input_values = StandardScaler().fit_transform(
    np.hstack((apartment_values, economy_values))
)
output_values = price_values

input_tensor = torch.FloatTensor(input_values)
output_tensor = torch.FloatTensor(output_values)

## ML

In [5]:
# Trained embedding model used by the ML section, loaded onto DEVICE.
# Note: torch.load unpickles the file, so only load checkpoints you trust.
embedding_checkpoint_path = '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1512_d2_256_d3_128.pth'
model = torch.load(embedding_checkpoint_path, map_location=DEVICE)

In [6]:
# Rename dongs: lookup table from the dong name found in the data to the name
# it should be replaced with.
old_and_new_dongs = {'용산동5가':'한강로동','한강로2가':'한강로동','창동':'창제동','돈암동':'정릉동','거여동':'위례동','문정동':'위례동','장지동':'위례동','문배동':'원효로동','산천동':'원효로동','신창동':'원효로동','원효로1가':'원효로동','화곡동':'우장산동','내발산동':'우장산동','영등포동8가':'영등포동','양평동3가':'양평동','안암동1가':'안암동','염리동':'아현동','성수동2가':'성수2가제2동','성수동1가':'성수1가제1동','중동':'성산동','노고산동':'서교동','신정동':'서강동','창전동':'서강동','삼선동4가':'삼선동','보문동3가':'보문동','동소문동7가':'동선동','당산동4가':'당산제2동','당산동5가':'당산제2동','당산동':'당산제2동','당산동3가':'당산제1동','당산동1가':'당산제1동','당산동2가':'당산제1동','본동':'노량진동','신수동':'노고산동','대흥동':'노고산동','금호동4가':'금호동','금호동2가':'금호동','충무로4가':'광희동','방화동':'공항동','도화동':'공덕동','신공덕동':'공덕동','일원동':'개포동'}
def change_dongs(location):
    """Return `location` with its third space-separated token renamed.

    The third token is looked up in `old_and_new_dongs`; when it is a key, it
    is replaced by the mapped name, otherwise the string comes back unchanged.
    A location with fewer than three tokens raises IndexError.
    """
    tokens = location.split(' ')
    tokens[2] = old_and_new_dongs.get(tokens[2], tokens[2])
    return ' '.join(tokens)
# The steps below modify the loaded tables in place, so each one must run only
# once per loaded table. A flag in DataFrame.attrs records that a step has been
# applied; a freshly loaded table has empty attrs and is processed again.
# Without the guard, re-running this cell would chain the renames
# (e.g. 신수동 -> 노고산동 -> 서교동) and multiply the price by 1e-4 a second time.
if not table_1.attrs.get('dongs_renamed', False):
    table_1['location'] = table_1['location'].apply(change_dongs)
    table_1.attrs['dongs_renamed'] = True

# Normalisation
# Building features: every column except the identifier and text columns.
feature_columns = [cols for cols in table_1.columns if cols not in ['aid','location','name']]
if not table_1.attrs.get('features_standardized', False):
    table_1[feature_columns] = StandardScaler().fit_transform(table_1[feature_columns])
    table_1.attrs['features_standardized'] = True
if not table_2.attrs.get('indicators_standardized', False):
    table_2[['call_rate','m2']] = StandardScaler().fit_transform(table_2[['call_rate','m2']])
    table_2.attrs['indicators_standardized'] = True
# The price is only rescaled by a constant factor, not standardised.
if not table_3.attrs.get('price_rescaled', False):
    table_3['price'] = table_3['price'] * 0.0001
    table_3.attrs['price_rescaled'] = True

# Inference only: put the embedding model in eval mode on the target device.
model.eval()
model.to(DEVICE)

# Samples for the ML baseline. Each feature item is one window of
# `ml_window_size` consecutive embeddings flattened into a single row; the
# matching target is the price of the period right after the window.
apartment_complexes_embedding_matrix_with_window_size = [] # each item: (1, embedding_dim * ml_window_size)
apartment_complexes_price_with_window_size = [] # each item: (1,)

feature_columns = [cols for cols in table_1.columns if cols not in ['aid','location','name']]

# The economy indicators are the same for every complex, so build them once.
economy_values = table_2[['call_rate','m2']].values
economy_tensor = torch.FloatTensor(economy_values).to(DEVICE)

apartment_complexes_location = table_1['location']
apartment_complexes_name = table_1['name']
for apartment_complex_location, apartment_complex_name in zip(apartment_complexes_location, apartment_complexes_name): # per complex
    # Rows of table_1 for this (name, location) pair; the code below expects exactly one.
    complex_mask = (table_1['name'] == apartment_complex_name) & (table_1['location'] == apartment_complex_location)

    # Encoder input: the static complex features repeated for each of the 204
    # periods (2006/01-2022/12 per the original note), joined with that
    # period's economy indicators.
    apartment_complex_values = table_1[complex_mask][feature_columns].values
    apartment_complex_tensor = torch.FloatTensor(apartment_complex_values).to(DEVICE).repeat(204, 1)
    encoder_input_tensor = torch.cat((apartment_complex_tensor, economy_tensor), dim=1)

    # Embed one period at a time, exactly as the encoder was called before.
    with torch.no_grad():
        apartment_complex_embedding_matrix_tensor = torch.stack([
            model.encoder(encoder_input_tensor[i].unsqueeze(0)).squeeze()
            for i in range(encoder_input_tensor.shape[0])
        ]) # (204, embedding_dim)

    # Price per period for this complex; periods without a record become 0.
    apartment_complex_aid = table_1[complex_mask]['aid'].squeeze()
    price_values = pd.DataFrame({'did': range(0, 204)}).merge(table_3[table_3['aid'] == apartment_complex_aid][['did','price']], on='did', how='outer').fillna(0).set_index('did').values
    price_tensor = torch.FloatTensor(price_values).to(DEVICE) # (204, 1)

    for i in range(apartment_complex_embedding_matrix_tensor.shape[0]-ml_window_size):
        # Keep only windows whose target period has a recorded (non-zero) price.
        if price_tensor[i+ml_window_size, :] != 0:
            # Allocate the flat buffer ONCE per sample. It used to be re-created
            # inside the loop below, which wiped every step except the last one.
            apartment_complex_embedding_matrix_concat_tensor = torch.zeros(1, embedding_dim * ml_window_size)
            for window in range(ml_window_size):
                apartment_complex_embedding_matrix_concat_tensor[:, window*embedding_dim:(window+1)*embedding_dim] = apartment_complex_embedding_matrix_tensor[i+window:i+window+1, :]
            apartment_complexes_embedding_matrix_with_window_size.append(apartment_complex_embedding_matrix_concat_tensor)
            apartment_complexes_price_with_window_size.append(price_tensor[i+ml_window_size, :])

In [9]:
# Sanity check: shape of the first target collected by the ML cell above.
apartment_complexes_price_with_window_size[0].shape

torch.Size([1])

## LSTM

In [8]:
# Trained embedding model used by the LSTM section, always loaded onto the CPU.
# Note: torch.load unpickles the file, so only load checkpoints you trust.
embedding_checkpoint_path = '../데이터/Checkpoint/embedding_tr_0.8_lr_1e-05_wd_0_batch_128_epochs_131_e1_128_e2_128_e3_512_emb_1024_d1512_d2_256_d3_128.pth'
cpu_device = torch.device('cpu')
model = torch.load(embedding_checkpoint_path, map_location=cpu_device)

In [11]:
# Inference only.
model.eval()

# Samples for the LSTM. Each feature item is a window of `lstm_window_size`
# consecutive embeddings; the matching target is the price of the period right
# after the window (0 when no price was recorded for that period).
apartment_complexes_embedding_matrix_with_window_size = [] # each item: (lstm_window_size, embedding width)
apartment_complexes_price_with_window_size = [] # each item: (1,)

feature_columns = [cols for cols in table_1.columns if cols not in ['aid','location','name']]

# Normalise the tables in place, at most once per loaded table. A flag in
# DataFrame.attrs records that a step was applied, so re-running this cell no
# longer multiplies the price by 1e-4 again.
# NOTE(review): the flag only protects against cells that set it. Any other
# cell that rescales table_3['price'] without setting 'price_rescaled' still
# leads to double scaling - verify when running the whole notebook.
if not table_1.attrs.get('features_standardized', False):
    table_1[feature_columns] = StandardScaler().fit_transform(table_1[feature_columns])
    table_1.attrs['features_standardized'] = True
if not table_2.attrs.get('indicators_standardized', False):
    table_2[['call_rate','m2']] = StandardScaler().fit_transform(table_2[['call_rate','m2']])
    table_2.attrs['indicators_standardized'] = True
if not table_3.attrs.get('price_rescaled', False):
    table_3[['price']] = table_3[['price']] * 0.0001
    table_3.attrs['price_rescaled'] = True

# The economy indicators are the same for every complex, so build them once.
economy_values = table_2[['call_rate','m2']].values
economy_tensor = torch.FloatTensor(economy_values)

apartment_complexes_location = table_1['location']
apartment_complexes_name = table_1['name']
for apartment_complex_location, apartment_complex_name in zip(apartment_complexes_location, apartment_complexes_name): # per complex
    # Rows of table_1 for this (name, location) pair; the code below expects exactly one.
    complex_mask = (table_1['name'] == apartment_complex_name) & (table_1['location'] == apartment_complex_location)

    # Encoder input: static complex features repeated for each of the 204
    # periods, joined with that period's economy indicators.
    apartment_complex_values = table_1[complex_mask][feature_columns].values
    apartment_complex_tensor = torch.FloatTensor(apartment_complex_values).repeat(204, 1)
    encoder_input_tensor = torch.cat((apartment_complex_tensor, economy_tensor), dim=1)

    # Embed one period at a time, exactly as the encoder was called before.
    with torch.no_grad():
        apartment_complex_embedding_matrix_tensor = torch.stack([
            model.encoder(encoder_input_tensor[i].unsqueeze(0)).squeeze()
            for i in range(encoder_input_tensor.shape[0])
        ])

    # Price per period for this complex; periods without a record become 0.
    apartment_complex_aid = table_1[complex_mask]['aid'].squeeze()
    price_values = pd.DataFrame({'did': range(0, 204)}).merge(table_3[table_3['aid'] == apartment_complex_aid][['did','price']], on='did', how='outer').fillna(0).set_index('did').values
    price_tensor = torch.FloatTensor(price_values)

    # Unlike the ML cell, every window is kept, including zero-price targets.
    for i in range(apartment_complex_embedding_matrix_tensor.shape[0]-lstm_window_size):
        apartment_complexes_embedding_matrix_with_window_size.append(apartment_complex_embedding_matrix_tensor[i:i+lstm_window_size, :])
        apartment_complexes_price_with_window_size.append(price_tensor[i+lstm_window_size, :])

In [23]:
# Sanity check: shape of the first embedding window collected by the LSTM cell above.
apartment_complexes_embedding_matrix_with_window_size[0].shape

torch.Size([10, 1024])

## Attention

In [24]:
# Embedding model used by the attention section.
# map_location pins the weights to the CPU: the cell that follows builds all of
# its tensors on the CPU, and a checkpoint saved from a GPU would otherwise be
# restored onto a GPU (or fail to load on a machine without one).
# Note: torch.load unpickles the file, so only load checkpoints you trust.
model = torch.load('../데이터/Checkpoint/embedding_lr_0.01_batch_32_epochs_50_dim_6.pth', map_location=torch.device('cpu'))

max_apartment_complexes = 38 # maximum number of complexes per dong (padding size)

# The dong is the third space-separated token of the location string.
table_1['dong'] = table_1['location'].apply(lambda x: x.split(' ')[2])
dongs = table_1['dong'].unique()

In [46]:
# Samples for the attention model: one sample per (dong, window start).
# Every sample is zero-padded to `max_apartment_complexes` rows.
dongs_apartment_complexes_embedding_matrixes_with_window_size_num = [] # number of complexes in the dong (int per sample)
dongs_apartment_complexes_embedding_matrixes_with_window_size_index = [] # row indices of complexes whose target price is non-zero
dongs_apartment_complexes_embedding_matrixes_with_window_size = [] # each item: (max_apartment_complexes, attention_window_size, embedding width)
dongs_apartment_complexes_prices_with_window_size = [] # each item: (max_apartment_complexes, 1)

# Inference only; the other sections also switch to eval mode before encoding.
model.eval()

# Building features: everything except identifiers, text columns and the dong.
feature_columns = [cols for cols in table_1.columns if cols not in ['aid','location','name','dong']]

# The economy indicators are the same for every dong, so build them once.
economy_values = table_2[['call_rate','m2']].values # (204, 2) per the original note
economy_tensor = torch.FloatTensor(economy_values)

for dong in dongs: # per dong
    dong_rows = table_1[table_1['dong'] == dong]
    dong_apartment_complexes_values = dong_rows[feature_columns].values # (complexes in dong, features)
    num_complexes = dong_apartment_complexes_values.shape[0]
    if num_complexes > max_apartment_complexes:
        # The padded buffers below cannot hold this dong; fail with a clear message.
        raise ValueError(f"dong {dong!r} has {num_complexes} complexes, more than max_apartment_complexes={max_apartment_complexes}")

    # Embedding matrix per complex: static features repeated over the 204
    # periods, joined with the economy indicators, then encoded one period at a
    # time. The embedding width is taken from the encoder output rather than
    # the global `embedding_dim`, because this section loads a different
    # checkpoint than the one that setting describes.
    with torch.no_grad():
        complex_embedding_matrices = []
        for dong_apartment_complex_values in dong_apartment_complexes_values:
            dong_apartment_complex_tensor = torch.FloatTensor(dong_apartment_complex_values).repeat(204,1)
            encoder_input_tensor = torch.cat((dong_apartment_complex_tensor, economy_tensor), dim=1)
            apartment_complex_embedding_matrix = torch.stack([
                model.encoder(encoder_input_tensor[j].unsqueeze(0)).squeeze()
                for j in range(204) # per period
            ])
            complex_embedding_matrices.append(apartment_complex_embedding_matrix)
        dong_apartment_complexes_embedding_matrixes = torch.stack(complex_embedding_matrices) # (complexes in dong, 204, embedding width)
    attention_embedding_dim = dong_apartment_complexes_embedding_matrixes.shape[-1]

    # Price per complex and period; periods without a record become 0.
    dong_apartment_complexes_aids = dong_rows['aid'].values # (complexes in dong,)
    dong_apartment_complexes_prices = torch.zeros(num_complexes,204,1) # (complexes in dong, 204, 1)
    for i, dong_apartment_complex_aid in enumerate(dong_apartment_complexes_aids):
        dong_apartment_complexes_prices[i] = torch.from_numpy(pd.DataFrame({'did': range(0, 204)}).merge(table_3[table_3['aid'] == dong_apartment_complex_aid][['did','price']], on='did', how='outer').fillna(0).set_index('did').values) # (204, 1)

    # Slice both tensors into windows. The window length comes from the config
    # value `attention_window_size`; the bare name `window_size` used before is
    # not defined anywhere in this notebook.
    for i in range(204-attention_window_size): # window start
        dong_apartment_complexes_embedding_matrixes_with_window_size = torch.zeros(max_apartment_complexes, attention_window_size, attention_embedding_dim)
        dong_apartment_complexes_prices_with_window_size = torch.zeros(max_apartment_complexes, 1)
        # Fill the first `num_complexes` rows; the remaining rows stay zero padding.
        dong_apartment_complexes_embedding_matrixes_with_window_size[:num_complexes] = dong_apartment_complexes_embedding_matrixes[:, i:i+attention_window_size, :]
        dong_apartment_complexes_prices_with_window_size[:num_complexes] = dong_apartment_complexes_prices[:, i+attention_window_size, :]
        dongs_apartment_complexes_embedding_matrixes_with_window_size_num.append(num_complexes)
        dongs_apartment_complexes_embedding_matrixes_with_window_size_index.append(torch.nonzero(dong_apartment_complexes_prices_with_window_size, as_tuple=False)[:, 0])
        dongs_apartment_complexes_embedding_matrixes_with_window_size.append(dong_apartment_complexes_embedding_matrixes_with_window_size)
        dongs_apartment_complexes_prices_with_window_size.append(dong_apartment_complexes_prices_with_window_size)