In [1]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import torch
from torch import nn
from torch.utils.data import Subset, DataLoader

from Dataset.Embedding_Dataset import Embedding_Dataset
from Model.Embedding import Embedding

from Dataset.LSTM_Dataset import LSTM_Dataset
from Model.LSTM import LSTM

from Dataset.Attention_Dataset import Attention_Dataset
from Model.Attention import LSTMSeq2Seq

# Reproducibility: push one seed into every random number generator the
# notebook touches (Python, NumPy, PyTorch CPU and CUDA).
SEED = 1234
for seed_everything in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_everything(SEED)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Device selection: CPU is active; the alternatives are kept for other machines.
DEVICE = torch.device('cpu') # CPU
# DEVICE = torch.device('mps:0' if torch.backends.mps.is_available() else 'cpu') # Mac
# DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') # Windows

In [2]:
# --- Embedding model hyperparameters ---
# Optimiser step size, mini-batch size and number of epochs.
embedding_lr, embedding_batch, embedding_epochs = 0.0001, 512, 150
# Layer widths on the encoder side, the embedding itself, and the decoder side.
encoder_dim_1, encoder_dim_2 = 256, 512
embedding_dim = 1024
decoder_dim_1, decoder_dim_2 = 512, 256

# --- LSTM hyperparameters ---
lstm_lr, lstm_batch, lstm_epochs = 0.01, 1, 50
hidden_dim, output_dim = 128, 1
# Number of consecutive time steps fed to the sequence model per sample.
window_size = 5

## Data

In [3]:
# Load the three source tables from PostgreSQL into DataFrames.
#
# The database password is intentionally NOT stored in the notebook: export it
# as the PGPASSWORD environment variable before starting the kernel.
db_password = os.environ.get("PGPASSWORD")
if db_password is None:
    raise RuntimeError(
        "Set the PGPASSWORD environment variable to the PostgreSQL password "
        "before running this cell."
    )

connection_info = "host=localhost dbname=postgres user=postgres port=5432"
# psycopg2 merges keyword arguments into the DSN string, so the secret never
# appears in the notebook source.
conn = psycopg2.connect(connection_info, password=db_password)

table_1_query = '''
    SELECT * FROM building
    '''
# Later cells concatenate the economy rows positionally with per-complex data
# indexed by did = 0..N-1, so the row order must be deterministic.
table_2_query = '''
    SELECT * FROM economy ORDER BY did
    '''
table_3_query = '''
    SELECT * FROM building_price
    '''

table_1 = pd.read_sql(table_1_query, conn)
table_2 = pd.read_sql(table_2_query, conn)
table_3 = pd.read_sql(table_3_query, conn)
# `conn` is left open on purpose in case later cells reuse it; call
# conn.close() once no more queries are needed.



## Embedding

In [4]:
# Join building -> building_price (on aid) -> economy (on did); left joins keep
# every building row even when no price / economy row matches.
building_with_price = pd.merge(table_1, table_3, how='left', on='aid')
table_merge = pd.merge(building_with_price, table_2, how='left', on='did')

# Columns that are identifiers, calendar fields, economy indicators or the
# target; everything else is treated as an apartment feature.
non_apartment_columns = ['aid','location','name','did','year','month','call_rate','m2','price']
apartment_columns = [cols for cols in table_merge.columns if cols not in non_apartment_columns]

apartment = table_merge[apartment_columns]
economy = table_merge[['call_rate','m2']]
price = table_merge[['price']]

# One float32 tensor per frame, in the same order as the frames above.
apartment_tensor, economy_tensor, price_tensor = (
    torch.FloatTensor(frame.values) for frame in (apartment, economy, price)
)

# Model input = apartment features followed by the economy indicators;
# target = price.
input_tensor = torch.cat((apartment_tensor, economy_tensor), dim=1)
output_tensor = price_tensor

## LSTM

In [5]:
# Load the trained embedding model saved as a whole object.
# torch.load unpickles the file, so only load checkpoints you trust.
# NOTE(review): the file name says "e2_256" while the hyperparameter cell sets
# encoder_dim_2 = 512 — confirm this is the intended checkpoint.
embedding_checkpoint_path = '../데이터/Checkpoint/embedding_train_0.8_lr_0.0001_batch_512_epochs_150_e1_256_e2_256_emb_1024_d1512_d2_256.pth'
model = torch.load(embedding_checkpoint_path)

In [6]:
# Build the LSTM training samples: for every apartment complex, encode each
# time step with the embedding model and slice the result into sliding windows.
#   apartment_complexes_embedding_matrix_with_window_size : list of
#       (window_size, embedding width) tensors
#   apartment_complexes_price_with_window_size : list of (1,) tensors holding
#       the price at the time step right after each window
#
# NOTE(review): the output saved with this cell is
# "AttributeError: 'Embedding' object has no attribute 'encoder'" — confirm the
# attribute name exposed by Model.Embedding before relying on this cell.
model.eval()  # inference only

apartment_complexes_embedding_matrix_with_window_size = []
apartment_complexes_price_with_window_size = []

# Identifier columns are not model features.  'dong' is added to table_1 by the
# Attention section; excluding it keeps this cell re-runnable afterwards.
identifier_columns = ['aid', 'location', 'name', 'dong']
feature_columns = [cols for cols in table_1.columns if cols not in identifier_columns]

# Economy indicators are identical for every complex, so build them once.
economy_values = table_2[['call_rate', 'm2']].values
economy_tensor = torch.FloatTensor(economy_values)
num_timesteps = economy_tensor.shape[0]  # one row of table_2 per time step
# assumes did runs 0..num_timesteps-1 in table_2 row order — TODO confirm
timeline = pd.DataFrame({'did': range(num_timesteps)})

# Iterate the rows of table_1 directly instead of re-filtering the table by
# (name, location), which breaks when that pair is not unique.
for apartment_complex_aid, apartment_complex_values in zip(table_1['aid'], table_1[feature_columns].values):
    # Repeat the static complex features for every time step and append the
    # economy indicators: (num_timesteps, n_features + 2).
    apartment_complex_tensor = torch.FloatTensor(apartment_complex_values).repeat(num_timesteps, 1)
    encoder_input_tensor = torch.cat((apartment_complex_tensor, economy_tensor), dim=1)

    # Encode all time steps of the complex in one batched call.
    with torch.no_grad():
        apartment_complex_embedding_matrix_tensor = model.encoder(encoder_input_tensor).float()

    # Price per time step; months without a recorded price become 0.  A left
    # merge keeps exactly one slot per timeline entry even if table_3 holds
    # did values outside the timeline.
    apartment_complex_prices = table_3.loc[table_3['aid'] == apartment_complex_aid, ['did', 'price']]
    price_values = timeline.merge(apartment_complex_prices, on='did', how='left').fillna(0).set_index('did').values
    price_tensor = torch.FloatTensor(price_values)

    # Sliding windows: inputs are steps [i, i + window_size), target is the
    # price at step i + window_size.
    for i in range(num_timesteps - window_size):
        apartment_complexes_embedding_matrix_with_window_size.append(apartment_complex_embedding_matrix_tensor[i:i+window_size, :])
        apartment_complexes_price_with_window_size.append(price_tensor[i+window_size, :])


AttributeError: 'Embedding' object has no attribute 'encoder'

## Attention

In [24]:
# Load the embedding model used by the Attention section (whole-object
# checkpoint; torch.load unpickles the file, so only load trusted files).
# NOTE(review): the file name says "dim_6" while the hyperparameter cell sets
# embedding_dim = 1024 — confirm which embedding width this section expects.
attention_embedding_checkpoint_path = '../데이터/Checkpoint/embedding_lr_0.01_batch_32_epochs_50_dim_6.pth'
model = torch.load(attention_embedding_checkpoint_path)

max_apartment_complexes = 38 # maximum number of complexes


def _third_location_token(location):
    """Return the third space-separated token of a location string."""
    return location.split(' ')[2]


# 'dong' = third token of the location string (presumably the neighbourhood
# name — verify against the data).  This adds a column to table_1 in place.
table_1['dong'] = table_1['location'].map(_third_location_token)
dongs = table_1['dong'].unique()

In [46]:
# Build the Attention training samples.  For every dong and every sliding
# window, the complexes of that dong are packed into fixed-size tensors padded
# with zeros up to max_apartment_complexes.
model.eval()  # inference only, consistent with the LSTM section

# One entry per (dong, window) pair, i.e. n_dongs * (num_timesteps - window_size):
dongs_apartment_complexes_embedding_matrixes_with_window_size_num = [] # number of complexes in the dong (int)
dongs_apartment_complexes_embedding_matrixes_with_window_size_index = [] # row indices of complexes whose target price is non-zero
dongs_apartment_complexes_embedding_matrixes_with_window_size = [] # (max_apartment_complexes, window_size, embedding width)
dongs_apartment_complexes_prices_with_window_size = [] # (max_apartment_complexes, 1)

feature_columns = [cols for cols in table_1.columns if cols not in ['aid','location','name','dong']]

# Economy indicators are shared by every complex: (num_timesteps, 2).
economy_values = table_2[['call_rate','m2']].values
economy_tensor = torch.FloatTensor(economy_values)
num_timesteps = economy_tensor.shape[0]  # one row of table_2 per time step
num_features = table_1[feature_columns].shape[1] + economy_tensor.shape[1]
# assumes did runs 0..num_timesteps-1 in table_2 row order — TODO confirm
timeline = pd.DataFrame({'did': range(num_timesteps)})

# Ask the loaded encoder for its output width instead of trusting the global
# embedding_dim, which belongs to a different checkpoint configuration.
with torch.no_grad():
    embedding_width = model.encoder(torch.zeros(1, num_features)).shape[-1]

for dong in dongs: # per dong
    dong_rows = table_1[table_1['dong'] == dong]

    # Static features of every complex in this dong: (n_complexes, n_features).
    dong_apartment_complexes_values = dong_rows[feature_columns].values
    num_complexes = dong_apartment_complexes_values.shape[0]
    if num_complexes > max_apartment_complexes:
        raise ValueError(
            f"dong {dong!r} has {num_complexes} complexes, "
            f"more than max_apartment_complexes={max_apartment_complexes}"
        )

    # Encoder inputs: complex features repeated over time + economy indicators.
    # Shape (n_complexes, num_timesteps, num_features).
    encoder_input_tensors = torch.zeros(num_complexes, num_timesteps, num_features)
    for i, dong_apartment_complex_values in enumerate(dong_apartment_complexes_values):
        dong_apartment_complex_tensor = torch.FloatTensor(dong_apartment_complex_values).repeat(num_timesteps, 1)
        encoder_input_tensors[i] = torch.cat((dong_apartment_complex_tensor, economy_tensor), dim=1)

    # Embeddings: (n_complexes, num_timesteps, embedding_width); each complex
    # is encoded with one batched call over all of its time steps.
    with torch.no_grad():
        dong_apartment_complexes_embedding_matrixes = torch.zeros(num_complexes, num_timesteps, embedding_width)
        for i in range(num_complexes):
            dong_apartment_complexes_embedding_matrixes[i] = model.encoder(encoder_input_tensors[i])

    # Prices: (n_complexes, num_timesteps, 1); months without a price are 0.
    # A left merge keeps exactly one slot per timeline entry.
    dong_apartment_complexes_aids = dong_rows['aid'].values
    dong_apartment_complexes_prices = torch.zeros(num_complexes, num_timesteps, 1)
    for i, dong_apartment_complex_aid in enumerate(dong_apartment_complexes_aids):
        complex_prices = table_3.loc[table_3['aid'] == dong_apartment_complex_aid, ['did','price']]
        price_values = timeline.merge(complex_prices, on='did', how='left').fillna(0).set_index('did').values
        dong_apartment_complexes_prices[i] = torch.from_numpy(price_values)

    # Sliding windows: inputs are steps [i, i + window_size), target is the
    # price at step i + window_size.  Rows past num_complexes stay zero.
    for i in range(num_timesteps - window_size):
        dong_apartment_complexes_embedding_matrixes_with_window_size = torch.zeros(max_apartment_complexes, window_size, embedding_width)
        dong_apartment_complexes_prices_with_window_size = torch.zeros(max_apartment_complexes, 1)
        dong_apartment_complexes_embedding_matrixes_with_window_size[:num_complexes] = dong_apartment_complexes_embedding_matrixes[:, i:i+window_size, :]
        dong_apartment_complexes_prices_with_window_size[:num_complexes] = dong_apartment_complexes_prices[:, i+window_size, :]

        dongs_apartment_complexes_embedding_matrixes_with_window_size_num.append(num_complexes)
        dongs_apartment_complexes_embedding_matrixes_with_window_size_index.append(torch.nonzero(dong_apartment_complexes_prices_with_window_size, as_tuple=False)[:, 0])
        dongs_apartment_complexes_embedding_matrixes_with_window_size.append(dong_apartment_complexes_embedding_matrixes_with_window_size)
        dongs_apartment_complexes_prices_with_window_size.append(dong_apartment_complexes_prices_with_window_size)