In [2]:
# library import
import torch
from torch import nn
import torch.optim as optim

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

# module import
from utils import common_utils
from utils.constant_utils import *

In [2]:
df = common_utils.merge_data(Directory.train_data, Directory.test_data)

In [3]:
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

class GridDataset(Dataset):
    def __init__(self):
        total_df = common_utils.merge_data(Directory.train_data, Directory.test_data)
        
        lat_bins = np.linspace(total_df['latitude'].min(), total_df['latitude'].max(), 43)
        long_bins = np.linspace(total_df['longitude'].min(), total_df['longitude'].max(), 28)

        total_df['lat_bin'] = np.digitize(total_df['latitude'], lat_bins)
        total_df['long_bin'] = np.digitize(total_df['longitude'], long_bins)

        train_df = total_df[total_df['type']=='train']
        test_df = total_df[total_df['type']=='test']

        grid = torch.zeros(len(lat_bins), len(long_bins))
        X = torch.stack([grid]*len(train_df))
        y = torch.tensor(train_df['deposit'].values)
        
        for i, data in tqdm(train_df.iterrows()):
            # 아파트 위치의 bin이 1부터 시작해서 -1 해준다.
            lat_bin, long_bin = data['lat_bin']-1, data['long_bin']-1
            X[i,lat_bin,long_bin] = 1

        self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)
        
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return (self.X[idx], self.y[idx])

In [4]:
from torch.utils.data import DataLoader

train_data = GridDataset()
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
X, y = next(iter(train_data))
batched_X, batched_y = next(iter(train_loader))

1801228it [02:31, 11917.94it/s]
  self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
  self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)


In [5]:
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(in_features=2240, out_features=1)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

In [6]:
# batch 단위로 학습
device = 'cuda'

model = CNNModel().to(device)
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
losses = []

model.train()

for epoch in tqdm(range(20)):
    epoch_loss = 0.0
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred,y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    losses.append(epoch_loss)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss}")

  5%|▌         | 1/20 [02:28<47:09, 148.93s/it]

Epoch 1, Loss: 758788154.3256836


 10%|█         | 2/20 [04:57<44:38, 148.83s/it]

Epoch 2, Loss: 698665665.7675781


 15%|█▌        | 3/20 [07:26<42:09, 148.78s/it]

Epoch 3, Loss: 696310186.8989258


 20%|██        | 4/20 [09:54<39:39, 148.70s/it]

Epoch 4, Loss: 695360188.3120117


 25%|██▌       | 5/20 [12:23<37:10, 148.71s/it]

Epoch 5, Loss: 694891497.3681641


 30%|███       | 6/20 [14:53<34:48, 149.14s/it]

Epoch 6, Loss: 694577529.1230469


 35%|███▌      | 7/20 [17:22<32:16, 148.95s/it]

Epoch 7, Loss: 694343315.6704102


 40%|████      | 8/20 [19:50<29:45, 148.76s/it]

Epoch 8, Loss: 694193162.1000977


 45%|████▌     | 9/20 [22:18<27:14, 148.56s/it]

Epoch 9, Loss: 694103645.6313477


 50%|█████     | 10/20 [24:46<24:44, 148.44s/it]

Epoch 10, Loss: 694030793.9306641


 55%|█████▌    | 11/20 [27:15<22:15, 148.37s/it]

Epoch 11, Loss: 693969778.6435547


 60%|██████    | 12/20 [29:43<19:47, 148.43s/it]

Epoch 12, Loss: 693879657.309082


 65%|██████▌   | 13/20 [32:08<17:11, 147.33s/it]

Epoch 13, Loss: 693852873.0063477


 70%|███████   | 14/20 [34:23<14:22, 143.74s/it]

Epoch 14, Loss: 693811441.9545898


 75%|███████▌  | 15/20 [36:40<11:47, 141.50s/it]

Epoch 15, Loss: 693787450.7358398


 80%|████████  | 16/20 [38:56<09:19, 139.89s/it]

Epoch 16, Loss: 693742740.2773438


 85%|████████▌ | 17/20 [41:11<06:55, 138.54s/it]

Epoch 17, Loss: 693727646.4599609


 90%|█████████ | 18/20 [43:29<04:36, 138.36s/it]

Epoch 18, Loss: 693728183.543457


 95%|█████████▌| 19/20 [45:47<02:18, 138.18s/it]

Epoch 19, Loss: 693701492.5849609


100%|██████████| 20/20 [48:04<00:00, 144.21s/it]

Epoch 20, Loss: 693670800.7519531





In [None]:
# MLP 모듈 (정형 데이터 처리: 금리, 면적 등)
class MLPModel(nn.Module):
    def __init__(self, input_size):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 128)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return x

In [None]:
class MetaDataset(Dataset):
    def __init__(self):
        total_df = common_utils.merge_data(Directory.train_data, Directory.test_data)
        
        self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)
        
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return (self.X[idx], self.y[idx])