In [None]:
# 필요 library import

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
def split(features, target, val_ratio=0.2):
    """Randomly partition a dataset into training and validation subsets.

    Args:
        features: Tensor indexed by sample along dimension 0.
        target: Tensor indexed by sample along dimension 0, row-aligned
            with ``features``.
        val_ratio: Fraction of the samples reserved for validation. The
            validation count is ``int(val_ratio * n_samples)``, i.e. it is
            rounded down.

    Returns:
        A tuple ``(features_train, target_train, features_val, target_val)``.
        The same shuffled indices are applied to ``features`` and ``target``,
        so rows stay paired.
    """
    n_samples = features.shape[0]
    n_val = int(val_ratio * n_samples)
    # Slice on an explicit train count rather than with negative indices:
    # when n_val == 0, `[:-n_val]` is `[:0]` (empty) and `[-n_val:]` is `[0:]`
    # (everything), which would hand the whole dataset to validation and
    # nothing to training.
    n_train = n_samples - n_val

    shuffled_indices = torch.randperm(n_samples)

    train_indices = shuffled_indices[:n_train]
    val_indices = shuffled_indices[n_train:]

    features_train = features[train_indices]
    target_train = target[train_indices]

    features_val = features[val_indices]
    target_val = target[val_indices]

    return features_train, target_train, features_val, target_val

In [None]:
# Generic training loop. The model is passed in as an argument, so the same
# loop is reused for every model defined in this notebook.
def training_loop(n_epochs, model, loss, optimizer, features, target, log_every=10000):
    """Train ``model`` with full-batch gradient steps and report losses.

    The data is split once, before the first epoch, into a training and a
    validation subset via ``split``. Every epoch performs one forward/backward
    pass over the whole training subset and one optimizer step.

    Args:
        n_epochs: Number of epochs (optimizer steps) to run.
        model: ``torch.nn.Module`` to train; it is switched between train and
            eval mode inside the loop.
        loss: Callable ``loss(prediction, target)`` returning a scalar tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        features: Feature tensor for the full dataset.
        target: Target tensor for the full dataset, row-aligned with
            ``features``.
        log_every: Print the training and validation loss every this many
            epochs. A falsy value (``0`` or ``None``) disables printing.

    Returns:
        None. ``model`` is updated in place and is left in training mode.
    """
    # Split the full dataset into training and validation subsets.
    features_train, target_train, features_val, target_val = split(features, target)

    for epoch in range(1, n_epochs + 1):
        # Forward pass on the training data, with train-mode layers
        # (e.g. dropout) active.
        model.train()
        predict_train = model(features_train)
        loss_train = loss(predict_train, target_train)

        # The validation loss is only monitored, never trained on, so it is
        # computed without building a graph. Eval mode disables dropout here;
        # otherwise the reported validation loss would be perturbed by
        # randomly zeroed activations.
        model.eval()
        with torch.no_grad():
            predict_val = model(features_val)
            loss_val = loss(predict_val, target_val)
            assert not loss_val.requires_grad
        model.train()

        # Backward pass and parameter update. The graph for loss_train was
        # recorded during the train-mode forward pass above, so the mode
        # toggle for validation does not affect these gradients.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Periodically report progress.
        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f}, Validation loss {loss_val.item():.4f}")

In [None]:
# Load the revenue-prediction dataset: one CSV per year (2018-2020), read in
# chronological order and then stacked row-wise into a single frame.
data_2018_revenue, data_2019_revenue, data_2020_revenue = (
    pd.read_csv(csv_path, encoding='utf8')
    for csv_path in (
        "../ProcessedDataSet/동별 매출 데이터/2018_revenue.csv",
        "../ProcessedDataSet/동별 매출 데이터/2019_revenue.csv",
        "../ProcessedDataSet/동별 매출 데이터/2020_revenue.csv",
    )
)

# Each yearly frame keeps its own row labels; the index is not renumbered.
data_revenue = pd.concat([data_2018_revenue, data_2019_revenue, data_2020_revenue])

In [None]:
# Columns of data_revenue grouped by the fixed divisor applied to them
# (presumably to bring the raw values into a similar numeric range for
# training -- confirm against the data description).
scale_to_10000 = [
    '1인가구', '2인가구', '3인가구', '4인가구',
    '0-9세주거', '10-19세주거', '20-29세주거', '30-39세주거', '40-49세주거',
    '50-59세주거', '60-69세주거', '70-79세주거', '80-89세주거',
]
scale_to_100000 = ['임대시세']
scale_to_10000000 = ['월매출']

# NOTE: the division overwrites the columns of data_revenue, so running this
# cell again without reloading the data divides the columns a second time.
for columns, divisor in (
    (scale_to_10000, 10000),
    (scale_to_100000, 100000),
    (scale_to_10000000, 10000000),
):
    for s in columns:
        data_revenue[s] /= divisor

In [None]:
# Split the revenue dataset into model inputs (features) and the target.
# The order of the feature columns defines the column order of the array.
features_revenue_col = [
    '임대시세',
    '0-9세주거', '10-19세주거', '20-29세주거',
    '30-39세주거', '40-49세주거', '50-59세주거',
    '60-69세주거', '70-79세주거', '80-89세주거',
    '1인가구', '2인가구', '3인가구', '4인가구',
    '편의점수',
    '지하철역수',
]
target_revenue_col = ['월매출']

# Pull both column subsets out as float32 NumPy arrays. The target is
# selected with a one-element list, so it stays two-dimensional.
features_revenue, target_revenue = (
    data_revenue[selected_cols].values.astype('float32')
    for selected_cols in (features_revenue_col, target_revenue_col)
)

In [None]:
# Convert the revenue feature/target arrays into torch tensors for training.
tensor_features_revenue, tensor_target_revenue = map(
    torch.tensor, (features_revenue, target_revenue)
)

In [None]:
# Hyperparameters shared by both models trained in this notebook.

# Seed torch's global RNG so that the random train/validation split, the
# weight initialisation and the dropout masks are the same on every
# "Restart Kernel -> Run All".
seed = 42
torch.manual_seed(seed)

input_len = 16          # width of the first Linear layer's input (matches the 16 feature columns)
output_len = 1          # width of the final Linear layer's output (single target column)
epochs = 100000         # number of training epochs passed to training_loop
learning_rate = 5e-4    # learning rate handed to the Adam optimizers
loss = nn.L1Loss()      # L1 (absolute error) loss used for both models

In [None]:
# Revenue model: a fully connected network with layer widths
# input_len -> 2*input_len -> input_len -> output_len, with Tanh activations
# and dropout (p=0.25) after each hidden layer.
model_revenue = nn.Sequential(
    nn.Linear(input_len, 2 * input_len),
    nn.Tanh(),
    nn.Dropout(0.25),
    nn.Linear(2 * input_len, input_len),
    nn.Tanh(),
    nn.Dropout(0.25),
    nn.Linear(input_len, output_len),
)

# Adam over all parameters of the revenue model.
optimizer_revenue = optim.Adam(model_revenue.parameters(), lr=learning_rate)

# Train on the revenue tensors; progress is printed by training_loop.
training_loop(
    n_epochs=epochs,
    model=model_revenue,
    loss=loss,
    optimizer=optimizer_revenue,
    features=tensor_features_revenue,
    target=tensor_target_revenue,
)

In [None]:
# Load the closure-rate dataset: one CSV per year (2018-2020), read in
# chronological order and then stacked row-wise into a single frame.
data_2018_closed, data_2019_closed, data_2020_closed = (
    pd.read_csv(csv_path, encoding='utf8')
    for csv_path in (
        "../ProcessedDataSet/동별 폐업률 데이터/2018_closed.csv",
        "../ProcessedDataSet/동별 폐업률 데이터/2019_closed.csv",
        "../ProcessedDataSet/동별 폐업률 데이터/2020_closed.csv",
    )
)

# Each yearly frame keeps its own row labels; the index is not renumbered.
data_closed = pd.concat([data_2018_closed, data_2019_closed, data_2020_closed])

In [None]:
# Split the closure-rate dataset into model inputs (features) and the target.
# The order of the feature columns defines the column order of the array.
# NOTE(review): unlike data_revenue, no rescaling of data_closed is visible in
# this notebook before its columns are used -- confirm the closure-rate CSVs
# are already scaled appropriately for the Tanh network.
features_closed_col = [
    '임대시세',
    '0-9세주거', '10-19세주거', '20-29세주거',
    '30-39세주거', '40-49세주거', '50-59세주거',
    '60-69세주거', '70-79세주거', '80-89세주거',
    '1인가구', '2인가구', '3인가구', '4인가구',
    '편의점수',
    '지하철역수',
]
target_closed_col = ['폐업률']

# Pull both column subsets out as float32 NumPy arrays. The target is
# selected with a one-element list, so it stays two-dimensional.
features_closed, target_closed = (
    data_closed[selected_cols].values.astype('float32')
    for selected_cols in (features_closed_col, target_closed_col)
)

In [None]:
# Convert the closure-rate feature/target arrays into torch tensors for training.
tensor_features_closed, tensor_target_closed = map(
    torch.tensor, (features_closed, target_closed)
)

In [None]:
# Closure-rate model: a single hidden layer of width input_len with a Tanh
# activation and dropout (p=0.25), followed by a linear output layer.
model_closed = nn.Sequential(
    nn.Linear(input_len, input_len),
    nn.Tanh(),
    nn.Dropout(0.25),
    nn.Linear(input_len, output_len),
)

# Adam over all parameters of the closure-rate model.
optimizer_closed = optim.Adam(model_closed.parameters(), lr=learning_rate)

# Train on the closure-rate tensors; progress is printed by training_loop.
training_loop(
    n_epochs=epochs,
    model=model_closed,
    loss=loss,
    optimizer=optimizer_closed,
    features=tensor_features_closed,
    target=tensor_target_closed,
)