In [1]:
# 필요 library import

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
def split(features, target):
    """Randomly split features and targets into training and validation subsets.

    A random permutation of the row indices is drawn with ``torch.randperm``.
    The last ``int(0.2 * n_samples)`` permuted indices form the validation
    subset and all remaining indices form the training subset. The same
    indices are applied to ``features`` and ``target`` so rows stay aligned.

    Args:
        features: Object exposing ``.shape[0]`` and indexable along its first
            dimension by an index tensor (a ``torch.Tensor`` in this notebook).
        target: Object indexable by the same index tensor, with one entry per
            row of ``features``.

    Returns:
        Tuple ``(features_train, target_train, features_val, target_val)``.
    """
    n_samples = features.shape[0]
    n_val = int(0.2 * n_samples)
    # Slice at an explicit boundary instead of using ``-n_val``: when n_val is
    # 0 (fewer than 5 samples) ``[:-0]`` is empty and ``[-0:]`` is the whole
    # permutation, which would leave the training subset empty.
    n_train = n_samples - n_val

    shuffled_indices = torch.randperm(n_samples)

    train_indices = shuffled_indices[:n_train]
    val_indices = shuffled_indices[n_train:]

    features_train = features[train_indices]
    target_train = target[train_indices]

    features_val = features[val_indices]
    target_val = target[val_indices]

    return features_train, target_train, features_val, target_val

In [3]:
# Training loop used to fit a model.
# The model is passed in as a parameter, so no per-model loop is required.
def training_loop(n_epochs, model, loss, optimizer, features_train, target_train, features_val, target_val, log_every=10000):
    """Run full-batch training for ``n_epochs`` epochs, logging both losses.

    Each epoch performs one forward pass over the whole training set, one
    gradient-free forward pass over the validation set, and one optimizer
    step driven by the training loss only.

    Args:
        n_epochs: Number of epochs to run; epochs are numbered from 1.
        model: Callable mapping a feature batch to predictions.
        loss: Callable ``loss(predictions, target)`` returning a scalar tensor.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()`` for the
            model's parameters.
        features_train: Training inputs passed to ``model``.
        target_train: Training targets passed to ``loss``.
        features_val: Validation inputs passed to ``model``.
        target_val: Validation targets passed to ``loss``.
        log_every: Print the training and validation loss whenever the epoch
            number is a multiple of this value. Defaults to 10000. A falsy
            value (``0`` or ``None``) disables logging.

    Returns:
        None. The model's parameters are updated in place by the optimizer.
    """
    for epoch in range(1, n_epochs + 1):
        # Forward pass on the training data.
        predict_train = model(features_train)
        loss_train = loss(predict_train, target_train)

        # The validation loss is never used for learning, so it is computed
        # under torch.no_grad() to avoid building an autograd graph.
        with torch.no_grad():
            predict_val = model(features_val)
            loss_val = loss(predict_val, target_val)
            assert not loss_val.requires_grad

        # Backward pass and parameter update from the training loss.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Periodically report progress. Both losses were computed with the
        # parameters as they were before this epoch's optimizer step.
        if log_every and epoch % log_every == 0:
            print(
                f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                f" Validation loss {loss_val.item():.4f}"
            )

In [4]:
# Load the training dataset from CSV. (The original note labelled this the
# sales-prediction dataset; the column used as the target further down in
# this notebook is '폐업률'.)
DATA_PATH = "../../dataset/to_trian_data/close.csv"
data = pd.read_csv(DATA_PATH, encoding='utf8')

In [5]:
# Column groups, keyed by the constant each column is divided by below.
scale_to_100 = [
    '편의점수', '슈퍼마켓수', '여관수', '외식업수', '주유소수',
    '피시방수', '당구장수', '노래방수', '독서실수', '지하철역수',
]
scale_to_1000 = ['1인가구', '2인가구', '3인가구', '4인가구']
scale_to_10000 = [
    '0-9세주거', '10-19세주거', '20-29세주거', '30-39세주거', '40-49세주거',
    '50-59세주거', '60-69세주거', '70-79세주거', '80-89세주거',
    '직장인구', '생활인구',
]
scale_to_100000 = ['임대시세']

# Divide every listed column by its group's constant (presumably to bring the
# features to comparable magnitudes - confirm against the raw data).
# NOTE: this mutates `data` in place, so re-running the cell without reloading
# the CSV divides the columns again.
for columns, divisor in (
    (scale_to_100, 100),
    (scale_to_1000, 1000),
    (scale_to_10000, 10000),
    (scale_to_100000, 100000),
):
    for s in columns:
        data[s] /= divisor

In [6]:
# Split the frame into model inputs and the regression target: '폐업률' is the
# target, every other column is a feature. Both become float32 NumPy arrays.
target_column = '폐업률'
features = data.drop(target_column, axis=1).values.astype('float32')
target = data[target_column].values.astype('float32').ravel()

In [7]:
# Copy the NumPy arrays into tensors. The target gets a trailing dimension of
# size 1 so that it lines up with the single-output model trained below.
tensor_target = torch.unsqueeze(torch.tensor(target), dim=-1)
tensor_features = torch.tensor(features)

In [13]:
# Seed torch's global RNG so the random train/validation split - and anything
# drawn from the same RNG afterwards, such as layer initialisation - is the
# same on every Restart & Run All.
torch.manual_seed(42)
features_train, target_train, features_val, target_val = split(tensor_features, tensor_target)

In [9]:
# Model dimensions and optimisation hyperparameters.
# The input width is read from the feature tensor instead of being hard-coded
# (it was the literal 26), so it stays correct if dataset columns change.
input_len = tensor_features.shape[1]
output_len = 1
epochs = 100000
learning_rate = 1e-4
loss = nn.MSELoss()

In [14]:
# A single linear layer mapping the input features to one output value,
# optimised with Adam and fitted with the shared training loop.
model = nn.Linear(in_features=input_len, out_features=output_len)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

training_loop(
    n_epochs=epochs,
    model=model,
    loss=loss,
    optimizer=optimizer,
    features_train=features_train,
    target_train=target_train,
    features_val=features_val,
    target_val=target_val,
)

Epoch 10000, Training loss 28.7494, Validation loss 19.4616
Epoch 20000, Training loss 24.9155, Validation loss 16.7130
Epoch 30000, Training loss 23.9296, Validation loss 16.1604
Epoch 40000, Training loss 23.6879, Validation loss 16.1812
Epoch 50000, Training loss 23.6140, Validation loss 16.2225
Epoch 60000, Training loss 23.5907, Validation loss 16.2313
Epoch 70000, Training loss 23.5752, Validation loss 16.2366
Epoch 80000, Training loss 23.5644, Validation loss 16.2391
Epoch 90000, Training loss 23.5553, Validation loss 16.2393
Epoch 100000, Training loss 23.5470, Validation loss 16.2388


In [None]:
# Persist the trained model. The module object itself is passed to
# torch.save, so the whole module is serialised rather than only its
# state_dict.
torch.save(obj=model, f='../../models/closed.pt')