In [4]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings(action='ignore')
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
#torch.manual_seed(1015)
# define 'device' to upload tensor in gpu
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
train = pd.read_csv("dacon_12/train.csv",encoding="euc-kr")
train['DateTime']=pd.to_datetime(train.DateTime)
train['date']=train.DateTime.dt.date
train=train.groupby('date').sum().reset_index()

In [6]:
# 스케일링 
mini = train.iloc[:,1:].min()
size=train.iloc[:,1:].max()-train.iloc[:,1:].min()
train.iloc[:,1:]=(train.iloc[:,1:] - mini) / size

input_window = 30
output_window = 7

window_x = np.zeros((train.shape[0] - (input_window + output_window), input_window, 4))
window_y = np.zeros((train.shape[0] - (input_window + output_window), output_window, 4))

for start in range(train.shape[0] - (input_window + output_window)):
    end = start + input_window    
    window_x[start,:, :] = train.iloc[start : end                , 1: ].values
    window_y[start,:, :] = train.iloc[end   : end + output_window, 1: ].values

In [7]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size = input_size,
                            hidden_size = hidden_size,
                            batch_first=True)
        self.hidden_lstm = nn.LSTM(input_size = hidden_size,
                                   hidden_size = hidden_size,
                                   batch_first=True)
        
        self.time_fc = nn.Linear(hidden_size, 4)
    
    def forward(self, x_time):
    
        out_time, _ = self.lstm(x_time)
        out_time, _ = self.hidden_lstm(out_time)
        out_time, _ = self.hidden_lstm(out_time)
        out_time, _ = self.hidden_lstm(out_time)
        
        
        out_time = self.time_fc(out_time[:,-7:, :])
        
        return out_time.view(-1,7,4)
    
model = LSTM(input_size = 4, hidden_size = 30).to(device)

In [8]:
window_x = torch.tensor(window_x).float().to(device)
window_y = torch.tensor(window_y).float().to(device)

# Train model
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-2)
criterion = nn.MSELoss(size_average = True)
num_epochs  = 500
train_error = []
for t in range(num_epochs):
    train_pred = model(window_x)
    loss = criterion(train_pred, window_y) ### trend
    train_error.append(loss)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 10 == 0 and t !=0:
        print(f"{t} Epochs train MSE: {loss.item():1.5f}")

10 Epochs train MSE: 0.02840
20 Epochs train MSE: 0.01277
30 Epochs train MSE: 0.01269
40 Epochs train MSE: 0.00992
50 Epochs train MSE: 0.00828
60 Epochs train MSE: 0.00808
70 Epochs train MSE: 0.00802
80 Epochs train MSE: 0.00795
90 Epochs train MSE: 0.00790
100 Epochs train MSE: 0.00783
110 Epochs train MSE: 0.00774
120 Epochs train MSE: 0.00759
130 Epochs train MSE: 0.00730
140 Epochs train MSE: 0.00664
150 Epochs train MSE: 0.00638
160 Epochs train MSE: 0.00607
170 Epochs train MSE: 0.00580
180 Epochs train MSE: 0.00563
190 Epochs train MSE: 0.00552
200 Epochs train MSE: 0.00544
210 Epochs train MSE: 0.00539
220 Epochs train MSE: 0.00529
230 Epochs train MSE: 0.00514
240 Epochs train MSE: 0.00579
250 Epochs train MSE: 0.00537
260 Epochs train MSE: 0.00498
270 Epochs train MSE: 0.00471
280 Epochs train MSE: 0.00458
290 Epochs train MSE: 0.00461
300 Epochs train MSE: 0.00411
310 Epochs train MSE: 0.00487
320 Epochs train MSE: 0.00409
330 Epochs train MSE: 0.00359
340 Epochs train MS

In [11]:
submission = pd.read_csv("dacon_12/submission.csv", encoding = 'euc-kr')
last_month = torch.tensor(window_x[-1,:,:][np.newaxis,...]).float().to(device)
for start in range((len(submission) - output_window)//7 + 2):
    start = start * 7
    next_week = model(last_month)
    last_month = torch.cat([last_month[-7:], next_week], dim = 1)
    
    pred_week = next_week.cpu().detach().numpy().reshape(output_window,4)
    pred_week = pred_week * size.values + mini.values
    pred_week = pred_week.astype(int)
    
    if start/7 == (len(submission) - output_window)//7 + 1:
        submission.iloc[start :, 1:] = pred_week[-submission.iloc[start :, 1:].shape[0]:,:]
    else:
        submission.iloc[start : start + output_window, 1:] = pred_week
submission

Unnamed: 0,DateTime,사용자,세션,신규방문자,페이지뷰
0,2020-11-09,1970,1934,417,45451
1,2020-11-10,4852,4910,1296,127442
2,2020-11-11,5281,5394,1419,143775
3,2020-11-12,4470,4483,1097,118084
4,2020-11-13,4317,4271,1075,108960
...,...,...,...,...,...
56,2021-01-04,2905,2815,701,72509
57,2021-01-05,2719,2622,698,65416
58,2021-01-06,2320,2249,592,54456
59,2021-01-07,1781,1780,414,42581


In [12]:
submission.to_csv('submission_1223_1.csv', index = False, encoding = 'euc-kr')