In [None]:
!pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import datetime
from sklearn.model_selection import train_test_split

In [None]:
from torch.nn import Transformer
from torch import nn
import torch
import math

In [None]:
# Report whether this PyTorch build was compiled with MPS support.
print(torch.backends.mps.is_built())

In [None]:
# Report whether the MPS backend can actually be used in this session.
print(torch.backends.mps.is_available())

In [None]:
# Load the training table from a path relative to the working directory
# and preview its first rows.
data = pd.read_csv("./target/train.csv")
data.head()

In [None]:
# Plot the "광진구" column against row position on a wide (20 x 5) figure.
plt.figure(figsize=(20,5))
plt.plot(range(len(data)), data["광진구"])
# Last expression of the cell: display the first rows of the table again.
data.head()

In [None]:
# Total number of rows in the loaded table.
len(data)

In [None]:
def converting_to_real_dt(col):
    """Parse an iterable of ``YYYYMMDD`` values into ``datetime.date`` objects.

    Each element is passed through ``str()`` before parsing, so integers such
    as ``20200101`` and equivalent strings are both accepted.

    Args:
        col: Iterable of values whose string form matches ``%Y%m%d``.

    Returns:
        list: One ``datetime.date`` per element, in input order.

    Raises:
        ValueError: If an element's string form does not match ``%Y%m%d``.
    """
    parse = datetime.datetime.strptime
    return [parse(str(raw), '%Y%m%d').date() for raw in col]

# Add a "date" column of datetime.date objects parsed from the "일시" column.
data["date"] = converting_to_real_dt(data["일시"])
# Remove the raw "일시" column in place.
# NOTE(review): this mutates `data`, so re-running the cell without reloading
# the CSV fails because "일시" no longer exists.
data.drop(["일시"], axis=1, inplace=True)

- 천명 단위

In [None]:
# Preview the first rows of the table in its current state.
data.head()

In [None]:
# One two-column frame ("date" + value column) per target column.
data_1, data_2, data_3, data_4 = (
    data[["date", value_col]]
    for value_col in ("광진구", "동대문구", "성동구", "중랑구")
)

In [None]:
# Row position at the 80% mark (60% + 20% of the rows).
# NOTE(review): presumably the origin of the hard-coded split boundaries used
# by `spliting` — confirm.
len(data)*0.6 + len(data)*0.2

In [None]:
def spliting(data, train_end=877, val_end=1169):
    """Split a table by row position into train / validation / test parts.

    Rows are not shuffled: the first ``train_end`` rows form the training
    part, rows ``train_end`` up to (not including) ``val_end`` form the
    validation part, and the remaining rows form the test part. Each part
    gets a fresh 0-based index. The input is not modified.

    Args:
        data: A pandas DataFrame (or Series) to split.
        train_end: Row position where the training part ends. The default is
            the value that was previously hard-coded.
        val_end: Row position where the validation part ends. The default is
            the value that was previously hard-coded.

    Returns:
        tuple: ``(data_train, data_val, data_test)``.

    Raises:
        ValueError: If the boundaries are negative or out of order.
    """
    if not 0 <= train_end <= val_end:
        raise ValueError(
            f"expected 0 <= train_end <= val_end, got {train_end} and {val_end}"
        )

    # reset_index returns a new object, so no slice is mutated in place.
    data_train = data[:train_end].reset_index(drop=True)
    data_val = data[train_end:val_end].reset_index(drop=True)
    data_test = data[val_end:].reset_index(drop=True)
    return data_train, data_val, data_test

In [None]:
# Apply the same positional train / validation / test split to each of the
# four per-column frames.
data_1_train_df, data_1_val_df, data_1_test_df = spliting(data_1)
data_2_train_df, data_2_val_df, data_2_test_df = spliting(data_2)
data_3_train_df, data_3_val_df, data_3_test_df = spliting(data_3)
data_4_train_df, data_4_val_df, data_4_test_df = spliting(data_4)

In [None]:
# Row counts of the train, validation and test parts of the first frame.
print(len(data_1_train_df), len(data_1_val_df), len(data_1_test_df))

In [None]:
# Preview the validation part of the first frame.
data_1_val_df.head()

In [None]:
# Extract the "광진구" values of each split as NumPy arrays.
data_1_train, data_1_val, data_1_test = (
    split_df["광진구"].to_numpy()
    for split_df in (data_1_train_df, data_1_val_df, data_1_test_df)
)

In [None]:
# Extract the "동대문구" values of each split as NumPy arrays.
data_2_train, data_2_val, data_2_test = (
    split_df["동대문구"].to_numpy()
    for split_df in (data_2_train_df, data_2_val_df, data_2_test_df)
)

In [None]:
# Extract the "성동구" values of each split as NumPy arrays.
data_3_train, data_3_val, data_3_test = (
    split_df["성동구"].to_numpy()
    for split_df in (data_3_train_df, data_3_val_df, data_3_test_df)
)

In [None]:
# Extract the "중랑구" values of each split as NumPy arrays.
data_4_train, data_4_val, data_4_test = (
    split_df["중랑구"].to_numpy()
    for split_df in (data_4_train_df, data_4_val_df, data_4_test_df)
)

In [None]:
from torch.utils.data import DataLoader, Dataset

class windowDataset(Dataset):
    """Sliding-window dataset over a one-dimensional series.

    The series is cut into ``(input_window, output_window)`` pairs, moving
    ``stride`` steps between consecutive pairs. Windows are stored as float64
    NumPy arrays (the ``np.zeros`` default) with a trailing feature axis of
    size 1.

    Args:
        y: Series to window; must expose ``shape[0]`` and positional slicing.
        input_window: Number of steps in each input window.
        output_window: Number of steps in each output window.
        stride: Offset between the starts of consecutive windows.
    """

    def __init__(self, y, input_window=80, output_window=20, stride=5):
        # Total number of observations.
        series_len = y.shape[0]
        # Number of samples obtained when sliding by `stride`.
        num_samples = (series_len - input_window - output_window) // stride + 1

        # Allocate directly in (sample, step, feature) layout.
        inputs = np.zeros((num_samples, input_window, 1))
        targets = np.zeros((num_samples, output_window, 1))

        for idx in range(num_samples):
            # The output window starts right where the input window ends.
            boundary = stride * idx + input_window
            inputs[idx, :, 0] = y[stride * idx:boundary]
            targets[idx, :, 0] = y[boundary:boundary + output_window]

        self.x = inputs
        self.y = targets
        self.len = len(inputs)

    def __getitem__(self, i):
        """Return ``(input, output[:-1], output[1:])`` for sample ``i``.

        The second element is the output window without its last step and the
        third is the output window without its first step.
        """
        return self.x[i], self.y[i, :-1], self.y[i, 1:]

    def __len__(self):
        """Return the number of windows."""
        return self.len

In [None]:
# Window lengths in rows: 336 input steps and 168 output steps.
# NOTE(review): the 24*14 / 24*7 factors read like hours over 14 / 7 days, but
# the date column is parsed with '%Y%m%d' (no time of day) — confirm the
# intended horizon.
iw = 24*14
ow = 24*7

# Stride-1 sliding windows over `data_1_train`, batched 64 at a time.
# No `shuffle` argument is passed to the DataLoader.
train_dataset = windowDataset(data_1_train, input_window=iw, output_window=ow, stride=1)
train_loader = DataLoader(train_dataset, batch_size=64)

In [None]:
class TFModel(nn.Module):
    """Encoder-decoder Transformer for single-feature sequence forecasting.

    Scalar inputs are projected to ``d_model`` features by linear layers,
    positional encodings are added, the result is passed through
    ``torch.nn.Transformer``, and a final linear layer maps each output step
    back to one value.

    Args:
        d_model: Feature width used inside the Transformer.
        nhead: Number of attention heads.
        nhid: Width of the Transformer feed-forward layers.
        nlayers: Number of encoder layers and of decoder layers.
        dropout: Dropout probability for the Transformer and for both
            positional encodings.
    """

    def __init__(self, d_model, nhead, nhid, nlayers, dropout=0.5):
        super().__init__()
        self.transformer = Transformer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=nhid,
            num_encoder_layers=nlayers,
            num_decoder_layers=nlayers,
            dropout=dropout,
        )
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        self.pos_encoder_d = PositionalEncoding(d_model, dropout)
        self.linear = nn.Linear(d_model, 1)
        self.encoder = nn.Linear(1, d_model)
        self.encoder_d = nn.Linear(1, d_model)

    def generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask for causal attention.

        Entries on and below the diagonal are ``0.0``; entries above it are
        ``-inf``, so position ``i`` cannot attend to positions after ``i``.
        The mask is created on the default device.
        """
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

    def forward(self, src, tgt, srcmask, tgtmask):
        """Run the model on batch-first inputs.

        Args:
            src: Encoder input of shape ``(batch, src_len, 1)``.
            tgt: Decoder input of shape ``(batch, tgt_len, 1)``.
            srcmask: Attention mask for the encoder, or ``None``.
            tgtmask: Attention mask for the decoder, or ``None``.

        Returns:
            Tensor of shape ``(tgt_len, batch, 1)`` (sequence-first, as
            produced by the Transformer).
        """
        # The positional encoding selects positions along dim 0, so switch to
        # sequence-first BEFORE adding it. Applying it to the batch-first
        # tensor would index positions by batch element and give every time
        # step of a sequence the same encoding.
        src = self.pos_encoder(self.encoder(src).transpose(0, 1))
        tgt = self.pos_encoder_d(self.encoder_d(tgt).transpose(0, 1))

        output = self.transformer(src, tgt, srcmask, tgtmask)
        return self.linear(output)

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a tensor, then apply dropout.

    A ``(max_len, 1, d_model)`` table is precomputed and stored as the
    non-trainable buffer ``pe``: even feature indices hold sines and odd
    feature indices hold cosines of ``position * frequency``.

    Args:
        d_model: Feature width of the encoding.
        dropout: Dropout probability applied after the addition.
        max_len: Number of positions stored in the table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        positions = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        even_dims = torch.arange(0, d_model, 2).float()
        frequencies = torch.exp(even_dims * (-math.log(10000.0) / d_model))
        angles = positions * frequencies

        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        # Singleton middle axis so the table broadcasts over dim 1 of the input.
        self.register_buffer('pe', table.unsqueeze(1))

    def forward(self, x):
        """Add the first ``x.size(0)`` table rows to ``x`` and apply dropout.

        Positions are taken along dim 0 of ``x``.
        """
        return self.dropout(x + self.pe[:x.size(0), :])

def gen_attention_mask(x):
    """Return a boolean tensor that is ``True`` wherever ``x`` equals zero."""
    return torch.eq(x, 0)

In [None]:
# Run on the CPU.
# NOTE(review): MPS support is probed earlier in the notebook but not used
# here — confirm that CPU-only execution is intentional.
device = torch.device("cpu")