- 시계열 데이터에 대한 예측
    - sunspots dataset : 단변량 시계열 데이터
- tensorflow로 모델 개발
- torch로 동일한 모델 개발

# Sunspots dataset

In [1]:
import numpy as np
import pandas as pd
import urllib

url = 'https://storage.googleapis.com/download.tensorflow.org/data/Sunspots.csv'
urllib.request.urlretrieve(url, 'sunspots.csv')
sunspots_df = pd.read_csv('sunspots.csv')
sunspots_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Monthly Mean Total Sunspot Number
0,0,1749-01-31,96.7
1,1,1749-02-28,104.3
2,2,1749-03-31,116.7
3,3,1749-04-30,92.8
4,4,1749-05-31,141.7


In [2]:
time_step = sunspots_df['Date'].values
sunspots = sunspots_df['Monthly Mean Total Sunspot Number'].values
sunspots_max = sunspots.max()
sunspots_min = sunspots.min()
sunspots = (sunspots - sunspots_min) / (sunspots_max - sunspots_min)

In [3]:
split_time = 3000
X_train  = sunspots[:split_time]
X_valid = sunspots[split_time:]
time_train = time_step[:split_time]
time_valid = time_step[split_time:]

# tensorflow

In [4]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    ds = ds.shuffle(shuffle_buffer)
    # 종속변수와 독립변수를 나눔
    ds = ds.map(lambda w: (w[:-1], w[1:]))
    return ds.batch(batch_size).prefetch(1)

In [5]:
import tensorflow as tf
from tensorflow import keras

# 0~29의 데이터를 이용해 뒤의 1~30의 데이터를 예측함
window_size = 30
batch_size = 32
shuffle_buffer = 1000

tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)

train_set = windowed_dataset(X_train, window_size, batch_size, shuffle_buffer)
valid_set = windowed_dataset(X_valid, window_size, batch_size, shuffle_buffer)

In [7]:
for i,(x_tf,y_tf) in enumerate(train_set.take(1)):
    if i==1:
        break

In [8]:
x_tf.shape, y_tf.shape

(TensorShape([32, 30, 1]), TensorShape([32, 30, 1]))

In [32]:
# first batch
x_tf[0,:10,0], y_tf[0,:10,0]

(<tf.Tensor: shape=(10,), dtype=float64, numpy=
 array([0.185334  , 0.18257157, 0.16248117, 0.09718734, 0.20015068,
        0.23606228, 0.1594676 , 0.12506278, 0.15243596, 0.24234053])>,
 <tf.Tensor: shape=(10,), dtype=float64, numpy=
 array([0.18257157, 0.16248117, 0.09718734, 0.20015068, 0.23606228,
        0.1594676 , 0.12506278, 0.15243596, 0.24234053, 0.40256153])>)

In [10]:
np.where(sunspots == x_tf[0,0,0].numpy())

(array([184, 652, 808]),)

In [13]:
sunspots[808:808+window_size]

array([0.185334  , 0.18257157, 0.16248117, 0.09718734, 0.20015068,
       0.23606228, 0.1594676 , 0.12506278, 0.15243596, 0.24234053,
       0.40256153, 0.11049724, 0.08864892, 0.16750377, 0.20919136,
       0.18834756, 0.15369161, 0.10723255, 0.1210447 , 0.11878453,
       0.14590658, 0.09392265, 0.10647916, 0.1443998 , 0.22225013,
       0.15268709, 0.11752888, 0.13209442, 0.10899046, 0.13259669])

In [14]:
# 0~29의 데이터를 이용해 뒤의 1~30의 데이터를 예측함
a,_ = tf.unique(tf.reshape(x_tf[:,1:,:] == y_tf[:,:-1,:],-1))
a

<tf.Tensor: shape=(1,), dtype=bool, numpy=array([ True])>

In [63]:
# 참고
print(x_tf.shape)
x_conv_tf = keras.layers.Conv1D(filters=60, kernel_size=5, padding='causal', activation='relu', input_shape=[None, 1])(x_tf)
print(x_conv_tf.shape)
x_lstm1_tf = keras.layers.LSTM(60, return_sequences=True)(x_conv_tf)
print(x_lstm1_tf.shape)
x_lstm2_tf = keras.layers.LSTM(60, return_sequences=True)(x_lstm1_tf)
print(x_lstm2_tf.shape)
x_dense1_tf = keras.layers.Dense(30, activation='relu')(x_lstm2_tf)
print(x_dense1_tf.shape)

(32, 30, 1)
(32, 30, 60)
(32, 30, 60)
(32, 30, 60)
(32, 30, 30)


In [68]:
# 참고
# (32,30,60)에서 30은 window size, 60은 feature
# LSTM layer의 첫번째 인자는 output feature의 개수를 의미
# sequences는 window size와 동일하게 설계됨

output_features = 58
print(x_lstm1_tf.shape)
x_lstm2_tf_temp1 = keras.layers.LSTM(output_features, return_sequences=True)(x_lstm1_tf)
x_lstm2_tf_temp2 = keras.layers.LSTM(output_features, return_sequences=False)(x_lstm1_tf)

print(x_lstm2_tf_temp1.shape)
print(x_lstm2_tf_temp2.shape)

(32, 30, 60)
(32, 30, 58)
(32, 58)


In [29]:
model = keras.models.Sequential([
    # causal padding은 시계열 데이터에서 미래의 데이터를 미리 보는 일을 방지하기 위해 과거 데이터쪽으로만 padding을 추가
    keras.layers.Conv1D(filters=60, kernel_size=5, padding='causal', activation='relu', input_shape=[None, 1]),
    keras.layers.LSTM(60, return_sequences=True),
    keras.layers.LSTM(60, return_sequences=True),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1)
])

optimizer = keras.optimizers.SGD(lr=1e-5, momentum=0.9)
loss = keras.losses.Huber()
model.compile(loss=loss, optimizer=optimizer, metrics=['mae'])

# callback은 생략
model.fit(train_set, epochs=40, validation_data=valid_set)



Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.src.callbacks.History at 0x7e101c197280>

In [110]:
total_mae = 0
i = 100
count = 0
y_mean_list = []
y_pred_mean_list = []
for x, y in valid_set.take(i):
    y_pred = model(x)
    y_mean_list.append(y.numpy().mean())
    y_pred_mean_list.append(y_pred.numpy().mean())
    mae = np.mean(np.abs(y.numpy() - y_pred.numpy()))
    total_mae += mae
    count += 1
total_mae /= count
print(y_mean_list)
print(y_pred_mean_list)
print(total_mae)

[0.20798436715218482, 0.15421898543445506, 0.16226351916959653, 0.1926165138958647, 0.15383705842959988, 0.21573957810145655, 0.15849399219564966]
[0.21765175, 0.16638473, 0.17513427, 0.20348962, 0.16530155, 0.2234903, 0.16663487]
0.04641491220696688


# pytorch

In [16]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

class TimeSeriesDataset(Dataset):
    def __init__(self, series, window_size):
        self.series = series
        self.window_size = window_size

    def __len__(self):
        return len(self.series) - self.window_size

    def __getitem__(self, idx):
        X = self.series[idx : idx+self.window_size]
        y = self.series[idx+1 : idx+1+self.window_size]
        return X, y

window_size = 30
timeSeries_DS = TimeSeriesDataset(X_train, window_size)
x_torch, y_torch = timeSeries_DS[0]

In [17]:
x_torch

array([0.24284279, 0.26192868, 0.29306881, 0.23304872, 0.35585133,
       0.34957308, 0.39678553, 0.27749874, 0.31767956, 0.31592165,
       0.66373682, 0.35660472, 0.30688096, 0.31767956, 0.37343044,
       0.36966349, 0.37669513, 0.41863385, 0.35735811, 0.43119036,
       0.38171773, 0.27498744, 0.26494224, 0.31567052, 0.29306881,
       0.18206931, 0.18960321, 0.23606228, 0.25414365, 0.21220492])

In [21]:
timeSeries_loader = DataLoader(timeSeries_DS, batch_size=32, shuffle=True)
x_torch,y_torch = next(iter(timeSeries_loader))
x_torch.shape, y_torch.shape

(torch.Size([32, 30]), torch.Size([32, 30]))

In [None]:
x_torch[0,:10], y_torch[0,:10]

(tensor([0.3330, 0.2722, 0.3546, 0.2941, 0.3380, 0.2504, 0.2069, 0.4915, 0.4448,
         0.3478], dtype=torch.float64),
 tensor([0.2722, 0.3546, 0.2941, 0.3380, 0.2504, 0.2069, 0.4915, 0.4448, 0.3478,
         0.4367], dtype=torch.float64))

In [None]:
np.where(sunspots == x_torch[0,0].numpy())

(array([2617, 2749]),)

In [None]:
sunspots[2749:2749+window_size]

array([0.33299849, 0.27222501, 0.35459568, 0.29407333, 0.33802109,
       0.2503767 , 0.20693119, 0.49146158, 0.44475138, 0.34781517,
       0.43671522, 0.59241587, 0.48895028, 0.49045706, 0.36087393,
       0.47790055, 0.53164239, 0.5668006 , 0.50577599, 0.6702662 ,
       0.66197891, 0.65168257, 0.62682069, 0.56780512, 0.55097941,
       0.44876946, 0.58312406, 0.63962833, 0.5592667 , 0.48442993])

In [18]:
# causal padding을 지원하는 conv1d 층 구현
class CausalConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, **kwargs):
        super().__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, **kwargs)
        self.kernel_size = kernel_size

    def forward(self, x):
        # kernel_size - 1 만큼 패딩을 오른쪽(미래 데이터 측)에 추가
        padding = (0,self.kernel_size - 1)
        # pad has the form (padding_left,padding_right)
        x = F.pad(x, pad=padding, mode='constant')  # 'constant'는 0으로 패딩한다는 의미
        return self.conv(x)

In [19]:
# 참고
F.pad(torch.Tensor([1,2,3,4]), pad=(1,3), mode='constant')

tensor([0., 1., 2., 3., 4., 0., 0., 0.])

In [22]:
# 참고
print(x_torch.shape)
print(x_torch.unsqueeze(1).shape)
temp = CausalConv1d(in_channels=1, out_channels=60, kernel_size=5)
x_conv_torch = temp(x_torch.unsqueeze(1).float())
print(x_conv_torch.shape)

torch.Size([32, 30])
torch.Size([32, 1, 30])
torch.Size([32, 60, 30])


In [78]:
# 참고
# Note: batch_first=True makes the input and output tensors of shape (batch, seq, feature)
print(x_conv_torch.permute(0,2,1).shape)
x_lstm1_torch, _ = nn.LSTM(input_size=60, hidden_size=60, batch_first=True)(x_conv_torch.permute(0,2,1))
print(x_lstm1_torch.shape)
x_lstm2_torch, _ = nn.LSTM(input_size=60, hidden_size=60, batch_first=True)(x_lstm1_torch)
print(x_lstm2_torch.shape)
x_dense1_torch = nn.Linear(in_features=60, out_features=30)(x_lstm2_torch)
print(x_dense1_torch.shape)

torch.Size([32, 30, 60])
torch.Size([32, 30, 60])
torch.Size([32, 30, 60])
torch.Size([32, 30, 30])


In [94]:
class timeSeriesModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1d = CausalConv1d(in_channels=1, out_channels=60, kernel_size=5)

        # Note: batch_first=True makes the input and output tensors of shape (batch, seq, feature)
        self.lstm1 = nn.LSTM(input_size=60, hidden_size=60, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=60, hidden_size=60, batch_first=True)
        self.dense1 = nn.Linear(in_features=60, out_features=30)
        self.dense2 = nn.Linear(in_features=30, out_features=10)
        self.dense3 = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        if x.dim()==2:
            # channel
            x = x.unsqueeze(1)
        x = torch.relu(self.conv1d(x.float()))
        x = x.permute(0, 2, 1)
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x = torch.relu(self.dense1(x))
        x = torch.relu(self.dense2(x))
        x = self.dense3(x)

        return x


In [24]:
timeSeries_model = timeSeriesModel()
x_final = timeSeries_model(x_torch)
x_final.shape

torch.Size([32, 30, 1])

In [26]:
train_DS = TimeSeriesDataset(X_train, window_size)
train_loader = DataLoader(train_DS, batch_size=32, shuffle=True)
val_DS = TimeSeriesDataset(X_valid, window_size)
val_loader = DataLoader(val_DS, batch_size=32, shuffle=True)

In [107]:
timeSeries_model = timeSeriesModel()

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
timeSeries_model.to(device)
optimizer = torch.optim.Adam(timeSeries_model.parameters(), lr=1e-4)
criterion = nn.HuberLoss().to(device)

In [108]:
epochs = 40
for epoch in range(epochs):
    train_loss = 0
    train_mae = 0
    timeSeries_model.train()
    for inputs, targets in train_loader:
        inputs = inputs.float().to(device)
        targets = targets.float().to(device)
        outputs = timeSeries_model(inputs).squeeze()
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss +=  loss.item() * inputs.size(0)
        train_mae += torch.mean(torch.abs(targets - outputs)) * inputs.size(0)
    train_loss /= len(train_DS)
    train_mae /= len(train_DS)


    val_loss = 0
    val_mae = 0
    timeSeries_model.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.float().to(device)
            targets = targets.float().to(device)
            outputs = timeSeries_model(inputs).squeeze()
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            val_mae += torch.mean(torch.abs(targets - outputs)) * inputs.size(0)
    val_loss /= len(val_DS)
    val_mae /= len(val_DS)

    print(f'''Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.6f}, Train MAE: {train_mae:.6f}, Val Loss: {val_loss:.6f}, Val MAE: {val_mae:.6f}''')

Epoch 1/40, Train Loss: 0.036912, Train MAE: 0.211939, Val Loss: 0.013287, Val MAE: 0.122806
Epoch 2/40, Train Loss: 0.012268, Train MAE: 0.119469, Val Loss: 0.005491, Val MAE: 0.078012
Epoch 3/40, Train Loss: 0.005303, Train MAE: 0.071190, Val Loss: 0.003191, Val MAE: 0.054093
Epoch 4/40, Train Loss: 0.003699, Train MAE: 0.057589, Val Loss: 0.002468, Val MAE: 0.046588
Epoch 5/40, Train Loss: 0.002991, Train MAE: 0.050911, Val Loss: 0.002008, Val MAE: 0.041179
Epoch 6/40, Train Loss: 0.002504, Train MAE: 0.046177, Val Loss: 0.001674, Val MAE: 0.037430
Epoch 7/40, Train Loss: 0.002144, Train MAE: 0.042657, Val Loss: 0.001426, Val MAE: 0.034640
Epoch 8/40, Train Loss: 0.001875, Train MAE: 0.039995, Val Loss: 0.001235, Val MAE: 0.032266
Epoch 9/40, Train Loss: 0.001667, Train MAE: 0.037903, Val Loss: 0.001091, Val MAE: 0.030491
Epoch 10/40, Train Loss: 0.001497, Train MAE: 0.036092, Val Loss: 0.000975, Val MAE: 0.028955
Epoch 11/40, Train Loss: 0.001352, Train MAE: 0.034491, Val Loss: 0.0

In [109]:
total_mae = 0
counts = 0
target_mean_list = []
output_mean_list = []
for inputs, targets in val_loader:
    inputs = inputs.float().to(device)
    targets = targets.float().to(device)
    outputs = timeSeries_model(inputs).squeeze()
    target_mean_list.append(targets.detach().cpu().numpy().mean())
    output_mean_list.append(outputs.detach().cpu().numpy().mean())
    mae = np.mean(np.abs(targets.cpu().detach().numpy() - outputs.detach().cpu().numpy()))
    total_mae += mae
    counts += 1
total_mae /= counts
print(target_mean_list)
print(output_mean_list)
print(total_mae)

[0.17258157, 0.15532605, 0.25249717, 0.16328713, 0.18399855, 0.14906769, 0.18286778]
[0.16927691, 0.15133025, 0.24866384, 0.15992118, 0.18036789, 0.1457216, 0.17859524]
0.006736655520009143
