In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from preprocess import CMAPSSSlidingWin
from train import TrainUtil

In [2]:
# Value handed as `max_rul` to the CMAPSS datasets and to the trainer in the cells below
# (presumably the cap applied to remaining-useful-life targets -- confirm in preprocess.py).
MAX_RUL=150

In [3]:
class LSTMHCAtn(nn.Module):
    """LSTM + attention regressor that also consumes a hand-crafted feature vector.

    Sequence branch: single-layer LSTM (17 inputs -> 50 hidden units), re-weighted
    by ``Attention3dBlock``, flattened to 1500 values and compressed to 10.
    Hand-crafted branch: 34 values mapped to 10.
    The two 10-d vectors are concatenated and projected to a single output.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are created in this fixed order so that seeded
        # initialisation and the state_dict layout stay stable.
        self.lstm = nn.LSTM(input_size=17, hidden_size=50, num_layers=1, batch_first=True)
        # NOTE: the attribute name is misspelled ("attenion"); it is kept as-is
        # because it is part of the parameter names of this module.
        self.attenion = Attention3dBlock()

        sequence_head = [
            nn.Linear(1500, 50),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(50, 10),
            nn.ReLU(inplace=True),
        ]
        self.linear = nn.Sequential(*sequence_head)

        handcrafted_head = [
            nn.Linear(34, 10),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
        ]
        self.handcrafted = nn.Sequential(*handcrafted_head)

        self.output = nn.Sequential(nn.Linear(20, 1))

    def forward(self, inputs, handcrafted_feature):
        """Return one prediction per sample.

        Args:
            inputs: sequence tensor fed to the batch-first LSTM; the hard-coded
                sizes imply (batch, 30, 17).
            handcrafted_feature: tensor whose last dimension is 34.

        Returns:
            Tensor of shape (batch, 1).
        """
        # The hand-crafted branch runs first (it contains a Dropout layer, so
        # the call order matters for reproducibility in training mode).
        hc_embedding = self.handcrafted(handcrafted_feature)
        lstm_out, _ = self.lstm(inputs)
        attended = self.attenion(lstm_out)
        # Flatten the 30 x 50 attended outputs of every sample.
        seq_embedding = self.linear(attended.reshape(-1, 1500))
        fused = torch.cat((seq_embedding, hc_embedding), dim=1)
        return self.output(fused)


class Attention3dBlock(nn.Module):
    """Soft attention over the time axis, computed independently per feature channel.

    For every channel of the input, a linear layer mixes the ``time_steps``
    values and a softmax turns them into weights that sum to one over time.
    The input is then multiplied element-wise by these weights.

    Args:
        time_steps: length of the time axis (window size) of the inputs.
            Defaults to 30, the value that was previously hard-coded.
    """

    def __init__(self, time_steps=30):
        super(Attention3dBlock, self).__init__()

        self.linear = nn.Sequential(
            nn.Linear(in_features=time_steps, out_features=time_steps),
            # After the permute in forward() dim 2 is the time axis.
            nn.Softmax(dim=2),
        )

    # inputs: batch size * window size(time step) * lstm output dims
    def forward(self, inputs):
        """Re-weight ``inputs`` of shape (batch, time_steps, channels); the shape is preserved."""
        # (batch, time, channels) -> (batch, channels, time) so the linear layer acts on time.
        x = inputs.permute(0, 2, 1)
        x = self.linear(x)
        # Back to (batch, time, channels) to line up with the inputs.
        x_probs = x.permute(0, 2, 1)
        output = x_probs * inputs
        return output

In [4]:
class LSTMBaseline(nn.Module):
    """Stacked LSTM followed by an MLP head that regresses one value per window.

    The head consumes the LSTM outputs of *all* time steps, flattened per
    sample, so the window length must be known at construction time.

    Args:
        n_features: number of input features per time step.
        n_hidden: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        time_steps: window length of the inputs. Defaults to 30, the value
            that was previously hard-coded.
    """

    def __init__(self, n_features, n_hidden=64, n_layers=2, time_steps=30):
        super(LSTMBaseline, self).__init__()

        self.n_hidden = n_hidden
        self.time_steps = time_steps
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            batch_first=True,
            num_layers=n_layers,
        )
        self.linear = nn.Sequential(
            nn.Linear(in_features=self.time_steps * self.n_hidden, out_features=8),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=8, out_features=8),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=8, out_features=1)
        )

        # Makes forward() report the tensor shapes only on its first call.
        self.printed = False

    def forward(self, x):
        """Map a (batch, time_steps, n_features) tensor to (batch, 1) predictions.

        Raises:
            RuntimeError: if the window length of ``x`` differs from ``time_steps``.
        """
        lstm_output, (hidden, _) = self.lstm(x)
        if not self.printed:
            print("output shape:", list(lstm_output.shape), "hidden shape:", list(hidden.shape))
            self.printed=True
        # Flatten every sample on its own. The former reshape(-1, n_hidden*30)
        # silently regrouped values across samples whenever the window length
        # was not 30; with flatten a wrong window fails in the linear layer.
        lstm_out = lstm_output.flatten(start_dim=1)
        y_pred = self.linear(lstm_out)

        return y_pred

In [5]:
class AttentionBlock(nn.Module):
    """Softmax attention over the time axis, applied separately to each channel.

    Args:
        time_steps: length of the time axis of the inputs (default 30).
    """

    def __init__(self, time_steps=30):
        super().__init__()
        scorer = nn.Linear(time_steps, time_steps)
        # dim=2 is the time axis once forward() has moved channels to dim 1.
        normaliser = nn.Softmax(dim=2)
        self.linear = nn.Sequential(scorer, normaliser)

    def forward(self, inputs):
        """Scale ``inputs`` (batch, time_steps, channels) by its attention weights."""
        channels_first = inputs.permute(0, 2, 1)
        weights = self.linear(channels_first).permute(0, 2, 1)
        return weights * inputs

    
class LSTMBaselineAtn(nn.Module):
    """Stacked LSTM whose per-step outputs are re-weighted by ``AttentionBlock``
    and then regressed to a single value by an MLP head.

    Args:
        n_features: number of input features per time step.
        time_steps: window length of the inputs.
        n_hidden: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, n_features, time_steps, n_hidden=64, n_layers=2):
        super(LSTMBaselineAtn, self).__init__()

        self.n_hidden = n_hidden
        self.time_steps = time_steps
        self.n_features = n_features
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            batch_first=True,
            num_layers=n_layers
        )
        self.attention = AttentionBlock(time_steps)
        self.linear = nn.Sequential(
            nn.Linear(in_features=self.time_steps*self.n_hidden, out_features=8),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=8, out_features=8),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=8, out_features=1)
        )
        # Makes forward() report the tensor shapes only on its first call.
        self.printed = False

    def forward(self, x):
        """Map a (batch, time_steps, n_features) tensor to (batch, 1) predictions."""
        lstm_output, (hidden, _) = self.lstm(x)
        if not self.printed:
            print("output shape:", *lstm_output.shape, "hidden shape:", *hidden.shape)
            self.printed=True
            print()
        x = self.attention(lstm_output)
        # The head consumes the attended outputs of every time step, flattened
        # per sample (the final hidden state is only used for the shape report).
        x = x.reshape(-1, self.time_steps*self.n_hidden)
        y_pred = self.linear(x)

        return y_pred

In [6]:
# FD004 sliding-window datasets (no hand-crafted features) and their loaders.
trainset = CMAPSSSlidingWin(
    mode='train',
    data_path='./CMAPSSData/train_FD004.txt',
    max_rul=MAX_RUL,
)
train_loader = DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2)

testset = CMAPSSSlidingWin(
    mode='test',
    data_path='./CMAPSSData/test_FD004.txt',
    rul_path='./CMAPSSData/RUL_FD004.txt',
    max_rul=MAX_RUL,
)
test_loader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

print('dataset load successfully!')
# Shapes of the first two elements of the first training item
# (each taken from a fresh iterator over the dataset).
print(
    next(iter(trainset))[0].shape,
    next(iter(trainset))[1].shape,
)

dataset load successfully!
torch.Size([30, 17]) torch.Size([1])


In [7]:
# Shape of the dataset's `y` array (presumably one target per training window -- confirm in preprocess.py).
trainset.y.shape

(54028,)

In [8]:
# Shape of the test dataset's `x` array; the recorded output is (248, 30, 17).
testset.x.shape

(248, 30, 17)

In [10]:
# Baseline LSTM on the FD004 sliding-window loaders built above.
model = LSTMBaseline(n_features=17, n_hidden=32)
optimizer = optim.Adam(model.parameters(), lr=3e-6)
epochs = 32

trainer = TrainUtil(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    max_rul=MAX_RUL,
                    verbosity=0)
# Pass the `epochs` setting instead of repeating the literal 32, so that
# editing the variable above actually changes the length of the run.
history = trainer.train(epochs)

Device: cuda
output shape: [100, 30, 32] hidden shape: [2, 100, 32]
Epoch: 1 train loss: 80.316 val loss: 65.786 score: 3105039.688
Epoch: 2 train loss: 71.161 val loss: 55.719 score: 529188.508
Epoch: 3 train loss: 60.608 val loss: 49.873 score: 84126.236
Epoch: 4 train loss: 55.235 val loss: 49.975 score: 41513.540
Epoch: 5 train loss: 54.358 val loss: 50.567 score: 42072.260
Epoch: 6 train loss: 54.157 val loss: 50.587 score: 42106.857
Epoch: 7 train loss: 54.101 val loss: 50.648 score: 42303.401
Epoch: 8 train loss: 54.016 val loss: 50.573 score: 41973.726
Epoch: 9 train loss: 53.834 val loss: 50.561 score: 41881.333
Epoch: 10 train loss: 53.809 val loss: 50.497 score: 41604.235
Epoch: 11 train loss: 53.716 val loss: 50.490 score: 41523.540
Epoch: 12 train loss: 53.633 val loss: 50.483 score: 41442.923
Epoch: 13 train loss: 53.327 val loss: 50.303 score: 40818.409
Epoch: 14 train loss: 53.457 val loss: 50.300 score: 40734.299
Epoch: 15 train loss: 53.266 val loss: 50.237 score: 404

In [16]:
# Display the object returned by trainer.train(...).
# NOTE(review): this cell was executed as In [16]; the recorded values below match the
# LSTMHCAtn run (In [15], first train loss 45.783), not the baseline run printed above.
history

{'train_loss': [45.78263541201871,
  32.42722825831445,
  30.66666656650857,
  29.914496532249096,
  29.422632867842005,
  28.995562643621128,
  28.75260589165381,
  28.46332246092432,
  28.20316802295496,
  27.863100434878405,
  27.8065462227606,
  27.595207913354685,
  27.394709008524956,
  27.14947480219119,
  27.119271047706388,
  27.089987485191696,
  26.999886460779166,
  26.90291889299804,
  26.91312721682428,
  26.84738944982719,
  26.75565819121115,
  26.662291557319143,
  26.597242827750698,
  26.600093661541685,
  26.490660304392517,
  26.372250071696023,
  26.241276536703946,
  25.945287685403105,
  25.776520199737966,
  25.4642649281427,
  25.145222307392185,
  24.739649406902775],
 'val_loss': [tensor(35.2570, device='cuda:0'),
  tensor(32.2800, device='cuda:0'),
  tensor(31.0330, device='cuda:0'),
  tensor(31.3740, device='cuda:0'),
  tensor(31.4569, device='cuda:0'),
  tensor(30.8994, device='cuda:0'),
  tensor(31.2649, device='cuda:0'),
  tensor(30.6302, device='cuda:0

In [11]:
# FD004 again, this time with handcrafted=True so that every item also
# carries a hand-crafted feature vector.
trainset = CMAPSSSlidingWin(
    mode='train',
    data_path='./CMAPSSData/train_FD004.txt',
    max_rul=MAX_RUL,
    handcrafted=True,
)
train_loader = DataLoader(trainset, batch_size=100, shuffle=True, num_workers=2)

testset = CMAPSSSlidingWin(
    mode='test',
    data_path='./CMAPSSData/test_FD004.txt',
    rul_path='./CMAPSSData/RUL_FD004.txt',
    max_rul=MAX_RUL,
    handcrafted=True,
)
test_loader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

print('dataset load successfully!')
# Shapes of the first two elements of the first training item
# (each taken from a fresh iterator over the dataset).
print(
    next(iter(trainset))[0].shape,
    next(iter(trainset))[1].shape,
)

dataset load successfully!
torch.Size([30, 17]) torch.Size([34])


In [12]:
# Shape of the `y` array of the dataset rebuilt with handcrafted=True.
trainset.y.shape

(54028,)

In [13]:
# Shape of the `x` array of the test dataset rebuilt with handcrafted=True.
testset.x.shape

(248, 30, 17)

In [14]:
# Shape of the test dataset's `hc` array; the recorded output is (248, 34), which matches
# the 34 input features expected by LSTMHCAtn.handcrafted.
testset.hc.shape

(248, 34)

In [15]:
# LSTM + attention + hand-crafted features on the FD004 loaders built with handcrafted=True.
model = LSTMHCAtn()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
epochs = 32

trainer = TrainUtil(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    max_rul=MAX_RUL,
                    verbosity=0,
                    handcrafted=True)
# Pass the `epochs` setting instead of repeating the literal 32, so that
# editing the variable above actually changes the length of the run.
history = trainer.train(epochs)

Device: cuda
Epoch: 1 train loss: 45.783 val loss: 35.257 score: 12791.688
Epoch: 2 train loss: 32.427 val loss: 32.280 score: 8363.284
Epoch: 3 train loss: 30.667 val loss: 31.033 score: 6974.557
Epoch: 4 train loss: 29.914 val loss: 31.374 score: 6821.144
Epoch: 5 train loss: 29.423 val loss: 31.457 score: 7539.985
Epoch: 6 train loss: 28.996 val loss: 30.899 score: 6803.008
Epoch: 7 train loss: 28.753 val loss: 31.265 score: 6293.000
Epoch: 8 train loss: 28.463 val loss: 30.630 score: 6007.481
Epoch: 9 train loss: 28.203 val loss: 31.817 score: 7570.173
Epoch: 10 train loss: 27.863 val loss: 30.353 score: 5444.349
Epoch: 11 train loss: 27.807 val loss: 30.393 score: 5673.795
Epoch: 12 train loss: 27.595 val loss: 30.474 score: 5986.773
Epoch: 13 train loss: 27.395 val loss: 29.316 score: 5545.537
Epoch: 14 train loss: 27.149 val loss: 30.165 score: 5852.758
Epoch: 15 train loss: 27.119 val loss: 30.250 score: 6627.006
Epoch: 16 train loss: 27.090 val loss: 29.490 score: 5394.929
Epo

In [77]:
MAX_RUL = 130
trainset = CMAPSSDatasetHC(mode='train',
                               data_path='./CMAPSSData/train_FD001.txt', max_rul=MAX_RUL)
train_loader = DataLoader(dataset=trainset, batch_size=100, shuffle=True, num_workers=2)

testset = CMAPSSDatasetHC(mode='test',
                              data_path='./CMAPSSData/test_FD001.txt',
                              rul_path='./CMAPSSData/RUL_FD001.txt',  max_rul=MAX_RUL)
test_loader = DataLoader(dataset=testset, batch_size=64, shuffle=False, num_workers=2)

print('dataset load successfully!')
print(next(iter(trainset))[0].shape, next(iter(trainset))[1].shape)

dataset load successfully!
torch.Size([30, 17]) torch.Size([34])


In [72]:
# Shape of the FD001 training dataset's `x` array; the recorded output is (17731, 30, 17).
trainset.x.shape

(17731, 30, 17)

In [73]:
# Shape of the FD001 test dataset's `x` array; the recorded output is (100, 30, 17).
testset.x.shape

(100, 30, 17)

In [129]:
# Baseline LSTM on the FD001 loaders.
# NOTE(review): TrainUtilHC is not imported in the import cell of this notebook.
epochs = 60
model = LSTMBaseline(n_features=17, n_hidden=32)
optimizer = optim.Adam(model.parameters(), lr=3e-6)

trainer = TrainUtilHC(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    max_rul=MAX_RUL,
    verbosity=0,
)
history = trainer.train(epochs)

Device: cuda
output shape: [100, 30, 32] hidden shape: [2, 100, 32]
Epoch: 1 train loss: 77.205 val loss: 69.426 score: 1070242.781
Epoch: 2 train loss: 76.381 val loss: 68.564 score: 962208.812
Epoch: 3 train loss: 75.465 val loss: 67.602 score: 853579.750
Epoch: 4 train loss: 74.394 val loss: 66.494 score: 742736.125
Epoch: 5 train loss: 73.321 val loss: 65.408 score: 647193.250
Epoch: 6 train loss: 72.391 val loss: 64.273 score: 559238.828
Epoch: 7 train loss: 71.427 val loss: 63.262 score: 490472.938
Epoch: 8 train loss: 70.201 val loss: 62.019 score: 416701.797
Epoch: 9 train loss: 68.404 val loss: 60.327 score: 332457.266
Epoch: 10 train loss: 66.186 val loss: 58.243 score: 249666.172
Epoch: 11 train loss: 63.788 val loss: 55.859 score: 177549.102
Epoch: 12 train loss: 61.144 val loss: 53.258 score: 119831.699
Epoch: 13 train loss: 58.355 val loss: 50.574 score: 77342.082
Epoch: 14 train loss: 55.381 val loss: 47.939 score: 47946.516
Epoch: 15 train loss: 53.090 val loss: 45.664 

In [65]:
# LSTM with the attention block on the FD001 loaders.
# NOTE(review): TrainUtilHC is not imported in the import cell of this notebook.
epochs = 20
model = LSTMBaselineAtn(n_features=17, n_hidden=50, time_steps=30)
optimizer = optim.Adam(model.parameters(), lr=1e-5)

trainer = TrainUtilHC(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    max_rul=MAX_RUL,
    verbosity=0,
)
history = trainer.train(epochs)

Device: cuda
output shape: 100 30 50 hidden shape: 2 100 50

Epoch: 1 train loss: 74.438 val loss: 62.626 score: 1887591.844
Epoch: 2 train loss: 70.535 val loss: 54.540 score: 402725.047
Epoch: 3 train loss: 56.740 val loss: 49.532 score: 50010.076
Epoch: 4 train loss: 51.770 val loss: 51.122 score: 44304.367
Epoch: 5 train loss: 51.382 val loss: 51.263 score: 45131.449
Epoch: 6 train loss: 51.226 val loss: 51.290 score: 45298.317
Epoch: 7 train loss: 51.180 val loss: 51.321 score: 45489.186
Epoch: 8 train loss: 51.136 val loss: 51.347 score: 45652.155
Epoch: 9 train loss: 51.078 val loss: 51.352 score: 45687.467
Epoch: 10 train loss: 51.014 val loss: 51.321 score: 45491.335
Epoch: 11 train loss: 51.008 val loss: 51.228 score: 44919.338
Epoch: 12 train loss: 50.913 val loss: 51.237 score: 44971.194
Epoch: 13 train loss: 50.967 val loss: 51.192 score: 44706.427
Epoch: 14 train loss: 50.904 val loss: 51.379 score: 45858.915
Epoch: 15 train loss: 50.912 val loss: 51.204 score: 44773.965


In [130]:
# LSTM + attention + hand-crafted features on the FD001 loaders.
# NOTE(review): TrainUtilHC is not imported in the import cell of this notebook.
epochs = 40
model = LSTMHCAtn()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

trainer = TrainUtilHC(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    max_rul=MAX_RUL,
    verbosity=0,
)
history = trainer.train(epochs)

Device: cuda
Epoch: 1 train loss: 102.100 val loss: 91.346 score: 30705721.000
Epoch: 2 train loss: 99.609 val loss: 88.976 score: 19733672.000
Epoch: 3 train loss: 96.735 val loss: 85.774 score: 11791595.250
Epoch: 4 train loss: 93.061 val loss: 81.243 score: 6189282.750
Epoch: 5 train loss: 87.411 val loss: 74.190 score: 2485348.750
Epoch: 6 train loss: 77.470 val loss: 61.318 score: 497107.562
Epoch: 7 train loss: 65.036 val loss: 49.634 score: 102145.559
Epoch: 8 train loss: 54.394 val loss: 40.299 score: 25159.545
Epoch: 9 train loss: 46.150 val loss: 33.690 score: 7725.400
Epoch: 10 train loss: 40.263 val loss: 29.873 score: 3260.595
Epoch: 11 train loss: 36.711 val loss: 27.956 score: 1943.964
Epoch: 12 train loss: 34.821 val loss: 26.915 score: 1481.059
Epoch: 13 train loss: 33.228 val loss: 26.081 score: 1260.547
Epoch: 14 train loss: 32.477 val loss: 25.303 score: 1116.995
Epoch: 15 train loss: 31.501 val loss: 24.543 score: 1005.788
Epoch: 16 train loss: 30.801 val loss: 23.

In [4]:
class TimeSeriesDataset(Dataset):
    """Sliding-window view over a sequence ``X`` with next-step targets taken from ``y``.

    Item ``i`` is ``(X[i:i + seq_len], y[i + seq_len])``: a window of
    ``seq_len`` consecutive entries of ``X`` and the entry of ``y`` that
    immediately follows the window.

    Args:
        X: indexable, sliceable sequence of inputs (e.g. a 2-D numpy array).
        y: indexable sequence of targets aligned with ``X``.
        seq_len: number of consecutive entries per window.
    """

    def __init__(self, X, y, seq_len=1):
        self.X = X
        self.y = y
        self.seq_len = seq_len

    def __len__(self):
        # Clamp at zero: when seq_len exceeds len(X) the difference is negative,
        # which would make len() raise instead of reporting an empty dataset.
        return max(len(self.X) - self.seq_len, 0)

    def __getitem__(self, index):
        """Return ``(window, target)`` for ``index``; negative indices count from the end.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
        """
        n_items = len(self)
        if index < 0:
            # Without this, a negative index silently produced an empty or
            # misaligned window because the slice end wrapped around.
            index += n_items
        if not 0 <= index < n_items:
            raise IndexError("TimeSeriesDataset index out of range")
        stop = index + self.seq_len
        return self.X[index:stop], self.y[stop]

In [78]:
def train_model(
        model,
        train_df,
        test_df,
        label_name,
        sequence_length,
        batch_size,
        n_epochs,
        n_epochs_stop,
        lr
):
    """Train ``model`` on sliding windows of ``train_df`` and track the loss on ``test_df``.

    Every sample is a window of ``sequence_length`` consecutive rows (all
    columns) and its target is the ``label_name`` value of the row that
    follows the window. Training uses Adam and an MSE loss.

    Whenever the test loss improves, the weights are saved to
    ``Path(model_dir, 'model.pt')``. ``model_dir`` is read from the enclosing
    (notebook) scope and must be defined before calling. The model is not
    reloaded from that file, so on return it holds the last epoch's weights.

    Args:
        model: module called as ``model(batch)`` with a float tensor batch.
        train_df: DataFrame used for training.
        test_df: DataFrame used for evaluation and early stopping.
        label_name: name of the target column.
        sequence_length: number of rows per input window.
        batch_size: training batch size (the test loader uses 100).
        n_epochs: maximum number of epochs.
        n_epochs_stop: stop once the test loss has not improved for this many
            consecutive epochs.
        lr: learning rate for Adam.

    Returns:
        pandas.DataFrame with one row per evaluated epoch and the columns
        ``training_loss`` and ``test_loss`` (loss averaged over batches).

    Raises:
        ValueError: if either frame is too short to yield a single window.
    """
    print("Starting with model training...")

    # create dataloaders
    train_dataset = TimeSeriesDataset(np.array(train_df), np.array(train_df[label_name]), seq_len=sequence_length)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    test_dataset = TimeSeriesDataset(np.array(test_df), np.array(test_df[label_name]), seq_len=sequence_length)
    test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

    # Fail early with a clear message instead of dividing by zero below.
    if len(train_loader) == 0 or len(test_loader) == 0:
        raise ValueError("train_df and test_df must each contain more than sequence_length rows")

    # set up training
    criterion = torch.nn.MSELoss()  # L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_hist = []
    test_hist = []

    # start training
    best_loss = np.inf
    epochs_no_improve = 0
    for epoch in range(1, n_epochs+1):
        running_loss = 0
        model.train()

        for batch_idx, (data, target) in enumerate(train_loader, 1):
            # zero the parameter gradients
            optimizer.zero_grad()

            # The loader already yields tensors; only the dtype has to be
            # brought to float32 for the model.
            data = data.float()

            # Shapes are reported once, for the very first batch.
            first_batch = (epoch == 1) and (batch_idx == 1)
            if first_batch:
                print("input shape:", *data.shape)

            # forward + backward + optimize
            output = model(data)

            if first_batch:
                print("model output:", *output.shape)
                print()

            loss = criterion(output.flatten(), target.type_as(output))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        running_loss /= len(train_loader)
        train_hist.append(running_loss)

        # test loss
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for data, target in test_loader:
                output = model(data.float())
                loss = criterion(output.flatten(), target.type_as(output))
                test_loss += loss.item()
        test_loss /= len(test_loader)
        test_hist.append(test_loss)

        # Report the epoch before the early-stopping check so the final
        # evaluated epoch is never dropped from the log.
        print(f'Epoch {epoch} train loss: {round(running_loss,4)} test loss: {round(test_loss,4)}')

        # early stopping
        if test_loss < best_loss:
            best_loss = test_loss
            torch.save(model.state_dict(), Path(model_dir, 'model.pt'))
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
        if epochs_no_improve == n_epochs_stop:
            print("Early stopping.")
            break

    print("Completed.")

    # Built once, after the loop: the history is therefore always defined
    # (even when no epoch completes) and always includes the final epoch.
    hist = pd.DataFrame({'training_loss': train_hist, 'test_loss': test_hist})

    return hist


In [52]:
# Read ../data/train.csv into df_train and show it as the cell output.
df_train = pd.read_csv("../data/train.csv")
df_train

Unnamed: 0,Close,Volume,High_Low_Pct,Open_Close_Pct,Day_Of_Week,Month_Of_Year,Quarter_Of_Year
0,0.000000,0.008925,0.081764,0.193811,0.833333,1.0,1.0
1,0.006946,0.020057,0.047454,0.192291,1.000000,1.0,1.0
2,0.009084,0.011096,0.000000,0.189650,0.000000,1.0,1.0
3,0.012580,0.023945,0.019690,0.191727,0.166667,1.0,1.0
4,0.051411,0.077130,0.239106,0.186352,0.333333,1.0,1.0
...,...,...,...,...,...,...,...
360,0.654109,0.044822,0.048717,0.171630,0.166667,1.0,1.0
361,0.650104,0.029176,0.084043,0.159411,0.333333,1.0,1.0
362,0.592014,0.032559,0.141697,0.171933,0.500000,1.0,1.0
363,0.583220,0.036661,0.119429,0.201692,0.666667,1.0,1.0


In [53]:
# Read ../data/test.csv into df_test and show it as the cell output.
df_test = pd.read_csv("../data/test.csv")
df_test

Unnamed: 0,Close,Volume,High_Low_Pct,Open_Close_Pct,Day_Of_Week,Month_Of_Year,Quarter_Of_Year
0,0.641769,0.009487,0.063496,0.198277,1.000000,1.0,1.0
1,0.572847,0.040276,0.194433,0.172954,0.000000,1.0,1.0
2,0.570286,0.047717,0.068952,1.233623,0.166667,1.0,1.0
3,0.617127,0.053446,0.110560,0.191364,0.333333,1.0,1.0
4,0.591876,0.025529,0.060670,0.182496,0.500000,1.0,1.0
...,...,...,...,...,...,...,...
360,-0.040103,0.002671,0.014960,0.187984,0.333333,1.0,1.0
361,-0.032200,0.005144,0.032581,0.187136,0.500000,1.0,1.0
362,-0.034257,0.004637,-0.012056,0.190973,0.666667,1.0,1.0
363,-0.034348,-0.018307,-0.024171,0.190842,0.833333,1.0,1.0


In [54]:
# Read the BTC-USD price table from ../data/BTC-USD.csv and show it as the cell output.
df = pd.read_csv("../data/BTC-USD.csv")
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-12-12,18051.320313,18919.550781,18046.041016,18803.656250,18803.656250,21752580802
1,2020-12-13,18806.765625,19381.535156,18734.332031,19142.382813,19142.382813,25450468637
2,2020-12-14,19144.492188,19305.099609,19012.708984,19246.644531,19246.644531,22473997681
3,2020-12-15,19246.919922,19525.007813,19079.841797,19417.076172,19417.076172,26741982541
4,2020-12-16,19418.818359,21458.908203,19298.316406,21310.597656,21310.597656,44409011479
...,...,...,...,...,...,...,...
726,2022-12-08,16847.349609,17267.916016,16788.783203,17233.474609,17233.474609,20496603770
727,2022-12-09,17232.148438,17280.546875,17100.835938,17133.152344,17133.152344,20328426366
728,2022-12-10,17134.220703,17216.826172,17120.683594,17128.724609,17128.724609,12706781969
729,2022-12-11,17129.710938,17245.634766,17091.820313,17104.193359,17104.193359,14122486832


In [55]:
# Scratch arithmetic: two years of daily rows.
# NOTE(review): presumably a check against the 730 rows (index 0..729) of BTC-USD.csv -- confirm.
2*365

730

In [111]:
# Train the plain TSModel on the "Close" column; the returned loss history
# is the cell output.
# NOTE(review): TSModel is not defined in the visible part of this notebook.
train_model(
    model=TSModel(df_train.shape[1]),
    train_df=df_train,
    test_df=df_test,
    label_name="Close",
    sequence_length=30,
    batch_size=10,
    n_epochs=40,
    n_epochs_stop=20,
    lr=0.001,
)

Starting with model training...
input shape: 10 30 7
output shape: 10 30 64 hidden shape: 2 10 64
model output: 10 1

Epoch 1 train loss: 0.1495 test loss: 0.2826
Epoch 2 train loss: 0.0552 test loss: 0.1772
Epoch 3 train loss: 0.0611 test loss: 0.1177
Epoch 4 train loss: 0.0655 test loss: 0.1097
Epoch 5 train loss: 0.0517 test loss: 0.1274
Epoch 6 train loss: 0.046 test loss: 0.1184
Epoch 7 train loss: 0.0396 test loss: 0.1027
Epoch 8 train loss: 0.0378 test loss: 0.0871
Epoch 9 train loss: 0.0323 test loss: 0.0674
Epoch 10 train loss: 0.0285 test loss: 0.04
Epoch 11 train loss: 0.0244 test loss: 0.0245
Epoch 12 train loss: 0.011 test loss: 0.0196
Epoch 13 train loss: 0.0099 test loss: 0.0512
Epoch 14 train loss: 0.0113 test loss: 0.0423
Epoch 15 train loss: 0.006 test loss: 0.0314
Epoch 16 train loss: 0.0057 test loss: 0.0288
Epoch 17 train loss: 0.0055 test loss: 0.026
Epoch 18 train loss: 0.0057 test loss: 0.022
Epoch 19 train loss: 0.0053 test loss: 0.029
Epoch 20 train loss: 0.00

Unnamed: 0,training_loss,test_loss
0,0.14951,0.282596
1,0.055157,0.177239
2,0.061057,0.117689
3,0.065541,0.109684
4,0.051653,0.127381
5,0.045953,0.118389
6,0.039573,0.102711
7,0.037757,0.08715
8,0.032257,0.067441
9,0.028499,0.040018


In [133]:
class AttentionBlock(nn.Module):
    """Sigmoid-gated attention over the time axis, applied separately to each channel.

    Unlike a softmax, the sigmoid gates are independent values in (0, 1) and
    do not have to sum to one over time.

    Args:
        time_steps: length of the time axis of the inputs (default 30).
    """

    def __init__(self, time_steps=30):
        super().__init__()
        scorer = nn.Linear(time_steps, time_steps)
        gate = nn.Sigmoid()
        self.linear = nn.Sequential(scorer, gate)

    def forward(self, inputs):
        """Scale ``inputs`` (batch, time_steps, channels) by its gates."""
        channels_first = inputs.permute(0, 2, 1)
        gates = self.linear(channels_first).permute(0, 2, 1)
        return gates * inputs

    
class TSModelAttention(nn.Module):
    """LSTM whose per-step outputs are re-weighted by ``AttentionBlock`` and then
    mapped to a single value by one linear layer.

    Args:
        n_features: number of input features per time step.
        time_steps: window length of the inputs.
        n_hidden: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, n_features, time_steps, n_hidden=64, n_layers=2):
        super(TSModelAttention, self).__init__()

        self.n_hidden = n_hidden
        self.time_steps = time_steps
        self.n_features = n_features
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            batch_first=True,
            num_layers=n_layers
        )
        self.attention = AttentionBlock(time_steps)
        self.linear = nn.Linear(self.time_steps*self.n_hidden, 1)
        # Makes forward() report the tensor shapes only on its first call.
        self.printed = False

    def forward(self, x):
        """Map a (batch, time_steps, n_features) tensor to (batch, 1) predictions."""
        lstm_output, (hidden, _) = self.lstm(x)
        if not self.printed:
            print("output shape:", *lstm_output.shape, "hidden shape:", *hidden.shape)
            self.printed=True
            print()
        x = self.attention(lstm_output)
        # The linear layer consumes the attended outputs of every time step,
        # flattened per sample (the final hidden state is only used for the
        # shape report above).
        x = x.reshape(-1, self.time_steps*self.n_hidden)
        y_pred = self.linear(x)

        return y_pred

In [135]:
# Train the attention variant (single LSTM layer) on the "Close" column;
# the returned loss history is the cell output.
train_model(
    model=TSModelAttention(df_train.shape[1], 30, n_hidden=64, n_layers=1),
    train_df=df_train,
    test_df=df_test,
    label_name="Close",
    sequence_length=30,
    batch_size=10,
    n_epochs=30,
    n_epochs_stop=20,
    lr=0.001,
)

Starting with model training...
input shape: 10 30 7
output shape: 10 30 64 hidden shape: 1 10 64

model output: 10 1

Epoch 1 train loss: 0.137 test loss: 0.1445
Epoch 2 train loss: 0.0605 test loss: 0.1355
Epoch 3 train loss: 0.0612 test loss: 0.1225
Epoch 4 train loss: 0.0504 test loss: 0.1176
Epoch 5 train loss: 0.0432 test loss: 0.1059
Epoch 6 train loss: 0.0393 test loss: 0.0937
Epoch 7 train loss: 0.036 test loss: 0.0824
Epoch 8 train loss: 0.032 test loss: 0.0712
Epoch 9 train loss: 0.0274 test loss: 0.0597
Epoch 10 train loss: 0.0226 test loss: 0.0484
Epoch 11 train loss: 0.0178 test loss: 0.0376
Epoch 12 train loss: 0.0134 test loss: 0.0284
Epoch 13 train loss: 0.0101 test loss: 0.0243
Epoch 14 train loss: 0.0082 test loss: 0.0267
Epoch 15 train loss: 0.0076 test loss: 0.0312
Epoch 16 train loss: 0.0073 test loss: 0.0344
Epoch 17 train loss: 0.0071 test loss: 0.0353
Epoch 18 train loss: 0.0073 test loss: 0.035
Epoch 19 train loss: 0.0081 test loss: 0.0338
Epoch 20 train loss:

Unnamed: 0,training_loss,test_loss
0,0.137022,0.144499
1,0.060537,0.135549
2,0.0612,0.122494
3,0.050381,0.117582
4,0.043236,0.105892
5,0.039307,0.093682
6,0.035971,0.082431
7,0.032012,0.071198
8,0.027418,0.059735
9,0.022584,0.048439
