In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from preprocess import CMAPSSDataset
from train import TrainUtil

In [2]:
# Value handed as `max_rul` to both CMAPSSDataset instances and to TrainUtil.
MAX_RUL = 150

In [3]:
# FD004 training file, wrapped in a shuffled loader.
trainset = CMAPSSDataset(
    mode='train',
    data_path='./CMAPSSData/train_FD004.txt',
    max_rul=MAX_RUL,
)
train_loader = DataLoader(
    dataset=trainset,
    batch_size=100,
    shuffle=True,
    num_workers=2,
)

# FD004 test file plus its RUL file, iterated in file order (no shuffling).
testset = CMAPSSDataset(
    mode='test',
    data_path='./CMAPSSData/test_FD004.txt',
    rul_path='./CMAPSSData/RUL_FD004.txt',
    max_rul=MAX_RUL,
)
test_loader = DataLoader(
    dataset=testset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)
print('dataset load successfully!')

dataset load successfully!


In [4]:
# Shape of element 0 of the first item yielded by `trainset`
# (the recorded output below is torch.Size([30, 22])).
next(iter(trainset))[0].shape

torch.Size([30, 22])

In [11]:
# Number of items in the training dataset.
len(trainset)

54028

In [42]:
class Model(nn.Module):
    """LSTM -> temporal attention -> MLP head producing one value per window.

    The layer sizes that used to be hard-coded (22 input features, 30 time
    steps, 50 LSTM units, hence 30 * 50 = 1500 flattened features) are now
    constructor arguments whose defaults reproduce the original network, so
    ``Model()`` builds exactly the same architecture as before.

    Args:
        n_features: Size of the last axis of the input windows.
        time_steps: Length of the time axis (axis 1) of the input windows.
        n_hidden: Hidden size of the single-layer LSTM.
    """

    def __init__(self, n_features=22, time_steps=30, n_hidden=50):
        super(Model, self).__init__()
        self.time_steps = time_steps
        self.n_hidden = n_hidden

        self.lstm = nn.LSTM(batch_first=True, input_size=n_features,
                            hidden_size=n_hidden, num_layers=1)
        # NOTE: the attribute name is misspelled ("attenion"); it is kept
        # as-is so that previously saved state_dict keys still load.
        self.attenion = Attention3dBlock(time_steps)
        self.linear = nn.Sequential(
            nn.Linear(in_features=time_steps * n_hidden, out_features=50),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=50, out_features=10),
            nn.ReLU(inplace=True)
        )
        # Branch for 34 hand-crafted features. It is registered (so its
        # parameters are part of the module) but forward() does not use it.
        self.handcrafted = nn.Sequential(
            nn.Linear(in_features=34, out_features=10),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2)
        )

        self.output = nn.Sequential(
            nn.Linear(in_features=10, out_features=1)
        )

    def forward(self, inputs):
        """Map windows of shape (batch, time_steps, n_features) to (batch, 1)."""
        # Only the per-step LSTM outputs are needed; the final states are dropped.
        x, _ = self.lstm(inputs)
        x = self.attenion(x)
        # Flatten (time_steps, n_hidden) into one feature vector per sample.
        x = x.reshape(-1, self.time_steps * self.n_hidden)
        x = self.linear(x)
        return self.output(x)


class Attention3dBlock(nn.Module):
    """Re-weights a (batch, time, features) tensor along its time axis.

    For every feature channel, a linear layer over the time axis followed by
    a softmax yields weights that sum to 1 across time steps; the input is
    multiplied element-wise by those weights.

    Args:
        time_steps: Length of the time axis (axis 1) of the inputs. Defaults
            to 30, the value that was previously hard-coded.
    """

    def __init__(self, time_steps=30):
        super(Attention3dBlock, self).__init__()

        self.linear = nn.Sequential(
            nn.Linear(in_features=time_steps, out_features=time_steps),
            nn.Softmax(dim=2),
        )

    # inputs: batch size * window size(time step) * lstm output dims
    def forward(self, inputs):
        """Return ``inputs`` scaled by the learned per-time-step weights."""
        # Put time last so Linear/Softmax operate across time steps.
        x = inputs.permute(0, 2, 1)
        x = self.linear(x)
        # Back to (batch, time, features) to line up with `inputs`.
        x_probs = x.permute(0, 2, 1)
        output = x_probs * inputs
        return output

In [15]:
class LSTMBaseline(nn.Module):
    """Stacked LSTM whose top-layer final hidden state feeds a linear head.

    Args:
        n_features: Size of the last axis of the input.
        n_hidden: LSTM hidden size.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, n_features, n_hidden=64, n_layers=2):
        super().__init__()

        self.n_hidden = n_hidden
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            num_layers=n_layers,
            batch_first=True,
        )
        self.linear = nn.Linear(n_hidden, 1)
        # Flipped to True after the shapes have been printed once.
        self.printed = False

    def forward(self, x):
        """Return one prediction per sample, shape (batch, 1)."""
        sequence_out, (final_hidden, _) = self.lstm(x)

        # One-off shape report on the very first forward pass.
        if not self.printed:
            print("output shape:", list(sequence_out.shape), "hidden shape:", list(final_hidden.shape))
            self.printed = True

        # final_hidden[-1] is the last layer's final hidden state.
        return self.linear(final_hidden[-1])

In [31]:
class AttentionBlock(nn.Module):
    """Softmax attention over the time axis of a (batch, time, features) tensor.

    Args:
        time_steps: Length of the time axis (axis 1) of the inputs.
    """

    def __init__(self, time_steps=30):
        super().__init__()

        scorer = nn.Linear(time_steps, time_steps)
        normaliser = nn.Softmax(dim=2)
        self.linear = nn.Sequential(scorer, normaliser)

    # inputs: batch size * window size(time step) * lstm output dims
    def forward(self, inputs):
        """Scale ``inputs`` by weights that sum to 1 across time steps."""
        # Time goes last so the linear layer and softmax act across time steps.
        time_last = inputs.permute(0, 2, 1)
        weights = self.linear(time_last).permute(0, 2, 1)
        return weights * inputs

    
class LSTMBaselineAtn(nn.Module):
    """Stacked LSTM whose per-step outputs are attention-weighted and regressed.

    Every LSTM output step is re-weighted by ``AttentionBlock``, the result is
    flattened to ``time_steps * n_hidden`` features, and one linear layer
    produces a single value per sample.

    Args:
        n_features: Size of the last axis of the input.
        time_steps: Length of the time axis (axis 1) of the input.
        n_hidden: LSTM hidden size.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, n_features, time_steps, n_hidden=64, n_layers=2):
        super(LSTMBaselineAtn, self).__init__()

        self.n_hidden = n_hidden
        self.time_steps = time_steps
        self.n_features = n_features
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            batch_first=True,
            num_layers=n_layers
        )
        self.attention = AttentionBlock(time_steps)
        self.linear = nn.Linear(self.time_steps*self.n_hidden, 1)
        # Flipped to True after the shapes have been printed once.
        self.printed = False

    def forward(self, x):
        """Return one prediction per sample, shape (batch, 1)."""
        lstm_output, (hidden, _) = self.lstm(x)
        # One-off shape report on the first forward pass; `hidden` is only
        # used here. The prediction is built from the per-step outputs, so the
        # former unused `lstm_out = hidden[-1]` assignment has been removed.
        if not self.printed:
            print("output shape:", *lstm_output.shape, "hidden shape:", *hidden.shape)
            self.printed = True
            print()
        weighted = self.attention(lstm_output)
        # Flatten (time_steps, n_hidden) into one feature vector per sample.
        flat = weighted.reshape(-1, self.time_steps*self.n_hidden)
        y_pred = self.linear(flat)

        return y_pred

In [43]:
# Train `Model` on the CMAPSS loaders.
model = Model()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Single source of truth for the epoch count. This cell used to set
# `epochs = 32` and then call `trainer.train(20)`, so the variable was dead
# and misleading; 20 is kept because that is what actually ran.
epochs = 20

trainer = TrainUtil(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    max_rul=MAX_RUL,
                    verbosity=0)
history = trainer.train(epochs)

Device: cuda
Epoch: 1 train loss: 60.398 val loss: 46.721 score: 4899890.666
Epoch: 2 train loss: 51.455 val loss: 55.754 score: 16791089.901
Epoch: 3 train loss: 48.680 val loss: 68.140 score: 166434274.449
Epoch: 4 train loss: 38.928 val loss: 83.113 score: 1578939244.472
Epoch: 5 train loss: 33.879 val loss: 90.631 score: 3523218182.841
Epoch: 6 train loss: 32.152 val loss: 91.014 score: 2989766648.366
Epoch: 7 train loss: 31.749 val loss: 85.256 score: 1993731724.821
Epoch: 8 train loss: 30.791 val loss: 86.236 score: 2045667178.238
Epoch: 9 train loss: 30.334 val loss: 85.755 score: 2300766282.335
Epoch: 10 train loss: 29.851 val loss: 80.966 score: 1456690902.647
Epoch: 11 train loss: 29.106 val loss: 90.327 score: 2765739482.704
Epoch: 12 train loss: 28.281 val loss: 79.570 score: 1177286505.307
Epoch: 13 train loss: 27.472 val loss: 73.915 score: 915678121.952
Epoch: 14 train loss: 26.813 val loss: 85.499 score: 2498863651.983
Epoch: 15 train loss: 26.328 val loss: 86.783 score

In [35]:
# Train `LSTMBaseline` on the CMAPSS loaders.
model = LSTMBaseline(n_features=22, n_hidden=50)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Single source of truth for the epoch count. This cell used to set
# `epochs = 32` and then call `trainer.train(20)`, so the variable was dead
# and misleading; 20 is kept because that is what actually ran.
epochs = 20

trainer = TrainUtil(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    max_rul=MAX_RUL,
                    verbosity=0)
history = trainer.train(epochs)

Device: cuda
output shape: [100, 30, 50] hidden shape: [2, 100, 50]
Epoch: 1 train loss: 48.352 val loss: 60.971 score: 1077267619.923
Epoch: 2 train loss: 35.143 val loss: 61.202 score: 500542838.816
Epoch: 3 train loss: 32.560 val loss: 86.988 score: 2528709578.324
Epoch: 4 train loss: 30.943 val loss: 76.572 score: 969228532.908
Epoch: 5 train loss: 29.314 val loss: 73.114 score: 867417587.161
Epoch: 6 train loss: 27.514 val loss: 72.741 score: 930059737.664
Epoch: 7 train loss: 26.232 val loss: 66.006 score: 790924689.658
Epoch: 8 train loss: 25.889 val loss: 54.566 score: 228617503.638
Epoch: 9 train loss: 25.364 val loss: 75.750 score: 1095891555.224
Epoch: 10 train loss: 24.814 val loss: 48.203 score: 185736556.502
Epoch: 11 train loss: 24.704 val loss: 61.490 score: 356211452.942
Epoch: 12 train loss: 24.113 val loss: 68.916 score: 802452932.124
Epoch: 13 train loss: 24.210 val loss: 58.018 score: 355364834.323
Epoch: 14 train loss: 24.172 val loss: 62.473 score: 388352095.416


In [34]:
# Train `LSTMBaselineAtn` on the CMAPSS loaders.
model = LSTMBaselineAtn(n_features=22, n_hidden=50, time_steps=30)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Single source of truth for the epoch count. This cell used to set
# `epochs = 32` and then call `trainer.train(20)`, so the variable was dead
# and misleading; 20 is kept because that is what actually ran.
epochs = 20

trainer = TrainUtil(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader,
                    max_rul=MAX_RUL,
                    verbosity=0)
history = trainer.train(epochs)

Device: cuda
output shape: 100 30 50 hidden shape: 2 100 50

Epoch: 1 train loss: 108.460 val loss: 128.067 score: 52029869613.278
Epoch: 2 train loss: 107.755 val loss: 127.213 score: 47594973169.934
Epoch: 3 train loss: 106.821 val loss: 126.002 score: 41939935045.824
Epoch: 4 train loss: 105.426 val loss: 124.100 score: 34383822370.318
Epoch: 5 train loss: 103.155 val loss: 120.888 score: 24576668589.720
Epoch: 6 train loss: 99.297 val loss: 115.387 score: 13809909046.618
Epoch: 7 train loss: 93.089 val loss: 106.926 score: 5660509806.391
Epoch: 8 train loss: 84.704 val loss: 96.439 score: 1847246525.604
Epoch: 9 train loss: 75.849 val loss: 86.052 score: 595489732.079
Epoch: 10 train loss: 68.133 val loss: 76.826 score: 212151532.140
Epoch: 11 train loss: 61.989 val loss: 68.861 score: 84480327.649
Epoch: 12 train loss: 57.380 val loss: 62.111 score: 37483192.700
Epoch: 13 train loss: 54.172 val loss: 56.572 score: 18638418.057
Epoch: 14 train loss: 52.179 val loss: 52.304 score: 1

In [4]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over two aligned sequences.

    Item ``i`` is the pair ``(X[i:i + seq_len], y[i + seq_len])``: a window of
    ``seq_len`` consecutive rows of ``X`` and the ``y`` value at the position
    right after that window.

    Args:
        X: Indexable, sliceable sequence of inputs (e.g. a numpy array).
        y: Indexable sequence of targets, indexed with the same positions as X.
        seq_len: Number of consecutive rows in each window.
    """

    def __init__(self, X, y, seq_len=1):
        self.X = X
        self.y = y
        self.seq_len = seq_len

    def __len__(self):
        # Clamp at 0: with seq_len > len(X) the raw difference is negative,
        # which is not a valid length.
        return max(0, len(self.X) - self.seq_len)

    def __getitem__(self, index):
        """Return ``(window, target)`` for ``index``; negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        n_windows = len(self)
        # Without normalisation a negative index would slice an empty or
        # misaligned window (e.g. X[-1:0]) instead of the last valid one.
        if index < 0:
            index += n_windows
        if not 0 <= index < n_windows:
            raise IndexError("TimeSeriesDataset index out of range")
        stop = index + self.seq_len
        return self.X[index:stop], self.y[stop]

In [78]:
def train_model(
        model,
        train_df,
        test_df,
        label_name,
        sequence_length,
        batch_size,
        n_epochs,
        n_epochs_stop,
        lr
):
    """Train ``model`` on sliding windows of ``train_df`` with early stopping.

    Both frames are wrapped in ``TimeSeriesDataset``: every column of the
    frame (including ``label_name``) is an input feature, and the target is
    the ``label_name`` value at the row right after each window. The model is
    optimised with Adam on MSE loss. After every epoch the mean test loss is
    computed; whenever it improves, the model's state_dict is saved to
    ``Path(model_dir, 'model.pt')``.

    NOTE: ``model_dir`` is not a parameter; it is read from the enclosing
    (module/notebook) scope and must be defined before calling this function.

    Args:
        model: ``nn.Module`` called on float tensors of windows.
        train_df: DataFrame used for training.
        test_df: DataFrame used for the per-epoch test loss.
        label_name: Name of the target column in both frames.
        sequence_length: Number of rows per input window.
        batch_size: Training batch size (the test loader always uses 100).
        n_epochs: Maximum number of epochs.
        n_epochs_stop: Stop once this many consecutive epochs have passed
            without a new best test loss.
        lr: Adam learning rate.

    Returns:
        DataFrame with one row per completed epoch and the columns
        ``training_loss`` and ``test_loss``.

    Raises:
        ValueError: If either frame has no more rows than ``sequence_length``,
            i.e. it yields no windows.
    """
    print("Starting with model training...")

    # create dataloaders (not shuffled: windows are visited in row order)
    train_dataset = TimeSeriesDataset(np.array(train_df), np.array(train_df[label_name]), seq_len=sequence_length)
    test_dataset = TimeSeriesDataset(np.array(test_df), np.array(test_df[label_name]), seq_len=sequence_length)
    # Fail early with a clear message instead of a ZeroDivisionError (or a
    # negative-length error) when averaging over an empty loader later on.
    if len(train_df) <= sequence_length or len(test_df) <= sequence_length:
        raise ValueError("train_df and test_df must each have more rows than sequence_length")
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

    # set up training
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_hist = []
    test_hist = []

    # start training
    best_loss = np.inf
    epochs_no_improve = 0
    for epoch in range(1, n_epochs+1):
        running_loss = 0
        model.train()

        for batch_idx, (data, target) in enumerate(train_loader, 1):
            # Shapes are reported once, for the very first training batch.
            first_batch = (epoch == 1) and (batch_idx == 1)

            # zero the parameter gradients
            optimizer.zero_grad()

            # The loader yields tensors in the frame's dtype (typically
            # float64); the model expects float32.
            data = data.float()

            if first_batch:
                print("input shape:", *data.shape)

            output = model(data)

            if first_batch:
                print("model output:", *output.shape)
                print()

            loss = criterion(output.flatten(), target.type_as(output))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        running_loss /= len(train_loader)
        train_hist.append(running_loss)

        # test loss
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for data, target in test_loader:
                output = model(data.float())
                loss = criterion(output.flatten(), target.type_as(output))
                test_loss += loss.item()
        test_loss /= len(test_loader)
        test_hist.append(test_loss)

        # Report before the early-stopping check so the stopping epoch is
        # logged too (it used to be skipped by the `break`).
        print(f'Epoch {epoch} train loss: {round(running_loss,4)} test loss: {round(test_loss,4)}')

        # early stopping
        if test_loss < best_loss:
            best_loss = test_loss
            torch.save(model.state_dict(), Path(model_dir, 'model.pt'))
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
        if epochs_no_improve == n_epochs_stop:
            print("Early stopping.")
            break

    # Built once after the loop: always defined (even for n_epochs == 0) and
    # always includes the last epoch that ran.
    hist = pd.DataFrame({'training_loss': train_hist, 'test_loss': test_hist})

    print("Completed.")

    return hist


In [52]:
# Read ../data/train.csv into `df_train`; the bare name on the last line
# makes the notebook display the frame as the cell output.
df_train = pd.read_csv("../data/train.csv")
df_train

Unnamed: 0,Close,Volume,High_Low_Pct,Open_Close_Pct,Day_Of_Week,Month_Of_Year,Quarter_Of_Year
0,0.000000,0.008925,0.081764,0.193811,0.833333,1.0,1.0
1,0.006946,0.020057,0.047454,0.192291,1.000000,1.0,1.0
2,0.009084,0.011096,0.000000,0.189650,0.000000,1.0,1.0
3,0.012580,0.023945,0.019690,0.191727,0.166667,1.0,1.0
4,0.051411,0.077130,0.239106,0.186352,0.333333,1.0,1.0
...,...,...,...,...,...,...,...
360,0.654109,0.044822,0.048717,0.171630,0.166667,1.0,1.0
361,0.650104,0.029176,0.084043,0.159411,0.333333,1.0,1.0
362,0.592014,0.032559,0.141697,0.171933,0.500000,1.0,1.0
363,0.583220,0.036661,0.119429,0.201692,0.666667,1.0,1.0


In [53]:
# Read ../data/test.csv into `df_test`; the bare name on the last line
# makes the notebook display the frame as the cell output.
df_test = pd.read_csv("../data/test.csv")
df_test

Unnamed: 0,Close,Volume,High_Low_Pct,Open_Close_Pct,Day_Of_Week,Month_Of_Year,Quarter_Of_Year
0,0.641769,0.009487,0.063496,0.198277,1.000000,1.0,1.0
1,0.572847,0.040276,0.194433,0.172954,0.000000,1.0,1.0
2,0.570286,0.047717,0.068952,1.233623,0.166667,1.0,1.0
3,0.617127,0.053446,0.110560,0.191364,0.333333,1.0,1.0
4,0.591876,0.025529,0.060670,0.182496,0.500000,1.0,1.0
...,...,...,...,...,...,...,...
360,-0.040103,0.002671,0.014960,0.187984,0.333333,1.0,1.0
361,-0.032200,0.005144,0.032581,0.187136,0.500000,1.0,1.0
362,-0.034257,0.004637,-0.012056,0.190973,0.666667,1.0,1.0
363,-0.034348,-0.018307,-0.024171,0.190842,0.833333,1.0,1.0


In [54]:
# Read ../data/BTC-USD.csv into `df` and display it as the cell output.
df = pd.read_csv("../data/BTC-USD.csv")
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-12-12,18051.320313,18919.550781,18046.041016,18803.656250,18803.656250,21752580802
1,2020-12-13,18806.765625,19381.535156,18734.332031,19142.382813,19142.382813,25450468637
2,2020-12-14,19144.492188,19305.099609,19012.708984,19246.644531,19246.644531,22473997681
3,2020-12-15,19246.919922,19525.007813,19079.841797,19417.076172,19417.076172,26741982541
4,2020-12-16,19418.818359,21458.908203,19298.316406,21310.597656,21310.597656,44409011479
...,...,...,...,...,...,...,...
726,2022-12-08,16847.349609,17267.916016,16788.783203,17233.474609,17233.474609,20496603770
727,2022-12-09,17232.148438,17280.546875,17100.835938,17133.152344,17133.152344,20328426366
728,2022-12-10,17134.220703,17216.826172,17120.683594,17128.724609,17128.724609,12706781969
729,2022-12-11,17129.710938,17245.634766,17091.820313,17104.193359,17104.193359,14122486832


In [55]:
# Days in two 365-day years.
# NOTE(review): presumably a sanity check against the row count of the BTC-USD frame — confirm.
2*365

730

In [111]:
# Fit TSModel on 30-row windows with "Close" as the target column.
# NOTE(review): TSModel is not defined in any cell visible in this notebook;
# it must already exist in the kernel for this cell to run.
train_model(
    model=TSModel(df_train.shape[1]),
    train_df=df_train,
    test_df=df_test,
    label_name="Close",
    sequence_length=30,
    batch_size=10,
    n_epochs=40,
    n_epochs_stop=20,
    lr=0.001,
)

Starting with model training...
input shape: 10 30 7
output shape: 10 30 64 hidden shape: 2 10 64
model output: 10 1

Epoch 1 train loss: 0.1495 test loss: 0.2826
Epoch 2 train loss: 0.0552 test loss: 0.1772
Epoch 3 train loss: 0.0611 test loss: 0.1177
Epoch 4 train loss: 0.0655 test loss: 0.1097
Epoch 5 train loss: 0.0517 test loss: 0.1274
Epoch 6 train loss: 0.046 test loss: 0.1184
Epoch 7 train loss: 0.0396 test loss: 0.1027
Epoch 8 train loss: 0.0378 test loss: 0.0871
Epoch 9 train loss: 0.0323 test loss: 0.0674
Epoch 10 train loss: 0.0285 test loss: 0.04
Epoch 11 train loss: 0.0244 test loss: 0.0245
Epoch 12 train loss: 0.011 test loss: 0.0196
Epoch 13 train loss: 0.0099 test loss: 0.0512
Epoch 14 train loss: 0.0113 test loss: 0.0423
Epoch 15 train loss: 0.006 test loss: 0.0314
Epoch 16 train loss: 0.0057 test loss: 0.0288
Epoch 17 train loss: 0.0055 test loss: 0.026
Epoch 18 train loss: 0.0057 test loss: 0.022
Epoch 19 train loss: 0.0053 test loss: 0.029
Epoch 20 train loss: 0.00

Unnamed: 0,training_loss,test_loss
0,0.14951,0.282596
1,0.055157,0.177239
2,0.061057,0.117689
3,0.065541,0.109684
4,0.051653,0.127381
5,0.045953,0.118389
6,0.039573,0.102711
7,0.037757,0.08715
8,0.032257,0.067441
9,0.028499,0.040018


In [133]:
class AttentionBlock(nn.Module):
    """Sigmoid gating over the time axis of a (batch, time, features) tensor.

    NOTE: this re-defines ``AttentionBlock`` from an earlier cell, replacing
    that version's Softmax with a Sigmoid. Once this cell has run, any model
    constructed afterwards that refers to ``AttentionBlock`` gets this variant.

    Args:
        time_steps: Length of the time axis (axis 1) of the inputs.
    """

    def __init__(self, time_steps=30):
        super().__init__()

        scorer = nn.Linear(time_steps, time_steps)
        gate = nn.Sigmoid()
        self.linear = nn.Sequential(scorer, gate)

    # inputs: batch size * window size(time step) * lstm output dims
    def forward(self, inputs):
        """Scale ``inputs`` element-wise by gates in (0, 1)."""
        # Time goes last so the linear layer mixes values across time steps.
        time_last = inputs.permute(0, 2, 1)
        gates = self.linear(time_last).permute(0, 2, 1)
        return gates * inputs

    
class TSModelAttention(nn.Module):
    """Stacked LSTM whose per-step outputs are re-weighted and regressed.

    Every LSTM output step is re-weighted by ``AttentionBlock``, the result is
    flattened to ``time_steps * n_hidden`` features, and one linear layer
    produces a single value per sample.

    Args:
        n_features: Size of the last axis of the input.
        time_steps: Length of the time axis (axis 1) of the input.
        n_hidden: LSTM hidden size.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, n_features, time_steps, n_hidden=64, n_layers=2):
        super(TSModelAttention, self).__init__()

        self.n_hidden = n_hidden
        self.time_steps = time_steps
        self.n_features = n_features
        self.lstm = nn.LSTM(
            input_size=n_features,
            hidden_size=n_hidden,
            batch_first=True,
            num_layers=n_layers
        )
        self.attention = AttentionBlock(time_steps)
        self.linear = nn.Linear(self.time_steps*self.n_hidden, 1)
        # Flipped to True after the shapes have been printed once.
        self.printed = False

    def forward(self, x):
        """Return one prediction per sample, shape (batch, 1)."""
        lstm_output, (hidden, _) = self.lstm(x)
        # One-off shape report on the first forward pass; `hidden` is only
        # used here. The prediction is built from the per-step outputs, so the
        # former unused `lstm_out = hidden[-1]` assignment has been removed.
        if not self.printed:
            print("output shape:", *lstm_output.shape, "hidden shape:", *hidden.shape)
            self.printed = True
            print()
        weighted = self.attention(lstm_output)
        # Flatten (time_steps, n_hidden) into one feature vector per sample.
        flat = weighted.reshape(-1, self.time_steps*self.n_hidden)
        y_pred = self.linear(flat)

        return y_pred

In [135]:
# Fit the single-layer attention model on 30-row windows with "Close" as the target column.
train_model(
    model=TSModelAttention(
        n_features=df_train.shape[1],
        time_steps=30,
        n_hidden=64,
        n_layers=1,
    ),
    train_df=df_train,
    test_df=df_test,
    label_name="Close",
    sequence_length=30,
    batch_size=10,
    n_epochs=30,
    n_epochs_stop=20,
    lr=0.001,
)

Starting with model training...
input shape: 10 30 7
output shape: 10 30 64 hidden shape: 1 10 64

model output: 10 1

Epoch 1 train loss: 0.137 test loss: 0.1445
Epoch 2 train loss: 0.0605 test loss: 0.1355
Epoch 3 train loss: 0.0612 test loss: 0.1225
Epoch 4 train loss: 0.0504 test loss: 0.1176
Epoch 5 train loss: 0.0432 test loss: 0.1059
Epoch 6 train loss: 0.0393 test loss: 0.0937
Epoch 7 train loss: 0.036 test loss: 0.0824
Epoch 8 train loss: 0.032 test loss: 0.0712
Epoch 9 train loss: 0.0274 test loss: 0.0597
Epoch 10 train loss: 0.0226 test loss: 0.0484
Epoch 11 train loss: 0.0178 test loss: 0.0376
Epoch 12 train loss: 0.0134 test loss: 0.0284
Epoch 13 train loss: 0.0101 test loss: 0.0243
Epoch 14 train loss: 0.0082 test loss: 0.0267
Epoch 15 train loss: 0.0076 test loss: 0.0312
Epoch 16 train loss: 0.0073 test loss: 0.0344
Epoch 17 train loss: 0.0071 test loss: 0.0353
Epoch 18 train loss: 0.0073 test loss: 0.035
Epoch 19 train loss: 0.0081 test loss: 0.0338
Epoch 20 train loss:

Unnamed: 0,training_loss,test_loss
0,0.137022,0.144499
1,0.060537,0.135549
2,0.0612,0.122494
3,0.050381,0.117582
4,0.043236,0.105892
5,0.039307,0.093682
6,0.035971,0.082431
7,0.032012,0.071198
8,0.027418,0.059735
9,0.022584,0.048439
