# DATA COLLECTION AND PREPROCESSING:

In [80]:
from pathlib import Path # convenient way to deal w/ paths
import plotly.graph_objects as go # creates plots
import numpy as np # standard for data processing
import pandas as pd # standard for data processing
import json # we have anomalies' timestamps in json format

In [81]:
# Root of the local NAB git checkout (expected inside the working directory)
nab = Path.cwd().joinpath('NAB')

# Every metric CSV lives below this folder
data_path = nab.joinpath('data')

# One JSON file holds the anomaly timestamps for all metric files
labels_filepath = nab.joinpath('labels/combined_labels.json')

# Training and validation files, given relative to data_path.
# The same strings are used further down as keys into the labels JSON.
training_filename = 'realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv'
valid_filename = 'realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv'

In [82]:
# Load the anomaly labels; the resulting mapping is indexed by metric file name below
with labels_filepath.open('r') as f:
    anomalies_timestamps = json.load(f)

In [83]:
# Read the training and validation metric files into DataFrames;
# the last expression previews the first rows of the training frame.
train = pd.read_csv(data_path/training_filename)
valid = pd.read_csv(data_path/valid_filename)
train.head()

Unnamed: 0,timestamp,value
0,2014-02-14 14:30:00,6.456
1,2014-02-14 14:35:00,5.816
2,2014-02-14 14:40:00,6.268
3,2014-02-14 14:45:00,5.816
4,2014-02-14 14:50:00,5.862


In [84]:
from sklearn.preprocessing import StandardScaler

# Reusable so the training and validation frames are processed the same way
def parse_and_standardize(df: pd.DataFrame, scaler: StandardScaler = None):
    """Parse timestamps and add a standardized ``stand_value`` column, in place.

    ``df`` is modified: ``timestamp`` is converted with ``pd.to_datetime`` and a
    new ``stand_value`` column holds the scaled ``value`` column.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with ``timestamp`` and ``value`` columns.
    scaler : StandardScaler, optional
        Already fitted scaler to reuse. When omitted, a new ``StandardScaler``
        is fitted on ``df['value']``.

    Returns
    -------
    StandardScaler
        The scaler that produced ``stand_value`` (the one passed in, or the
        newly fitted one), so it can be reused for other frames.
    """
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    # The scaler API works on 2-D input: one column, one row per observation
    values = df['value'].to_numpy().reshape(-1, 1)
    # Explicit None check: "not provided" must not depend on the truthiness
    # of whatever scaler object the caller hands in.
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(values)
    # Flatten back to 1-D so the column receives a plain vector
    df['stand_value'] = scaler.transform(values).ravel()
    return scaler

# Fit the scaler on the training frame only, then reuse it for the validation frame.
# The second call is the cell's last expression, so the returned scaler is displayed.
data_scaler = parse_and_standardize(train)
parse_and_standardize(valid, data_scaler)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [85]:
# The labels are loaded from JSON as strings, while 'timestamp' was parsed to
# datetime64 by parse_and_standardize(). Convert the labels explicitly so the
# comparison is datetime-to-datetime and does not rely on isin() implicitly
# parsing strings.
train_anomalies = train[train['timestamp'].isin(
    pd.to_datetime(anomalies_timestamps[training_filename]))]
valid_anomalies = valid[valid['timestamp'].isin(
    pd.to_datetime(anomalies_timestamps[valid_filename]))]
train_anomalies

Unnamed: 0,timestamp,value,stand_value
3080,2014-02-25 07:15:00,25.1033,4.652449
3579,2014-02-27 00:50:00,19.165,3.026441


In [86]:
# Axis titles shared by the figure
layout = {'xaxis': {'title': 'Timestamp'}, 'yaxis': {'title': 'CPU Utilization'}}

# Empty figure carrying only the layout; traces are added below
fig = go.Figure(layout=layout)

# Every training point (small blue markers, legend entry 'Non-anomaly')
fig.add_trace(go.Scatter(
    x=train['timestamp'], y=train['value'],
    mode='markers', name='Non-anomaly',
    marker={'color': 'blue'},
))

# Labelled anomalies drawn on top with larger green markers
fig.add_trace(go.Scatter(
    x=train_anomalies['timestamp'], y=train_anomalies['value'],
    mode='markers', name='Anomaly',
    marker={'color': 'green', 'size': 13},
))

In [87]:
# Same view for the validation file: all points first, labelled anomalies on top
fig = go.Figure(data=[
    go.Scatter(
        x=valid['timestamp'], y=valid['value'],
        mode='markers', name='Non-anomaly',
        marker={'color': 'blue'},
    ),
    go.Scatter(
        x=valid_anomalies['timestamp'], y=valid_anomalies['value'],
        mode='markers', name='Anomaly',
        marker={'color': 'green', 'size': 13},
    ),
])
fig

# ARIMA MODEL:

In [88]:
import statsmodels.api as sm
from itertools import product
import warnings
# Ignore every warning from here on (no category or module filter is given),
# presumably to keep the output of the model fits below readable.
warnings.filterwarnings('ignore')

In [89]:
def write_predict(train_df: pd.DataFrame, valid_df: pd.DataFrame,
                  seasonal_period: int = 12):
    """Select a SARIMAX model by AIC and write its predictions into both frames.

    A grid of (p, q, P, Q) orders is fitted on ``train_df.value`` with the
    differencing orders fixed to ``d = D = 1``; the fit with the lowest AIC
    wins. Its in-sample predictions are stored in ``train_df['predict']``.
    A second model with the same orders is then fitted on ``valid_df.value``
    and its in-sample predictions are stored in ``valid_df['predict']``.
    Missing predictions are replaced by 0. Both frames are modified in place.

    Parameters
    ----------
    train_df, valid_df : pd.DataFrame
        Frames with a ``value`` column; each receives a ``predict`` column.
    seasonal_period : int, default 12
        Number of observations per seasonal cycle passed to SARIMAX.

    Raises
    ------
    RuntimeError
        If no candidate model could be fitted on the training data.
    """
    # Differencing orders are fixed; only the AR/MA orders are searched
    D = 1
    d = 1
    # Candidates are (p, q, P, Q): p, q, P in 0..2 and Q in 0..1
    parameters_list = list(product(range(0, 3), range(0, 3), range(0, 3), range(0, 2)))

    # Best Model Selection
    best_model = None
    best_param = None
    best_aic = float("inf")
    for param in parameters_list:
        p, q, P, Q = param
        try:
            model = sm.tsa.statespace.SARIMAX(
                train_df.value, order=(p, d, q),
                seasonal_order=(P, D, Q, seasonal_period),
                initialization='approximate_diffuse'
                ).fit()
        except (ValueError, np.linalg.LinAlgError):
            # Some order combinations cannot be fitted; report and move on
            print('wrong parameters:', param)
            continue
        if model.aic < best_aic:
            best_model = model
            best_aic = model.aic
            best_param = param

    # Fail with a clear message instead of an unbound-variable error below
    if best_model is None:
        raise RuntimeError('no SARIMAX candidate could be fitted on the training data')

    # Writing of the predictions for training data.
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection, which may operate on a temporary copy.
    train_df['predict'] = best_model.predict()
    train_df['predict'] = train_df['predict'].fillna(0)

    # Writing of the predictions for validation data: the selected orders are
    # reused, but the model itself is fitted again on the validation series.
    best_model_valid = sm.tsa.statespace.SARIMAX(
        valid_df.value, order=(best_param[0], d, best_param[1]),
        seasonal_order=(best_param[2], D, best_param[3], seasonal_period),
        initialization='approximate_diffuse'
        ).fit()
    valid_df['predict'] = best_model_valid.predict()
    valid_df['predict'] = valid_df['predict'].fillna(0)
    

# Run the model selection; this adds a 'predict' column to both frames in place
write_predict(train, valid)

In [90]:
# Training data: observed values versus the 'predict' column written by write_predict()
fig = go.Figure()
fig.add_traces([
    go.Scatter(
        x=train['timestamp'], y=train['value'],
        mode='markers', name='ground truth',
        marker={'color': 'blue'},
    ),
    go.Scatter(
        x=train['timestamp'], y=train['predict'],
        mode='markers', name='predicted values',
        marker={'color': 'green'},
    ),
])

In [91]:
# Validation data: observed values versus the 'predict' column written by write_predict()
fig = go.Figure()
fig.add_traces([
    go.Scatter(
        x=valid['timestamp'], y=valid['value'],
        mode='markers', name='ground truth',
        marker={'color': 'blue'},
    ),
    go.Scatter(
        x=valid['timestamp'], y=valid['predict'],
        mode='markers', name='predicted values',
        marker={'color': 'green'},
    ),
])

# CNN MODEL:

In [117]:
# PyTorch itself
import torch 
import tqdm
# Dataset - the base class to be inherited
from torch.utils.data import Dataset, DataLoader 
# We will need DataLoader later for the training process

In [118]:
class CPUDataset(Dataset):
    """Sliding windows over the ``stand_value`` column for next-value prediction.

    Each item is a pair ``(x, y)``: ``x`` has shape ``(1, size)`` and holds
    ``size`` consecutive values, ``y`` has shape ``(1,)`` and holds the value
    that follows them. Consecutive windows start ``step`` rows apart.
    """
    def __init__(self, data: pd.DataFrame, size: int,
                 step: int = 1):
        # Convert through NumPy so the values are taken by position. Handing
        # the Series itself to the tensor constructor goes through label-based
        # item access, which is slow and fails for a non-default index.
        values = torch.tensor(data['stand_value'].to_numpy(), dtype=torch.float32)
        # Windows of size + 1 values (inputs plus target), with a channel axis
        # inserted so the shape is (n_windows, 1, size + 1)
        self.chunks = values.unfold(0, size + 1, step).unsqueeze(1)

    def __len__(self):
        """Return the number of windows."""
        return self.chunks.size(0)

    def __getitem__(self, i):
        """Return ``(inputs, target)`` for window ``i`` (an index or a slice)."""
        x = self.chunks[i, :, :-1]
        # Keep the last position and drop only the channel axis
        y = self.chunks[i, :, -1:].squeeze(1)
        return x, y

In [119]:
# Number of consecutive standardized values the model sees for one prediction;
# CPUDataset builds windows of n_factors inputs followed by one target value.
n_factors = 10
train_ds = CPUDataset(train, n_factors)
valid_ds = CPUDataset(valid, n_factors)

In [120]:
# here PyTorch has all neural net functions and activations
import torch.nn as nn

def conv_layer(in_feat, out_feat, kernel_size=3, stride=1,
               padding=1, relu=True):
    """Build a bias-free Conv1d followed by BatchNorm1d and, optionally, ReLU.

    Returns an ``nn.Sequential`` with two modules (``relu=False``) or three.
    """
    conv = nn.Conv1d(in_feat, out_feat, kernel_size=kernel_size,
                     stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm1d(out_feat)
    modules = (conv, norm, nn.ReLU()) if relu else (conv, norm)
    return nn.Sequential(*modules)

In [121]:
class ResBlock(nn.Module):
    """Two stacked conv layers whose output is added to the block input.

    When the channel count changes, the input goes through a 1x1 conv layer
    first so both operands of the sum have ``out_feat`` channels. No
    activation is applied after the addition.
    """
    def __init__(self, in_feat, out_feat):
        super().__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.conv1 = conv_layer(in_feat, out_feat)
        self.conv2 = conv_layer(out_feat, out_feat, relu=False)
        if self.apply_shortcut:
            # Projection for the skip path: kernel size 1, no padding, no ReLU
            self.shortcut = conv_layer(in_feat, out_feat, kernel_size=1,
                                       padding=0, relu=False)

    @property
    def apply_shortcut(self):
        """True when input and output channel counts differ."""
        return self.in_feat != self.out_feat

    def forward(self, x):
        hidden = self.conv1(x)
        skip = self.shortcut(x) if self.apply_shortcut else x
        return skip + self.conv2(hidden)

In [122]:
class AdaptiveConcatPool1d(nn.Module):
    """Pool the last axis down to length 1 with both max and average pooling.

    The two results are concatenated along the channel axis (max first, then
    average), so the output has twice as many channels as the input.
    """
    def __init__(self):
        super().__init__()
        self.ap = nn.AdaptiveAvgPool1d(1)
        self.mp = nn.AdaptiveMaxPool1d(1)

    def forward(self, x):
        pooled = (self.mp(x), self.ap(x))
        return torch.cat(pooled, dim=1)

In [123]:
class CNN(nn.Module):
    """Residual 1-D CNN: eight ResBlocks, concat pooling and a linear head."""
    def __init__(self, out_size):
        super().__init__()
        # (in, out) channels of each residual block, in order. The conv
        # settings used by ResBlock keep the sequence length unchanged, so the
        # base output has shape (batch, 64, n_factors).
        channel_plan = [
            (1, 8), (8, 8),
            (8, 16), (16, 16),
            (16, 32), (32, 32),
            (32, 64), (64, 64),
        ]
        self.base = nn.Sequential(
            *(ResBlock(c_in, c_out) for c_in, c_out in channel_plan)
        )
        self.head = nn.Sequential(
            AdaptiveConcatPool1d(),   # shape = batch, 128, 1 (max + avg of 64 channels)
            nn.Flatten(),
            nn.Linear(128, out_size),
        )

    def forward(self, x):
        return self.head(self.base(x))

In [124]:
def train_model(model: CNN, dataloaders: dict, optimizer: torch.optim.Optimizer, 
                scheduler, criterion, device: torch.device, epochs: int):
    """Train ``model`` and evaluate it after every epoch.

    Each epoch runs a 'train' phase followed by a 'valid' phase over
    ``dataloaders[phase]``, which must yield ``(x, y)`` batches. In the train
    phase the optimizer and the scheduler are both stepped once per batch.
    Progress is printed per epoch and phase.

    Returns a dict with keys 'train' and 'valid', each a list holding the
    sample-weighted mean loss of every epoch.

    Note: a later cell of this notebook defines another ``train_model`` with
    the same signature; whichever was executed last is the one in effect.
    """
    history = {'train': [], 'valid': []}
    model.to(device)

    for epoch in range(epochs):
        print(f'Epoch {epoch}/{epochs-1}')

        for phase in ('train', 'valid'):
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            loss_sum = 0.
            sample_count = 0.

            loader = dataloaders[phase]
            progress = tqdm.tqdm(enumerate(loader), total=len(loader), leave=False)
            for _, (x, y) in progress:
                x = x.to(device)
                y = y.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training
                with torch.set_grad_enabled(is_training):
                    loss = criterion(model(x), y)

                    if is_training:
                        loss.backward()
                        optimizer.step()
                        scheduler.step()

                # Weight by batch size so a smaller last batch does not skew the mean
                batch_size = y.size(0)
                loss_sum += loss.item() * batch_size
                sample_count += batch_size

            epoch_loss = loss_sum / sample_count
            print(f'{phase.capitalize()} Loss: {epoch_loss}')
            history[phase].append(epoch_loss)
    return history

In [125]:
# Training configuration for the CNN
epochs = 50
# One output per window: the value that follows the n_factors inputs
cnn_model = CNN(out_size=1)
# Only the training loader shuffles its windows
dataloader = {
    'train': DataLoader(train_ds, batch_size=128, shuffle=True),
    'valid': DataLoader(valid_ds, batch_size=128)
}
# NOTE(review): the OneCycleLR scheduler below is built with max_lr=1e-3 and
# controls the optimizer's learning rate, so lr=1e-1 here looks like it is
# overridden — confirm and align the two values.
optim = torch.optim.Adam(cnn_model.parameters(), lr=1e-1, weight_decay=1e-3)
# steps_per_epoch equals the number of training batches because train_model()
# steps the scheduler once per training batch
sched = torch.optim.lr_scheduler.OneCycleLR(optim, max_lr=1e-3, steps_per_epoch=len(dataloader['train']), epochs=epochs)
criterion = nn.MSELoss()
# Use the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [126]:
# Train the CNN; 'losses' maps 'train' / 'valid' to the list of per-epoch mean losses
losses = train_model(cnn_model, dataloader, optim, sched, criterion, device, epochs)

  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Epoch 0/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 34.63it/s]

Train Loss: 0.6346121549250652


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 13.79it/s]

Valid Loss: 3.2489305823552317
Epoch 1/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.28it/s]

Train Loss: 0.17241920997615123


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.85it/s]

Valid Loss: 6.750586687777542
Epoch 2/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 28.64it/s]

Train Loss: 0.09234799583239202


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.42it/s]

Valid Loss: 3.2035556071910736
Epoch 3/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 30.96it/s]

Train Loss: 0.06347614593992833


  3%|██▌                                                                                | 1/32 [00:00<00:03,  8.75it/s]

Valid Loss: 1.8652817495482172
Epoch 4/49


  9%|███████▊                                                                           | 3/32 [00:00<00:00, 29.38it/s]

Train Loss: 0.053380304340815914


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 1.0851877908366405
Epoch 5/49


  9%|███████▊                                                                           | 3/32 [00:00<00:00, 29.47it/s]

Train Loss: 0.050026231017917734


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.60it/s]

Valid Loss: 1.1631856920380073
Epoch 6/49


  9%|███████▊                                                                           | 3/32 [00:00<00:00, 29.49it/s]

Train Loss: 0.05256358562069767


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.68it/s]

Valid Loss: 0.9178187225749397
Epoch 7/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.11it/s]

Train Loss: 0.0777854579841243


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 1.96739901141377
Epoch 8/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 32.67it/s]

Train Loss: 0.20541785354343947


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.96it/s]

Valid Loss: 1.5540862164053972
Epoch 9/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.27it/s]

Train Loss: 0.058537521631577785


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 0.5131183435102116
Epoch 10/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 32.58it/s]

Train Loss: 0.04671550802756814


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 0.6272551732638889
Epoch 11/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 32.74it/s]

Train Loss: 0.03438195106233014


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.15it/s]

Valid Loss: 1.1093245166071846
Epoch 12/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.79it/s]

Train Loss: 0.03466613864741964


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.80it/s]

Valid Loss: 1.9374544252690127
Epoch 13/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.04it/s]

Train Loss: 0.025011275725903233


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 11.55it/s]

Valid Loss: 0.6086627343796427
Epoch 14/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.33it/s]

Train Loss: 0.029442441478845848


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.74it/s]

Valid Loss: 1.550807479896811
Epoch 15/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.28it/s]

Train Loss: 0.03360968721870283


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.85it/s]

Valid Loss: 1.307879576467863
Epoch 16/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.31it/s]

Train Loss: 0.026147055100188332


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 0.36390854902509906
Epoch 17/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.65it/s]

Train Loss: 0.026028906416624028


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.58it/s]

Valid Loss: 0.6921427488801255
Epoch 18/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 34.27it/s]

Train Loss: 0.022595070419191187


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.45it/s]

Valid Loss: 0.5328408991005633
Epoch 19/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 36.75it/s]

Train Loss: 0.022028974010569725


  3%|██▌                                                                                | 1/32 [00:00<00:05,  6.18it/s]

Valid Loss: 0.9055236353247632
Epoch 20/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 34.54it/s]

Train Loss: 0.022531916091912334


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.45it/s]

Valid Loss: 0.5553044770959596
Epoch 21/49


  9%|███████▊                                                                           | 3/32 [00:00<00:00, 29.33it/s]

Train Loss: 0.021159421048654602


  3%|██▌                                                                                | 1/32 [00:00<00:03, 10.00it/s]

Valid Loss: 0.6240460260524375
Epoch 22/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.34it/s]

Train Loss: 0.020892247613213774


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.66it/s]

Valid Loss: 0.5970837055288222
Epoch 23/49


  9%|███████▊                                                                           | 3/32 [00:00<00:00, 29.97it/s]

Train Loss: 0.020524037888895095


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.27it/s]

Valid Loss: 0.40203113176673283
Epoch 24/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 32.04it/s]

Train Loss: 0.019534190861977847


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 0.46298769013775576
Epoch 25/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 32.99it/s]

Train Loss: 0.019739902457421483


  0%|                                                                                           | 0/32 [00:00<?, ?it/s]

Valid Loss: 0.40996714944806284
Epoch 26/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 34.95it/s]

Train Loss: 0.020596423952815725


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.83it/s]

Valid Loss: 0.28562447357291104
Epoch 27/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.44it/s]

Train Loss: 0.021719540768057065


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.30it/s]

Valid Loss: 0.9935045876887116
Epoch 28/49


  9%|███████▊                                                                           | 3/32 [00:00<00:01, 28.90it/s]

Train Loss: 0.019894301301782578


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.91it/s]

Valid Loss: 0.44824909023507087
Epoch 29/49


 16%|████████████▉                                                                      | 5/32 [00:00<00:00, 44.10it/s]

Train Loss: 0.019999614493455508


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.94it/s]

Valid Loss: 0.29555704715846487
Epoch 30/49


  9%|███████▊                                                                           | 3/32 [00:00<00:01, 26.71it/s]

Train Loss: 0.02047673098706839


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.99it/s]

Valid Loss: 0.48546555613860864
Epoch 31/49


 16%|████████████▉                                                                      | 5/32 [00:00<00:00, 42.98it/s]

Train Loss: 0.019117152724972653


  6%|█████▏                                                                             | 2/32 [00:00<00:01, 15.48it/s]

Valid Loss: 0.5829972201233062
Epoch 32/49


 16%|████████████▉                                                                      | 5/32 [00:00<00:00, 43.00it/s]

Train Loss: 0.01857630415457893


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.63it/s]

Valid Loss: 0.25854051189079386
Epoch 33/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.63it/s]

Train Loss: 0.01899093704984474


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.18it/s]

Valid Loss: 0.48402723357311117
Epoch 34/49


 22%|██████████████████▏                                                                | 7/32 [00:00<00:00, 58.27it/s]

Train Loss: 0.018106168216582497


  6%|█████▏                                                                             | 2/32 [00:00<00:01, 18.38it/s]

Valid Loss: 0.34566186980146607
Epoch 35/49


 19%|███████████████▌                                                                   | 6/32 [00:00<00:00, 55.95it/s]

Train Loss: 0.018583277985083115


  6%|█████▏                                                                             | 2/32 [00:00<00:01, 16.48it/s]

Valid Loss: 0.4608747054063699
Epoch 36/49


 19%|███████████████▌                                                                   | 6/32 [00:00<00:00, 54.28it/s]

Train Loss: 0.017836083330154746


  6%|█████▏                                                                             | 2/32 [00:00<00:01, 15.84it/s]

Valid Loss: 0.42073368069575473
Epoch 37/49


  9%|███████▊                                                                           | 3/32 [00:00<00:00, 29.40it/s]

Train Loss: 0.017562644137972656


  6%|█████▏                                                                             | 2/32 [00:00<00:03,  9.62it/s]

Valid Loss: 0.4701718762257632
Epoch 38/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 30.27it/s]

Train Loss: 0.017058828829874275


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.20it/s]

Valid Loss: 0.4830731645389495
Epoch 39/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.70it/s]

Train Loss: 0.01689639315981657


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 14.89it/s]

Valid Loss: 0.47692728981798055
Epoch 40/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 37.62it/s]

Train Loss: 0.017172220915607824


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 12.47it/s]

Valid Loss: 0.39857403810998804
Epoch 41/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.74it/s]

Train Loss: 0.016954339596094152


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.80it/s]

Valid Loss: 0.3444829021021539
Epoch 42/49


  9%|███████▊                                                                           | 3/32 [00:00<00:01, 24.30it/s]

Train Loss: 0.017232984607304762


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.21it/s]

Valid Loss: 0.4385293256667406
Epoch 43/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 33.09it/s]

Train Loss: 0.016509643992041398


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.63it/s]

Valid Loss: 0.39156988912601487
Epoch 44/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.32it/s]

Train Loss: 0.016468002490536274


  6%|█████▏                                                                             | 2/32 [00:00<00:01, 15.46it/s]

Valid Loss: 0.42063107061436494
Epoch 45/49


 16%|████████████▉                                                                      | 5/32 [00:00<00:00, 48.28it/s]

Train Loss: 0.01652241440666798


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.54it/s]

Valid Loss: 0.3875270898197789
Epoch 46/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 29.98it/s]

Train Loss: 0.016281756682646534


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.90it/s]

Valid Loss: 0.4012457134747671
Epoch 47/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 34.19it/s]

Train Loss: 0.01674570541732543


  6%|█████▏                                                                             | 2/32 [00:00<00:02, 10.96it/s]

Valid Loss: 0.40637719604149203
Epoch 48/49


  9%|███████▊                                                                           | 3/32 [00:00<00:01, 28.90it/s]

Train Loss: 0.01641225609196649


  3%|██▌                                                                                | 1/32 [00:00<00:03,  9.16it/s]

Valid Loss: 0.40696403639106715
Epoch 49/49


 12%|██████████▍                                                                        | 4/32 [00:00<00:00, 31.68it/s]

Train Loss: 0.016326337617422325


                                                                                                                       

Valid Loss: 0.38108371384534473




In [128]:
# Per-epoch loss curves for both phases
layout = {'xaxis': {'title': 'Epoch'}, 'yaxis': {'title': 'Loss'}}
fig = go.Figure(layout=layout)
fig.add_traces([
    go.Scatter(y=losses['train'], mode='lines', name='Train Loss'),
    go.Scatter(y=losses['valid'], mode='lines', name='Valid Loss'),
])

In [129]:
# Inference only: evaluation mode, gradient tracking disabled
cnn_model = cnn_model.eval()

with torch.no_grad():
    # ds[:][0] is the input half of the (x, y) pair for every window at once;
    # results are moved back to the CPU.
    res_train = cnn_model(train_ds[:][0].to(device)).cpu()
    res_valid = cnn_model(valid_ds[:][0].to(device)).cpu()

In [130]:
# Plot the CNN output (res_train). The 'predict' column must not be used here:
# it holds the ARIMA predictions written by write_predict().
# The model works on standardized values, so map its output back to the
# original units with the scaler fitted on the training data.
cnn_train_pred = data_scaler.inverse_transform(res_train.numpy()).ravel()

fig = go.Figure(layout=dict(xaxis=dict(title='Timestamp'),
                            yaxis=dict(title='CPU Utilization')))
fig.add_trace(go.Scatter(x=train['timestamp'], y=train['value'], 
                         mode='markers',name='ground truth',
                         marker=dict(color='blue')))
# Window i predicts the value that follows its n_factors inputs, so the i-th
# prediction belongs to row i + n_factors of the frame.
fig.add_trace(go.Scatter(x=train['timestamp'].iloc[n_factors:],
                         y=cnn_train_pred, 
                         mode='markers',name='predicted value',
                         marker=dict(color='orange')))

In [None]:
# Plot the CNN output (res_valid). The 'predict' column must not be used here:
# it holds the ARIMA predictions written by write_predict().
# The model works on standardized values, so map its output back to the
# original units with the scaler fitted on the training data.
cnn_valid_pred = data_scaler.inverse_transform(res_valid.numpy()).ravel()

fig = go.Figure(layout=dict(xaxis=dict(title='Timestamp'),
                            yaxis=dict(title='CPU Utilization')))
fig.add_trace(go.Scatter(x=valid['timestamp'], y=valid['value'], 
                         mode='markers',name='ground truth',
                         marker=dict(color='blue')))
# Window i predicts the value that follows its n_factors inputs, so the i-th
# prediction belongs to row i + n_factors of the frame.
fig.add_trace(go.Scatter(x=valid['timestamp'].iloc[n_factors:],
                         y=cnn_valid_pred, 
                         mode='markers',name='predicted value',
                         marker=dict(color='orange')))

# Long Short-Term Memory Neural Network:

In [106]:
class CPUDataset(Dataset):
    """Non-overlapping sequences of ``size`` values from the ``stand_value`` column.

    Each item is a 1-D float tensor of length ``size``; trailing values that
    do not fill a whole sequence are dropped. This class reuses the name of
    the windowed dataset defined earlier in the notebook and replaces it once
    this cell has run.
    """
    def __init__(self, data: pd.DataFrame, size: int):
        # Convert through NumPy so the values are taken by position. Handing
        # the Series itself to the tensor constructor goes through label-based
        # item access, which is slow and fails for a non-default index.
        values = torch.tensor(data['stand_value'].to_numpy(), dtype=torch.float32)
        # Step equals the window size, so the sequences do not overlap
        self.chunks = values.unfold(0, size, size)

    def __len__(self):
        """Return the number of sequences."""
        return self.chunks.size(0)

    def __getitem__(self, i):
        """Return sequence ``i``."""
        x = self.chunks[i]
        return x

# Sequences of 64 values for the LSTM.
# These assignments rebind train_ds / valid_ds, which held the CNN windows before.
train_ds = CPUDataset(train, 64)
valid_ds = CPUDataset(valid, 64)

In [107]:
class LSTMModel(nn.Module):
    """Single-layer LSTM with a linear read-out that keeps its state across calls.

    The (h, c) state left by one ``forward`` call is the initial state of the
    next one; it is detached after every call so gradients do not flow back
    into earlier calls. ``init_hidden`` resets the state to zeros.
    """
    def __init__(self, in_size, hidden_size, out_size, device):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(in_size, hidden_size)
        self.linear = nn.Linear(hidden_size, out_size)
        # Device on which init_hidden() creates the state tensors
        self.device = device
        self.init_hidden()

    def init_hidden(self):
        """Reset the carried (h, c) state to zeros on ``self.device``."""
        state_shape = (1, 1, self.hidden_size)
        self.hidden_state = (
            torch.zeros(state_shape).to(self.device),
            torch.zeros(state_shape).to(self.device),
        )

    def forward(self, x):
        seq_len = len(x)
        # The input is treated as one sequence: (seq_len, batch=1, features)
        lstm_out, new_state = self.lstm(
            x.view(seq_len, 1, -1), self.hidden_state
        )
        # Keep the state for the next call, cut off from this call's graph
        self.hidden_state = tuple(part.detach() for part in new_state)
        return self.linear(lstm_out.view(seq_len, -1))

In [108]:
def train_model(model: LSTMModel, dataloaders: dict, optimizer: torch.optim.Optimizer, 
                scheduler, criterion, device: torch.device, epochs: int):
    """Train ``model`` to reproduce its input sequences and evaluate every epoch.

    Each epoch runs a 'train' phase followed by a 'valid' phase over
    ``dataloaders[phase]``, which must yield tensors of values. Every batch is
    flattened to a column of shape (n, 1), passed through the model, and the
    output is compared with the very same values. In the train phase the
    optimizer and the scheduler are both stepped once per batch.

    Returns a dict with keys 'train' and 'valid', each a list holding the
    mean loss of every epoch, weighted by the number of time steps per batch.

    Note: this definition reuses the name of the CNN training function defined
    earlier in the notebook and replaces it once this cell has run.
    """
    history = {'train': [], 'valid': []}
    model.to(device)
    for epoch in range(epochs):
        print(f'Epoch {epoch}/{epochs-1}')
        for phase in ('train', 'valid'):
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            loss_sum = 0.
            step_count = 0.

            for sequence in dataloaders[phase]:
                value = sequence.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training
                with torch.set_grad_enabled(is_training):
                    # Input and target are the same values
                    out = model(value.view(-1, 1))
                    loss = criterion(out.view(-1), value.view(-1))

                    if is_training:
                        loss.backward()
                        optimizer.step()
                        scheduler.step()

                n_steps = out.size(0)
                loss_sum += loss.item() * n_steps
                step_count += n_steps

            epoch_loss = loss_sum / step_count
            print(f'{phase.capitalize()} Loss: {epoch_loss}')
            history[phase].append(epoch_loss)
    return history

In [109]:
# Training configuration for the LSTM
epochs = 50
# Resolve the device first: LSTMModel needs it to create its hidden state.
# Defining it after the model only worked when an earlier cell had already
# created a 'device' variable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# One input feature, 128 hidden units, one output per time step
model = LSTMModel(1, 128, 1, device)
# One sequence per batch, in original order (no shuffling)
dataloaders = {
    'train': DataLoader(train_ds, batch_size=1),
    'valid': DataLoader(valid_ds, batch_size=1)
}
optim = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# steps_per_epoch equals the number of training batches because train_model()
# steps the scheduler once per training batch
sched = torch.optim.lr_scheduler.OneCycleLR(
  optim, max_lr=1e-3, steps_per_epoch=len(dataloaders['train']), epochs=epochs
)
criterion = nn.MSELoss()

In [110]:
# Train the LSTM; 'losses' maps 'train' / 'valid' to the list of per-epoch mean losses
# (this rebinds the 'losses' variable used for the CNN above)
losses = train_model(model, dataloaders, optim, sched, criterion, device, epochs)

Epoch 0/49
Train Loss: 0.9274842351201981
Valid Loss: 9.807984102339972
Epoch 1/49
Train Loss: 0.839153376600099
Valid Loss: 8.978499842068505
Epoch 2/49
Train Loss: 0.660488348158579
Valid Loss: 6.928682058576554
Epoch 3/49
Train Loss: 0.0807980694068921
Valid Loss: 2.3914287277453004
Epoch 4/49
Train Loss: 0.07264868629031948
Valid Loss: 2.4769443627151233
Epoch 5/49
Train Loss: 0.04673295231565597
Valid Loss: 2.64144859389062
Epoch 6/49
Train Loss: 0.04081485432291788
Valid Loss: 2.6113968309724616
Epoch 7/49
Train Loss: 0.05140312655001051
Valid Loss: 2.8864285078136223
Epoch 8/49
Train Loss: 0.06481872857104809
Valid Loss: 2.83573654926722
Epoch 9/49
Train Loss: 0.07882539639692931
Valid Loss: 2.667318197824652
Epoch 10/49
Train Loss: 0.06727266299080044
Valid Loss: 2.704647759951296
Epoch 11/49
Train Loss: 0.04125916458562844
Valid Loss: 2.692615191910475
Epoch 12/49
Train Loss: 0.0338555355371523
Valid Loss: 2.6811779572791052
Epoch 13/49
Train Loss: 0.03086674804355772
Valid Lo

In [111]:
# Per-epoch loss curves for both phases
layout = {'xaxis': {'title': 'Epoch'}, 'yaxis': {'title': 'Loss'}}
fig = go.Figure(layout=layout)
fig.add_traces([
    go.Scatter(y=losses['train'], mode='lines', name='Train Loss'),
    go.Scatter(y=losses['valid'], mode='lines', name='Valid Loss'),
])

In [112]:
# Standardized series as flat float32 arrays, ready to be turned into tensors
train_values = train['stand_value'].to_numpy().astype(np.float32).ravel()
valid_values = valid['stand_value'].to_numpy().astype(np.float32).ravel()

In [113]:
# Inference only: evaluation mode, gradient tracking disabled
model.eval()

with torch.no_grad():
    # Each full series is passed as one long sequence, training series first;
    # the model carries its hidden state from one call to the next.
    res_train = model(torch.tensor(train_values).to(device)).cpu()
    res_valid = model(torch.tensor(valid_values).to(device)).cpu()

In [114]:
# Plot the LSTM output (res_train). The 'predict' column must not be used
# here: it holds the ARIMA predictions written by write_predict().
# The model works on standardized values, so map its output back to the
# original units with the scaler fitted on the training data.
lstm_train_pred = data_scaler.inverse_transform(res_train.numpy()).ravel()

fig = go.Figure(layout=dict(xaxis=dict(title='Timestamp'),
                            yaxis=dict(title='CPU Utilization')))
fig.add_trace(go.Scatter(x=train['timestamp'], y=train['value'], 
                         mode='markers',name='ground truth',
                         marker=dict(color='blue')))
# The model emits one output per input time step, so rows line up one-to-one
fig.add_trace(go.Scatter(x=train['timestamp'],
                         y=lstm_train_pred, 
                         mode='markers',name='predicted value',
                         marker=dict(color='orange')))

In [116]:
# Plot the LSTM output (res_valid). The 'predict' column must not be used
# here: it holds the ARIMA predictions written by write_predict().
# The model works on standardized values, so map its output back to the
# original units with the scaler fitted on the training data.
lstm_valid_pred = data_scaler.inverse_transform(res_valid.numpy()).ravel()

fig = go.Figure(layout=dict(xaxis=dict(title='Timestamp'),
                            yaxis=dict(title='CPU Utilization')))
fig.add_trace(go.Scatter(x=valid['timestamp'], y=valid['value'], 
                         mode='markers',name='ground truth',
                         marker=dict(color='blue')))
# The model emits one output per input time step, so rows line up one-to-one
fig.add_trace(go.Scatter(x=valid['timestamp'],
                         y=lstm_valid_pred, 
                         mode='markers',name='predicted value',
                         marker=dict(color='orange')))