In [55]:
import numpy as np
import torch

In [56]:
import torch.nn as nn

# Encoder Class
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, dropout, seq_len):
        super(Encoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.seq_len = seq_len

        self.lstm_enc = nn.LSTM(input_size=input_size, hidden_size=hidden_size, dropout=dropout, batch_first=True)

    def forward(self, x):
        out, (last_h_state, last_c_state) = self.lstm_enc(x)
        x_enc = last_h_state.squeeze(dim=0)
        x_enc = x_enc.unsqueeze(1).repeat(1, x.shape[1], 1)
        return x_enc, out


# Decoder Class
class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, dropout, seq_len, use_act):
        super(Decoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.seq_len = seq_len
        self.use_act = use_act  # Parameter to control the last sigmoid activation - depends on the normalization used.
        self.act = nn.Sigmoid()

        self.lstm_dec = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, z):
        # z = z.unsqueeze(1).repeat(1, self.seq_len, 1)
        dec_out, (hidden_state, cell_state) = self.lstm_dec(z)
        dec_out = self.fc(dec_out)
        if self.use_act:
            dec_out = self.act(dec_out)

        return dec_out, hidden_state


# LSTM Auto-Encoder Class
class LSTMAE(nn.Module):
    def __init__(self, input_size, hidden_size, dropout_ratio, seq_len, use_act=True):
        super(LSTMAE, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.dropout_ratio = dropout_ratio
        self.seq_len = seq_len

        self.encoder = Encoder(input_size=input_size, hidden_size=hidden_size, dropout=dropout_ratio, seq_len=seq_len)
        self.decoder = Decoder(input_size=input_size, hidden_size=hidden_size, dropout=dropout_ratio, seq_len=seq_len, use_act=use_act)

    def forward(self, x, return_last_h=False, return_enc_out=False):
        x_enc, enc_out = self.encoder(x)
        x_dec, last_h = self.decoder(x_enc)

        if return_last_h:
            return x_dec, last_h
        elif return_enc_out:
            return x_dec, enc_out
        return x_dec


In [57]:
from torch.utils.data import Dataset
import pandas as pd

class SequenceDataset(Dataset):
    def __init__(self, df: pd.DataFrame, seq_len):
        super().__init__()
        self.seq_len = seq_len
        self.sequences = []

        for _, group in df.groupby("experiment"):
            group = group.drop(columns=['time', 'experiment'])
            self.sequences.extend(self.create_sequences(group))

    def create_sequences(self, group):
        sequences = []
        for i in range(len(group) - self.seq_len+1):
            seq = group.iloc[i:i+self.seq_len].values
            sequences.append(torch.tensor(seq, dtype=torch.float32))
        
        return sequences

    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, index):
        return self.sequences[index], self.sequences[index]

In [67]:
df = pd.read_csv('data/ae_data.csv')
df.drop("Unnamed: 0", axis=1, inplace=True)
df['experiment'] = df.index // 5000

In [68]:
df.drop(columns=['pos', 'vel', 'attack', 'attack_pred', 'cusum_stat'], inplace=True)

In [69]:
df.head()

Unnamed: 0,time,est_pos,est_vel,det_est_pos,det_est_vel,measured_vel,reference_vel,ctl_signal,residual,experiment
0,0.0,0.0,16.442814,0.159524,16.008867,15.461929,17.442814,1.0,-0.490442,0
1,0.01,0.159524,16.008867,0.322726,16.688765,17.360445,17.442814,0.103002,0.704115,0
2,0.02,0.322726,16.688765,0.491437,17.099771,17.521555,17.442814,-0.058896,0.432567,0
3,0.03,0.491437,17.099771,0.66332,17.303445,17.517581,17.442814,-0.055669,0.214265,0
4,0.04,0.66332,17.303445,0.834291,16.865072,16.313931,17.442814,1.0,-0.494729,0


In [62]:
train_ds = SequenceDataset(df[~df.experiment.isin([45, 46, 47, 48, 49])], seq_len=50)
val_ds = SequenceDataset(df[df.experiment.isin([45, 46])], seq_len=50)
test_ds = SequenceDataset(df[df.experiment.isin([47, 48, 49])], seq_len=50)

In [38]:
from torch.utils.data.dataloader import DataLoader
from torch.utils.tensorboard import SummaryWriter
def train_epoch(model: nn.Module, dataloader: DataLoader, criterion, optimizer, writer: SummaryWriter, epoch: int):
    model.train()
    for i, (x_batch, y_batch) in enumerate(dataloader):
        
        optimizer.zero_grad()

        model_out = model(x_batch)
        loss = criterion(model_out, y_batch)
        loss.backward()
        writer.add_scalar("loss/train", loss.item(), global_step=epoch*len(dataloader)+i)

        optimizer.step()

    return model

def eval_model(model: nn.Module, dataloader: DataLoader, criterion, writer: SummaryWriter, epoch: int):
    model.eval()
    total_loss = 0.0
    for i, (x_batch, y_batch) in enumerate(dataloader):
        
        with torch.no_grad():
            model_out = model(x_batch)
            loss = criterion(model_out, y_batch)
            total_loss += loss.item()
    
    total_loss = total_loss / len(dataloader)
    writer.add_scalar("loss/validation", total_loss, global_step=epoch)
    return total_loss

In [42]:
def standardize_data(df: pd.DataFrame):
    for col in df.columns:
        df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
    return df

In [70]:
from tqdm import tqdm

df = pd.read_csv('data/ae_data.csv')
df.drop("Unnamed: 0", axis=1, inplace=True)
df['experiment'] = df.index // 5000
df.drop(columns=['pos', 'vel', 'attack', 'attack_pred', 'cusum_stat'], inplace=True)

train_ds = SequenceDataset(standardize_data(df[~df.experiment.isin([45, 46, 47, 48, 49])]), seq_len=50)
val_ds = SequenceDataset(standardize_data(df[df.experiment.isin([45, 46])]), seq_len=50)
test_ds = SequenceDataset(standardize_data(df[df.experiment.isin([47, 48, 49])]), seq_len=50)

writer = SummaryWriter(log_dir="./tensorboard_logs")

dataloader = DataLoader(train_ds, batch_size=4, shuffle=True)
val_dataloader = DataLoader(val_ds, batch_size=4, shuffle=True)
# Hyperparams
lr = 0.001

# Training params
NUM_EPOCHS = 50

# Model, Optimizer, Loss function
model = LSTMAE(input_size=8, hidden_size=64, dropout_ratio=0.05, seq_len=5, use_act=True)
optimizer = torch.optim.Adam(model.parameters(), lr)
criterion = torch.nn.MSELoss()

best_loss = float('inf')
for epoch in tqdm(range(NUM_EPOCHS), desc="Epoch"):
    model = train_epoch(model, dataloader, criterion, optimizer, writer, epoch)
    val_loss = eval_model(model, val_dataloader, criterion, writer, epoch)

    if val_loss < best_loss:
        torch.save(model, f"./models/ep{epoch}.pt")
        print(f"Best Validation Loss: {val_loss}")
        best_loss = val_loss

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].mi

KeyboardInterrupt: 

In [36]:
for params in model.parameters():
    print(params)
    break

Parameter containing:
tensor([[ 0.3493,  0.2138,  0.4693,  ...,  0.0509, -0.8109, -0.0593],
        [ 0.1228, -0.0900,  0.0730,  ..., -0.0366,  0.0244,  0.0203],
        [ 0.1838,  0.2163,  0.0255,  ...,  0.3073, -0.4344,  0.7951],
        ...,
        [ 0.0408, -0.0859,  0.0927,  ..., -0.1854, -2.0009, -0.0689],
        [-0.0855, -0.1021,  0.0452,  ..., -0.0476, -0.2275,  0.1095],
        [ 0.0551, -0.0714, -0.1079,  ..., -0.1081,  0.1001, -0.0120]],
       requires_grad=True)


In [52]:
dp = torch.tensor(df.iloc[:50].values, dtype=torch.float32)

In [53]:
dp

tensor([[ 0.0000e+00,  0.0000e+00,  1.9000e+01,  1.8620e-01,  1.8676e+01,
          1.8240e+01,  1.0000e+00, -3.8015e-01,  0.0000e+00],
        [ 1.0000e-02,  1.8620e-01,  1.8676e+01,  3.7516e-01,  1.9276e+01,
          1.9863e+01,  1.5582e-01,  6.2173e-01,  0.0000e+00],
        [ 2.0000e-02,  3.7516e-01,  1.9276e+01,  5.6845e-01,  1.9449e+01,
          1.9574e+01,  4.4961e-01,  1.6605e-01,  0.0000e+00],
        [ 3.0000e-02,  5.6845e-01,  1.9449e+01,  7.6258e-01,  1.9388e+01,
          1.9240e+01,  7.9096e-01, -8.3876e-02,  0.0000e+00],
        [ 4.0000e-02,  7.6258e-01,  1.9388e+01,  9.5752e-01,  1.9693e+01,
          1.9998e+01,  3.2545e-02,  3.3730e-01,  0.0000e+00],
        [ 5.0000e-02,  9.5752e-01,  1.9693e+01,  1.1562e+00,  2.0180e+01,
          2.0757e+01, -7.3405e-01,  5.4815e-01,  0.0000e+00],
        [ 6.0000e-02,  1.1562e+00,  2.0180e+01,  1.3562e+00,  1.9654e+01,
          1.9016e+01,  1.0000e+00, -5.9683e-01,  0.0000e+00],
        [ 7.0000e-02,  1.3562e+00,  1.9654e+01, 

In [54]:
model.eval()

model(dp)

RuntimeError: input.size(-1) must be equal to input_size. Expected 7, got 9