In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf 
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from pytorch_tcn import TCN
from keras import layers, models



Get Data

In [None]:
df = pd.read_csv("./simulated_dataset/all_data.csv")

df_pivot = df.pivot(index="timestamp", columns="sensor_id", values=["temperature", "label"])
df_pivot = df_pivot.reset_index(drop=True)

df_pivot.head()

sensor_dfs = {}

for i in range(1, 5):
    sensor_df = df_pivot.xs(i, axis=1, level=1).copy()
    
    # fill temperature NaNs with mean
    sensor_df['temperature'] = sensor_df['temperature'].fillna(sensor_df['temperature'].mean())
    
    # fill label NaNs with mode (most common value)
    sensor_df['label'] = sensor_df['label'].fillna(sensor_df['label'].mode()[0])
    
    sensor_dfs[i] = sensor_df

sensor = 2



temperature    0
label          0
dtype: int64


Train/Test Split

In [148]:
train_dfs = {}
test_dfs = {}

for i, sensor_df in sensor_dfs.items():
    n = int(len(sensor_df) * .8)
    train_dfs[i] = sensor_df.iloc[:n].reset_index(drop=True)
    test_dfs[i] = sensor_df.iloc[n:].reset_index(drop=True)

train_dfs[sensor]

Unnamed: 0,temperature,label
0,48.257809,0.0
1,45.597643,0.0
2,46.191493,0.0
3,46.138341,0.0
4,53.772521,0.0
...,...,...
17275,41.906796,0.0
17276,42.636989,0.0
17277,40.625906,0.0
17278,49.301583,0.0


Convert train/test to sequences

In [149]:
# number of past timesteps the TCN sees
seq_length = 20

# creates sequences based on data with window size == seq_length
def create_sequences(df, seq_length=20):
    '''
    Sequences are needed for giving the model information about history, since feed-forward networks do not know the order of the data.
    TCN's expect input of shape [batch_size, num_features, sequence_length]
    
    Args:
        df: time-series data
        seq_length: length of each sequence
    
    Returns:
        X (shape: [num_samples, seq_length, num_inputs]), y (shape: [num_samples, num_inputs])
    '''
    X, y = [], []
    values = df['temperature'].values
    labels = df['label'].values
    
    X = np.lib.stride_tricks.sliding_window_view(values, window_shape=seq_length)
    X = X[:-1]
    y = labels[seq_length:]
    
    X = torch.from_numpy(X).unsqueeze(-1).float()
    y = torch.from_numpy(y).unsqueeze(-1).float()
    
    return X, y


X_train, y_train = create_sequences(train_dfs[sensor], seq_length)
X_test, y_test = create_sequences(test_dfs[sensor], seq_length)

num_ones = (y_train == 1).sum().item()
print(num_ones)

453


Normalize

In [None]:
X_train = X_train.float()
X_test = X_test.float()

mean = X_train.mean()
std = X_train.std()
X_train_norm = (X_train - mean) / std
X_test_norm = (X_test - mean) / std



tensor([[[-0.0505],
         [-0.4075],
         [-0.3278],
         ...,
         [ 0.8644],
         [-0.4438],
         [ 0.2202]],

        [[-0.4075],
         [-0.3278],
         [-0.3349],
         ...,
         [-0.4438],
         [ 0.2202],
         [-0.2978]],

        [[-0.3278],
         [-0.3349],
         [ 0.6897],
         ...,
         [ 0.2202],
         [-0.2978],
         [ 0.7658]],

        ...,

        [[-0.8262],
         [-0.7155],
         [-0.1987],
         ...,
         [ 0.0063],
         [-0.9029],
         [-0.8049]],

        [[-0.7155],
         [-0.1987],
         [-0.1509],
         ...,
         [-0.9029],
         [-0.8049],
         [-1.0748]],

        [[-0.1987],
         [-0.1509],
         [-0.6137],
         ...,
         [-0.8049],
         [-1.0748],
         [ 0.0896]]])

DataLoaders

In [155]:
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train_norm, y_train), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(TensorDataset(X_test_norm, y_test), batch_size=batch_size, shuffle=False)


Build CNN

In [156]:
num_inputs = 1
num_channels = [32, 32, 64, 64]
kernel_size = 4
dropout = 0.1

model = TCN(
    num_inputs=num_inputs,
    num_channels=num_channels,
    kernel_size=kernel_size,
    dropout=dropout,
    causal=True,
    use_skip_connections=True,
    output_projection=1,
    output_activation=None
)

Loss and Optimization

In [157]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=.001)

In [160]:
model.train()

for x_batch, y_batch in train_loader:
    xb = x_batch
    yb = y_batch
    break

xb = xb.permute(0, 2, 1)

y_pred = model(xb)
y_pred

tensor([[[0.3018, 0.3096, 0.3008, 0.3635, 0.2540, 0.3690, 0.2559, 0.2872,
          0.3344, 0.3827, 0.3859, 0.4297, 0.4203, 0.4057, 0.3438, 0.3500,
          0.3441, 0.2639, 0.2405, 0.2766]],

        [[0.3079, 0.2730, 0.3400, 0.3249, 0.3286, 0.2845, 0.2794, 0.2219,
          0.3987, 0.3399, 0.4733, 0.3980, 0.3767, 0.2991, 0.2779, 0.3069,
          0.2764, 0.2490, 0.2146, 0.3079]],

        [[0.3187, 0.2895, 0.3362, 0.3267, 0.3221, 0.3427, 0.3864, 0.3733,
          0.3458, 0.3241, 0.3870, 0.4311, 0.3328, 0.3150, 0.2031, 0.2728,
          0.2684, 0.1944, 0.2716, 0.2940]],

        [[0.2876, 0.2680, 0.3112, 0.3545, 0.3464, 0.3669, 0.3529, 0.2888,
          0.4230, 0.4281, 0.3473, 0.4551, 0.2958, 0.4182, 0.2661, 0.3298,
          0.2201, 0.1719, 0.2916, 0.2078]],

        [[0.2881, 0.3047, 0.2477, 0.3191, 0.3497, 0.3300, 0.3405, 0.3406,
          0.3772, 0.3612, 0.3390, 0.3577, 0.4030, 0.3067, 0.2701, 0.2398,
          0.2839, 0.2779, 0.3404, 0.2806]],

        [[0.3130, 0.3203, 0.3136, 0

Training

In [162]:
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    
    for x_batch, y_batch in train_loader:
        # reorders tensors to [batch_size, features, sequence_length]
        x_batch = x_batch.permute(0, 2, 1)
        
        # clears gradients from previous batch
        optimizer.zero_grad()
        
        # forward propagation
        y_pred = model(x_batch)
        loss = criterion(y_pred, y_batch.unsqueeze(-1).repeat(1, 1, 20))
        
        # backward propagation
        loss.backward()
        
        # update step and accumulate loss
        optimizer.step()
        x = loss.item()
        train_loss += loss.item() * x_batch.size(0)
        
    train_loss /= len(train_loader.dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}')

Epoch 1/10, Train Loss: 0.1935
Epoch 2/10, Train Loss: 0.0256
Epoch 3/10, Train Loss: 0.0243
Epoch 4/10, Train Loss: 0.0247
Epoch 5/10, Train Loss: 0.0235
Epoch 6/10, Train Loss: 0.0231
Epoch 7/10, Train Loss: 0.0232
Epoch 8/10, Train Loss: 0.0232
Epoch 9/10, Train Loss: 0.0229
Epoch 10/10, Train Loss: 0.0229


In [None]:
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.permute(0, 2, 1)
        y_pred = model(xb)
        probs = torch.sigmoid(y_pred)
        print(probs)
        preds = (probs[:, :, -1] > 0.5).float()
        all_preds.append(preds)
        all_labels.append(yb)

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

accuracy = (all_preds == all_labels).float().mean()
print(f"Test Accuracy: {accuracy:.4f}")
print("Labels distribution:")
print(all_labels.unique(return_counts=True))
print("Predictions distribution:")
print(all_preds.unique(return_counts=True))

y_pred = model(xb)              # [batch_size, 1, seq_len]
probs = torch.sigmoid(y_pred)
preds = (probs > 0.5).float()


tensor([[[0.0035, 0.0026, 0.0020, 0.0017, 0.0017, 0.0020, 0.0023, 0.0019,
          0.0014, 0.0012, 0.0010, 0.0010, 0.0008, 0.0006, 0.0006, 0.0004,
          0.0004, 0.0004, 0.0004, 0.0004]],

        [[0.0042, 0.0025, 0.0020, 0.0017, 0.0019, 0.0024, 0.0019, 0.0019,
          0.0013, 0.0010, 0.0010, 0.0015, 0.0007, 0.0006, 0.0004, 0.0007,
          0.0005, 0.0004, 0.0004, 0.0003]],

        [[0.0036, 0.0024, 0.0021, 0.0020, 0.0023, 0.0023, 0.0019, 0.0017,
          0.0011, 0.0011, 0.0015, 0.0012, 0.0006, 0.0004, 0.0007, 0.0007,
          0.0004, 0.0004, 0.0003, 0.0003]],

        [[0.0038, 0.0025, 0.0025, 0.0023, 0.0023, 0.0021, 0.0017, 0.0014,
          0.0012, 0.0016, 0.0012, 0.0011, 0.0004, 0.0007, 0.0007, 0.0007,
          0.0004, 0.0003, 0.0003, 0.0003]],

        [[0.0039, 0.0029, 0.0029, 0.0024, 0.0022, 0.0019, 0.0014, 0.0015,
          0.0017, 0.0013, 0.0011, 0.0008, 0.0008, 0.0008, 0.0007, 0.0007,
          0.0003, 0.0004, 0.0003, 0.0003]],

        [[0.0044, 0.0033, 0.0030, 0

Testing