In [458]:
import pandas as pd
import numpy as np
import tensorflow as tf 
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from pytorch_tcn import TCN
from keras import layers, models



Get Data

In [459]:
df = pd.read_csv("./simulated_dataset/all_data.csv")

df_pivot = df.pivot(index="timestamp", columns="sensor_id", values=["temperature", "label"])
df_pivot = df_pivot.reset_index(drop=True)

df_pivot.head()

sensor_dfs = {}

for i in range(1, 5):
    sensor_df = df_pivot.xs(i, axis=1, level=1).copy()
    
    # fill temperature NaNs with mean
    sensor_df['temperature'] = sensor_df['temperature'].fillna(sensor_df['temperature'].mean())
    
    # fill label NaNs with mode (most common value)
    sensor_df['label'] = sensor_df['label'].fillna(sensor_df['label'].mode()[0])
    
    sensor_dfs[i] = sensor_df

sensor = 2

df = sensor_dfs[sensor]
df

Unnamed: 0,temperature,label
0,49.441279,0.0
1,50.375508,0.0
2,49.001814,0.0
3,46.346798,0.0
4,45.003479,0.0
...,...,...
21710,47.484230,0.0
21711,47.484230,0.0
21712,47.484230,0.0
21713,47.484230,0.0


Normalize

In [460]:
scaler = MinMaxScaler()
df['temperature']
df['temperature'] = scaler.fit_transform(df[['temperature']])

Create Sequences

In [461]:
# number of past timesteps the TCN sees
seq_length = 40

# creates sequences based on data with window size == seq_length
def create_sequences(df, seq_length=20):
    '''
    Sequences are needed for giving the model information about history, since feed-forward networks do not know the order of the data.
    TCN's expect input of shape [batch_size, num_features, sequence_length]
    
    Args:
        df: time-series data
        seq_length: length of each sequence
    
    Returns:
        X (shape: [num_samples, seq_length, num_inputs]), y (shape: [num_samples, num_inputs])
    '''
    values = df['temperature'].values
    X, y = [], []
    for i in range(len(values) - seq_length):
        # sequence
        X.append(values[i:i + seq_length])
        # next value after sequence
        y.append(values[i + seq_length])
    X = np.array(X)
    y = np.array(y)
    
    return X, y

X, y = create_sequences(df, seq_length)

X = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
y = torch.tensor(y, dtype=torch.float32).unsqueeze(-1)



Train/Validation/Test Split

In [462]:

train_size = int(.8 * len(X))
val_size = int(.1 * len(X))

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

print(X_train[0])
print(y_train[0])
print(X_train[1])
print(y_train[1])

tensor([[0.1928],
        [0.2113],
        [0.1842],
        [0.1319],
        [0.1054],
        [0.0113],
        [0.1785],
        [0.1741],
        [0.0960],
        [0.1256],
        [0.1844],
        [0.0447],
        [0.1466],
        [0.1114],
        [0.1803],
        [0.1325],
        [0.0628],
        [0.0182],
        [0.1438],
        [0.0940],
        [0.1801],
        [0.1757],
        [0.1332],
        [0.1814],
        [0.1483],
        [0.1685],
        [0.1287],
        [0.1439],
        [0.1989],
        [0.0304],
        [0.1222],
        [0.1116],
        [0.1839],
        [0.1111],
        [0.0675],
        [0.0250],
        [0.0709],
        [0.1665],
        [0.1777],
        [0.1044]])
tensor([0.0253])
tensor([[0.2113],
        [0.1842],
        [0.1319],
        [0.1054],
        [0.0113],
        [0.1785],
        [0.1741],
        [0.0960],
        [0.1256],
        [0.1844],
        [0.0447],
        [0.1466],
        [0.1114],
        [0.1803],
        [0

DataLoaders

In [463]:
batch_size = 32

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
# test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)

Build CNN

In [464]:
num_inputs = 1
num_channels = [16, 32, 16]
kernel_size = 4
dilations = [1, 2, 4]
dropout = 0.1

model = TCN(
    num_inputs=num_inputs,
    num_channels=num_channels,
    kernel_size=kernel_size,
    dilations=dilations,
    dropout=dropout,
    causal=True,
    use_skip_connections=True,
    output_projection=1,
    output_activation=None
)

Loss and Optimization

In [465]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [466]:
for x_batch, y_batch in train_loader:
    print(x_batch.shape)  # [B, seq_len, 1]
    x_batch = x_batch.permute(0, 2, 1)
    print(x_batch)
    print(y_batch)
    break

torch.Size([32, 40, 1])
tensor([[[0.0682, 0.1900, 0.0430,  ..., 0.0669, 0.0941, 0.2288]],

        [[0.0776, 0.1597, 0.2159,  ..., 0.0633, 0.1646, 0.2042]],

        [[0.1927, 0.1695, 0.0932,  ..., 0.2236, 0.0985, 0.1207]],

        ...,

        [[0.2032, 0.1307, 0.0300,  ..., 0.0388, 0.0751, 0.1329]],

        [[0.0638, 0.1729, 0.0701,  ..., 0.1294, 0.1876, 0.1976]],

        [[0.0735, 0.1586, 0.0563,  ..., 0.2204, 0.2381, 0.2557]]])
tensor([[0.1938],
        [0.2125],
        [0.1010],
        [0.0509],
        [0.0818],
        [0.1434],
        [0.0784],
        [0.0805],
        [0.0916],
        [0.0745],
        [0.1399],
        [0.2099],
        [0.2192],
        [0.1253],
        [0.1266],
        [0.1689],
        [0.1878],
        [0.1786],
        [0.0652],
        [0.1409],
        [0.0781],
        [0.0359],
        [0.1532],
        [0.6891],
        [0.1872],
        [0.0664],
        [0.1218],
        [0.0720],
        [0.1195],
        [0.1696],
        [0.2041],
  

Training

In [467]:
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    
    # train
    for x_batch, y_batch in train_loader:
        # reorders tensors to [batch_size, features, sequence_length] and ensures y is a float
        x_batch = x_batch.permute(0, 2, 1)
        # clears gradients from previous batch
        optimizer.zero_grad()
        
        # forward propagation
        y_pred = model(x_batch)
        
        # y_pred = model(x_batch)
        loss = criterion(y_pred[:, :, -1], y_batch)
        
        # backward propagation
        loss.backward()
        
        # update step and accumulate loss
        optimizer.step()
        train_loss += loss.item() * x_batch.size(0)
        
    train_loss /= len(train_loader.dataset)
    
    # validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            x_batch = x_batch.permute(0, 2, 1)
            y_pred = model(x_batch)
            loss = criterion(y_pred[:, :, -1], y_batch)
            val_loss += loss.item() * x_batch.size(0)
        
        val_loss /= len(val_loader.dataset)
            
    print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch 1/30 - Train Loss: 0.4097, Validation Loss: 0.4963
Epoch 2/30 - Train Loss: 0.4014, Validation Loss: 0.4979
Epoch 3/30 - Train Loss: 0.4009, Validation Loss: 0.4959
Epoch 4/30 - Train Loss: 0.4006, Validation Loss: 0.4958
Epoch 5/30 - Train Loss: 0.4003, Validation Loss: 0.4967
Epoch 6/30 - Train Loss: 0.4003, Validation Loss: 0.4963
Epoch 7/30 - Train Loss: 0.4001, Validation Loss: 0.4964
Epoch 8/30 - Train Loss: 0.4001, Validation Loss: 0.4963
Epoch 9/30 - Train Loss: 0.4001, Validation Loss: 0.4967
Epoch 10/30 - Train Loss: 0.4000, Validation Loss: 0.4960
Epoch 11/30 - Train Loss: 0.4000, Validation Loss: 0.4973
Epoch 12/30 - Train Loss: 0.4000, Validation Loss: 0.4962
Epoch 13/30 - Train Loss: 0.4001, Validation Loss: 0.4972
Epoch 14/30 - Train Loss: 0.4000, Validation Loss: 0.4960
Epoch 15/30 - Train Loss: 0.4000, Validation Loss: 0.4980
Epoch 16/30 - Train Loss: 0.4000, Validation Loss: 0.4960
Epoch 17/30 - Train Loss: 0.4000, Validation Loss: 0.4975
Epoch 18/30 - Train Los

In [468]:
model.eval()
X_test_t = torch.tensor(X_test, dtype=torch.float32).permute(0, 2, 1)
y_test_t = torch.tensor(y_test, dtype=torch.float32)

with torch.no_grad():
    y_pred = model(X_test_t) 
    
criterion = torch.nn.MSELoss()  
test_loss = criterion(y_pred[:, :, -1], y_test_t).item()

print(f"Test Loss (MSE): {test_loss:.6f}")


Test Loss (MSE): 2.913255


  X_test_t = torch.tensor(X_test, dtype=torch.float32).permute(0, 2, 1)
  y_test_t = torch.tensor(y_test, dtype=torch.float32)
