In [101]:
import pandas as pd
import numpy as np
import tensorflow as tf 
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from pytorch_tcn import TCN
from keras import layers, models



Get Data

In [102]:
df = pd.read_csv("./simulated_dataset/all_data.csv")

df_pivot = df.pivot(index="timestamp", columns="sensor_id", values=["temperature", "label"])
df_pivot = df_pivot.reset_index(drop=True)

df_pivot.head()

sensor_dfs = {}

for i in range(1, 5):
    sensor_df = df_pivot.xs(i, axis=1, level=1).copy()
    
    # fill temperature NaNs with mean
    sensor_df['temperature'] = sensor_df['temperature'].fillna(sensor_df['temperature'].mean())
    
    # fill label NaNs with mode (most common value)
    sensor_df['label'] = sensor_df['label'].fillna(sensor_df['label'].mode()[0])
    
    sensor_dfs[i] = sensor_df

sensor = 2

df = sensor_dfs[sensor]
df

Unnamed: 0,temperature,label
0,50.073250,0.0
1,51.007613,0.0
2,49.634052,0.0
3,46.979169,0.0
4,45.635984,0.0
...,...,...
21596,52.232443,0.0
21597,51.719373,0.0
21598,51.326107,0.0
21599,52.967035,0.0


Normalize

In [103]:
scaler = MinMaxScaler()
df['temperature']
df['temperature'] = scaler.fit_transform(df[['temperature']])

Create Sequences

In [104]:
# number of past timesteps the TCN sees
seq_length = 40

# creates sequences based on data with window size == seq_length
def create_sequences(df, seq_length=20):
    '''
    Sequences are needed for giving the model information about history, since feed-forward networks do not know the order of the data.
    TCN's expect input of shape [batch_size, num_features, sequence_length]
    
    Args:
        df: time-series data
        seq_length: length of each sequence
    
    Returns:
        X (shape: [num_samples, seq_length, num_inputs]), y (shape: [num_samples, num_inputs])
    '''
    values = df
    X, y = [], []
    for i in range(len(values) - seq_length):
        # average of current sequence
        X.append(values[i:i + seq_length])
        # next value after sequence
        y.append(values[i + seq_length])
    X = np.array(X)
    y = np.array(y)
    
    return X, y

weights = np.ones(500) / 500
meanDf = np.convolve(df['temperature'], weights, mode='valid')

X, y = create_sequences(meanDf, seq_length)

X = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
y = torch.tensor(y, dtype=torch.float32).unsqueeze(-1)



In [105]:
X.shape

torch.Size([21062, 40, 1])

Train/Validation/Test Split

In [106]:

train_size = int(.8 * len(X))
val_size = int(.1 * len(X))

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

print(X_train[0])
print(y_train[0])
print(X_train[1])
print(y_train[1])

tensor([[0.1506],
        [0.1504],
        [0.1504],
        [0.1500],
        [0.1500],
        [0.1500],
        [0.1502],
        [0.1500],
        [0.1498],
        [0.1500],
        [0.1500],
        [0.1499],
        [0.1501],
        [0.1500],
        [0.1500],
        [0.1499],
        [0.1497],
        [0.1500],
        [0.1502],
        [0.1502],
        [0.1501],
        [0.1501],
        [0.1501],
        [0.1500],
        [0.1500],
        [0.1501],
        [0.1499],
        [0.1499],
        [0.1499],
        [0.1499],
        [0.1502],
        [0.1502],
        [0.1500],
        [0.1498],
        [0.1497],
        [0.1497],
        [0.1500],
        [0.1501],
        [0.1499],
        [0.1499]])
tensor([0.1500])
tensor([[0.1504],
        [0.1504],
        [0.1500],
        [0.1500],
        [0.1500],
        [0.1502],
        [0.1500],
        [0.1498],
        [0.1500],
        [0.1500],
        [0.1499],
        [0.1501],
        [0.1500],
        [0.1500],
        [0

DataLoaders

In [107]:
batch_size = 32

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
# test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)

Build CNN

In [108]:
num_inputs = 1
num_channels = [16, 32, 16]
kernel_size = 4
dilations = [1, 2, 4]
dropout = 0.1

model = TCN(
    num_inputs=num_inputs,
    num_channels=num_channels,
    kernel_size=kernel_size,
    dilations=dilations,
    dropout=dropout,
    causal=True,
    use_skip_connections=True,
    output_projection=1,
    output_activation=None
)

Loss and Optimization

In [109]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [110]:
for x_batch, y_batch in train_loader:
    print(x_batch.shape)  # [B, seq_len, 1]
    x_batch = x_batch.permute(0, 2, 1)
    print(x_batch[0][0])
    print(torch.mean(x_batch[0][0]))
    print(y_batch)
    print(x_batch[1])
    break

torch.Size([32, 40, 1])
tensor([0.1792, 0.1789, 0.1789, 0.1789, 0.1787, 0.1787, 0.1789, 0.1790, 0.1791,
        0.1790, 0.1793, 0.1791, 0.1790, 0.1790, 0.1789, 0.1790, 0.1789, 0.1790,
        0.1789, 0.1791, 0.1793, 0.1795, 0.1794, 0.1795, 0.1794, 0.1793, 0.1793,
        0.1793, 0.1792, 0.1789, 0.1790, 0.1793, 0.1791, 0.1791, 0.1793, 0.1794,
        0.1796, 0.1795, 0.1794, 0.1792])
tensor(0.1791)
tensor([[0.1793],
        [0.2236],
        [0.1999],
        [0.1719],
        [0.2439],
        [0.2008],
        [0.1944],
        [0.1977],
        [0.4024],
        [0.2378],
        [0.2308],
        [0.2025],
        [0.2444],
        [0.4042],
        [0.2250],
        [0.3590],
        [0.1537],
        [0.2298],
        [0.2121],
        [0.2060],
        [0.2059],
        [0.2379],
        [0.2114],
        [0.1726],
        [0.1735],
        [0.1777],
        [0.2067],
        [0.1747],
        [0.1530],
        [0.1761],
        [0.2492],
        [0.1870]])
tensor([[0.2232, 0.2230

Training

In [111]:
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    
    # train
    for x_batch, y_batch in train_loader:
        # reorders tensors to [batch_size, features, sequence_length] and ensures y is a float
        x_batch = x_batch.permute(0, 2, 1)
        
        # clears gradients from previous batch
        optimizer.zero_grad()
        
        # forward propagation
        y_pred = model(x_batch)
        
        # calculate loss
        loss = criterion(y_pred[:, :, -1], y_batch)
        
        # backward propagation
        loss.backward()
        
        # update step and accumulate loss
        optimizer.step()
        train_loss += loss.item() * x_batch.size(0)
        
    train_loss /= len(train_loader.dataset)
    
    # validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            x_batch = x_batch.permute(0, 2, 1)
            y_pred = model(x_batch)
            loss = criterion(y_pred[:, :, -1], y_batch)
            val_loss += loss.item() * x_batch.size(0)
        
        val_loss /= len(val_loader.dataset)
            
    print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

Epoch 1/30 - Train Loss: 0.5100, Validation Loss: 0.5713
Epoch 2/30 - Train Loss: 0.5054, Validation Loss: 0.5713
Epoch 3/30 - Train Loss: 0.5052, Validation Loss: 0.5713
Epoch 4/30 - Train Loss: 0.5051, Validation Loss: 0.5713
Epoch 5/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 6/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 7/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 8/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 9/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 10/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 11/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 12/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 13/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 14/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 15/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 16/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 17/30 - Train Loss: 0.5050, Validation Loss: 0.5713
Epoch 18/30 - Train Los

In [112]:
model.eval()
X_test_t = torch.tensor(X_test, dtype=torch.float32).permute(0, 2, 1)
y_test_t = torch.tensor(y_test, dtype=torch.float32)

with torch.no_grad():
    y_pred = model(X_test_t) 
    
criterion = torch.nn.MSELoss()  
test_loss = criterion(y_pred[:, :, -1], y_test_t).item()

print(f"Test Loss (MSE): {test_loss:.6f}")


Test Loss (MSE): 1.595295


  X_test_t = torch.tensor(X_test, dtype=torch.float32).permute(0, 2, 1)
  y_test_t = torch.tensor(y_test, dtype=torch.float32)
