Transformer + SMOTE

In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import KFold
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
import math


# Lift pandas' truncation limits so wide/long frames are rendered in full.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [23]:
# One seed shared by every random number generator used in this notebook.
SEED = 0

# Seed NumPy, the PyTorch CPU generator and the CUDA generators (current
# device, then all devices) in that order.
for _seed_fn in (
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

In [24]:
# Load the two raw CSV files; each becomes its own DataFrame.
df1, df2 = (
    pd.read_csv('pirvision_office_dataset1.csv'),
    pd.read_csv('pirvision_office_dataset2.csv'),
)

In [25]:
# Stack the two frames row-wise. Row order is kept as-is (no shuffling here)
# and the index is renumbered 0..N-1.
frames_to_merge = [df1, df2]
df = pd.concat(frames_to_merge, ignore_index=True)

# Sanity check: the merged row count should be the sum of the two inputs.
print(df1.shape, df2.shape, df.shape)

# Preview the first rows of the merged dataframe.
display(df.head(5))


(7651, 59) (7651, 59) (15302, 59)


Unnamed: 0,Date,Time,Label,Temperature_F,PIR_1,PIR_2,PIR_3,PIR_4,PIR_5,PIR_6,PIR_7,PIR_8,PIR_9,PIR_10,PIR_11,PIR_12,PIR_13,PIR_14,PIR_15,PIR_16,PIR_17,PIR_18,PIR_19,PIR_20,PIR_21,PIR_22,PIR_23,PIR_24,PIR_25,PIR_26,PIR_27,PIR_28,PIR_29,PIR_30,PIR_31,PIR_32,PIR_33,PIR_34,PIR_35,PIR_36,PIR_37,PIR_38,PIR_39,PIR_40,PIR_41,PIR_42,PIR_43,PIR_44,PIR_45,PIR_46,PIR_47,PIR_48,PIR_49,PIR_50,PIR_51,PIR_52,PIR_53,PIR_54,PIR_55
0,2024-08-08,19:19:56,0,86,10269,10721,11156,11170,10931,10671,10395,10133,9885,9705,9538,9418,9469,9599,9817,9910,9890,10075,10231,10247,10271,10229,10272,10354,10449,10451,10419,10409,10336,10306,10356,10461,10456,10460,10467,10422,10303,9877,9308,9061,9299,9748,10209,10615,10975,11178,11197,11161,11096,10957,10839,10735,10590,10411,10329
1,2024-08-08,19:20:12,1,86,10364,10907,11299,11238,10867,10535,10173,9950,9856,9795,9714,9702,9792,9789,9915,9900,9944,9964,9971,10059,10161,10234,10285,10309,10384,10464,10450,10427,10366,10361,10452,10502,10444,10337,10250,10313,10211,9718,9236,9193,9609,10022,10431,10798,11055,11122,11145,11136,11108,11041,10824,10645,10493,10398,10357
2,2024-08-08,19:20:28,0,86,10329,10793,11197,11242,11052,10658,10288,9988,9819,9711,9659,9626,9726,9752,9835,9942,9925,9965,10110,10174,10140,10235,10303,10365,10366,10379,10375,10287,10310,10345,10373,10328,10387,10415,10491,10421,10432,9964,9368,9135,9287,9643,10184,10663,11016,11168,11204,11162,11109,11007,10867,10700,10533,10427,10265
3,2024-08-08,19:20:44,0,86,10169,10425,10822,11133,11136,10834,10520,10228,9986,9848,9643,9562,9591,9618,9718,9849,9857,10026,10150,10198,10261,10351,10425,10469,10374,10344,10303,10293,10294,10333,10353,10345,10354,10362,10375,10369,10319,10115,9603,9182,9125,9560,10161,10560,10883,11116,11273,11186,10984,10910,10807,10714,10651,10562,10463
4,2024-08-08,19:21:00,0,86,10320,10667,11104,11234,11129,10814,10453,10040,9733,9630,9578,9476,9596,9748,9755,9823,10004,10048,10202,10234,10255,10282,10298,10319,10315,10270,10334,10400,10428,10514,10529,10453,10374,10303,10298,10238,10246,9918,9399,9198,9422,9848,10225,10615,10860,11006,11257,11370,11173,10924,10816,10754,10588,10428,10407


In [26]:
# Join the 'Date' and 'Time' string columns and parse them into one timestamp.
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'])

# Derive clock/calendar features from the parsed timestamp.
_stamp = df['Datetime'].dt
df['Hour'], df['Minute'] = _stamp.hour, _stamp.minute
df['DayOfWeek'], df['Month'] = _stamp.dayofweek, _stamp.month

# Non-PIR columns fed to the model's metadata branch.
meta_features = ['Hour', 'Minute', 'DayOfWeek', 'Month', 'Temperature_F']

In [27]:
# Preview the first five rows, now including the derived temporal columns.
display(df.head(n=5))

Unnamed: 0,Date,Time,Label,Temperature_F,PIR_1,PIR_2,PIR_3,PIR_4,PIR_5,PIR_6,PIR_7,PIR_8,PIR_9,PIR_10,PIR_11,PIR_12,PIR_13,PIR_14,PIR_15,PIR_16,PIR_17,PIR_18,PIR_19,PIR_20,PIR_21,PIR_22,PIR_23,PIR_24,PIR_25,PIR_26,PIR_27,PIR_28,PIR_29,PIR_30,PIR_31,PIR_32,PIR_33,PIR_34,PIR_35,PIR_36,PIR_37,PIR_38,PIR_39,PIR_40,PIR_41,PIR_42,PIR_43,PIR_44,PIR_45,PIR_46,PIR_47,PIR_48,PIR_49,PIR_50,PIR_51,PIR_52,PIR_53,PIR_54,PIR_55,Datetime,Hour,Minute,DayOfWeek,Month
0,2024-08-08,19:19:56,0,86,10269,10721,11156,11170,10931,10671,10395,10133,9885,9705,9538,9418,9469,9599,9817,9910,9890,10075,10231,10247,10271,10229,10272,10354,10449,10451,10419,10409,10336,10306,10356,10461,10456,10460,10467,10422,10303,9877,9308,9061,9299,9748,10209,10615,10975,11178,11197,11161,11096,10957,10839,10735,10590,10411,10329,2024-08-08 19:19:56,19,19,3,8
1,2024-08-08,19:20:12,1,86,10364,10907,11299,11238,10867,10535,10173,9950,9856,9795,9714,9702,9792,9789,9915,9900,9944,9964,9971,10059,10161,10234,10285,10309,10384,10464,10450,10427,10366,10361,10452,10502,10444,10337,10250,10313,10211,9718,9236,9193,9609,10022,10431,10798,11055,11122,11145,11136,11108,11041,10824,10645,10493,10398,10357,2024-08-08 19:20:12,19,20,3,8
2,2024-08-08,19:20:28,0,86,10329,10793,11197,11242,11052,10658,10288,9988,9819,9711,9659,9626,9726,9752,9835,9942,9925,9965,10110,10174,10140,10235,10303,10365,10366,10379,10375,10287,10310,10345,10373,10328,10387,10415,10491,10421,10432,9964,9368,9135,9287,9643,10184,10663,11016,11168,11204,11162,11109,11007,10867,10700,10533,10427,10265,2024-08-08 19:20:28,19,20,3,8
3,2024-08-08,19:20:44,0,86,10169,10425,10822,11133,11136,10834,10520,10228,9986,9848,9643,9562,9591,9618,9718,9849,9857,10026,10150,10198,10261,10351,10425,10469,10374,10344,10303,10293,10294,10333,10353,10345,10354,10362,10375,10369,10319,10115,9603,9182,9125,9560,10161,10560,10883,11116,11273,11186,10984,10910,10807,10714,10651,10562,10463,2024-08-08 19:20:44,19,20,3,8
4,2024-08-08,19:21:00,0,86,10320,10667,11104,11234,11129,10814,10453,10040,9733,9630,9578,9476,9596,9748,9755,9823,10004,10048,10202,10234,10255,10282,10298,10319,10315,10270,10334,10400,10428,10514,10529,10453,10374,10303,10298,10238,10246,9918,9399,9198,9422,9848,10225,10615,10860,11006,11257,11370,11173,10924,10816,10754,10588,10428,10407,2024-08-08 19:21:00,19,21,3,8


In [28]:
import torch
import torch.nn as nn
import math

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch-first sequence.

    Even feature indices hold ``sin(pos * w_k)`` and odd indices hold
    ``cos(pos * w_k)`` with ``w_k = exp(-2k * ln(10000) / d_model)``.
    The table is precomputed once and stored as the non-trainable buffer
    ``pe`` of shape ``(1, max_len, d_model)``.

    Args:
        d_model: Size of the feature dimension (may be even or odd).
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer odd (cosine) column than even
        # (sine) column, so trim div_term to fit; for even d_model the slice is
        # the whole tensor and the result is unchanged.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # (1, max_len, d_model)
        # Buffer: moves with .to(device) and is saved in state_dict, but is not
        # a trainable parameter.
        self.register_buffer("pe", pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        Args:
            x: Tensor indexed as (batch, position, feature); the feature size
                must match ``d_model``.
        """
        return x + self.pe[:, :x.size(1), :]


class HybridTransformer(nn.Module):
    """Classifier that fuses a Transformer-encoded sequence with an MLP-encoded metadata vector.

    The sequence branch projects each time step to ``d_model`` features, adds
    sinusoidal positional encodings, runs a stack of Transformer encoder
    layers and mean-pools over the time axis. The metadata branch is a
    two-layer MLP that also outputs ``d_model`` features. Both vectors are
    concatenated and passed to a small classification head.

    Args:
        input_size: Number of features per time step of the sequence input.
        meta_input_size: Number of metadata features per sample.
        seq_len: Number of positions precomputed by the positional encoding.
        d_model: Hidden width of both branches.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        num_classes: Number of output logits.
        dim_feedforward: Width of the feed-forward sublayer in each encoder
            layer (previously fixed at 128).
        dropout: Dropout probability inside each encoder layer (previously
            fixed at 0.1).
    """

    def __init__(self, input_size=1, meta_input_size=5, seq_len=55, d_model=64, nhead=4, num_layers=9,
                 num_classes=3, dim_feedforward=128, dropout=0.1):
        super().__init__()
        # NOTE: submodules are created in the same order as before so that a
        # seeded run draws identical initial weights.
        self.input_proj = nn.Linear(input_size, d_model)  # PIR projection
        self.pos_encoder = PositionalEncoding(d_model, max_len=seq_len)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Metadata branch: meta_input_size -> 32 -> d_model
        self.meta_mlp = nn.Sequential(
            nn.Linear(meta_input_size, 32),
            nn.ReLU(),
            nn.Linear(32, d_model),
            nn.ReLU()
        )

        # Head over the concatenated [sequence, metadata] representation
        self.classifier = nn.Sequential(
            nn.Linear(d_model * 2, 64),  # Transformer + Metadata
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x_seq, x_meta):
        """Compute class logits.

        Args:
            x_seq: Sequence input of shape (batch, seq_len, input_size).
            x_meta: Metadata input of shape (batch, meta_input_size).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised logits.
        """
        x_seq = self.input_proj(x_seq)           # (B, seq_len, d_model)
        x_seq = self.pos_encoder(x_seq)          # add positional encoding
        x_seq = self.transformer_encoder(x_seq)  # (B, seq_len, d_model)
        x_seq = x_seq.mean(dim=1)                # mean pooling over time -> (B, d_model)

        x_meta = self.meta_mlp(x_meta)           # (B, d_model)

        combined = torch.cat([x_seq, x_meta], dim=1)  # (B, 2*d_model)
        return self.classifier(combined)              # (B, num_classes)


In [29]:
# Report how many rows carry each raw label value.
print("Class distribution in df:", df["Label"].value_counts(), sep="\n")


Class distribution in df:
Label
0    12494
1     1666
3     1142
Name: count, dtype: int64


In [30]:
# Names of the 55 PIR reading columns: 'PIR_1' through 'PIR_55'.
PIR_columns = [f'PIR_{idx}' for idx in range(1, 55 + 1)]

In [31]:
import torch
from torch.utils.data import Dataset, DataLoader

from torch.utils.data import Dataset
import torch
from sklearn.metrics import classification_report, f1_score

class HybridTimeSeriesDataset(Dataset):
    """Dataset yielding ``(sequence, metadata, label)`` tensors for one sample.

    Args:
        X_seq: Indexable of per-sample sequences; ``X_seq[i]`` is a 1-D series
            of readings.
        X_meta: Indexable of per-sample metadata vectors.
        y: Indexable of integer class labels.

    Raises:
        ValueError: If the three inputs do not have the same length.
    """

    def __init__(self, X_seq, X_meta, y):
        # Fail early on misaligned inputs instead of hitting an IndexError (or
        # silently dropping samples) later during iteration.
        if not (len(X_seq) == len(X_meta) == len(y)):
            raise ValueError(
                "X_seq, X_meta and y must have the same length, got "
                f"{len(X_seq)}, {len(X_meta)} and {len(y)}"
            )
        self.X_seq = X_seq
        self.X_meta = X_meta
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X_seq)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(sequence, metadata, label)`` tensors.

        The sequence gets a trailing feature axis, so a length-T series
        becomes a float32 tensor of shape (T, 1). The metadata is returned as
        a float32 vector and the label as an int64 scalar.
        """
        sequence = self.X_seq[idx]                # shape: (T,)
        meta = self.X_meta[idx]                   # shape: (n_meta,)
        label = self.y[idx]

        sequence_tensor = torch.tensor(sequence, dtype=torch.float32).unsqueeze(1)  # (T, 1)
        meta_tensor = torch.tensor(meta, dtype=torch.float32)                       # (n_meta,)
        label_tensor = torch.tensor(label, dtype=torch.long)

        return sequence_tensor, meta_tensor, label_tensor

# 5-fold cross-validation of HybridTransformer. SMOTE oversampling is applied
# to each training fold only; the held-out fold keeps its original class mix.

# Imported once here rather than on every fold iteration.
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from sklearn.utils.class_weight import compute_class_weight

# Single list of PIR column names used everywhere in this cell, so the cell
# does not depend on a same-purpose variable defined in another cell.
pir_columns = [f'PIR_{i}' for i in range(1, 56)]
X_seq = df[pir_columns].values

# Metadata features
X_meta = df[meta_features].values

# NOTE(review): PIR readings are fed to the model unscaled, and MinMaxScaler is
# imported at the top of the notebook but not used in the visible cells.
# Confirm whether scaling (fit on the training fold only) was intended.

# Raw labels are {0, 1, 3}; remap them to contiguous ids 0..2 as required by
# CrossEntropyLoss. An unexpected raw label raises KeyError here.
label_map = {0: 0, 1: 1, 3: 2}
y_raw = df["Label"].values
y = np.array([label_map[label] for label in y_raw])
kf = KFold(n_splits=5, shuffle=True, random_state=0)

input_size = 1

# Balanced class weights, kept for the weighted-loss alternative below.
classes = np.unique(y)
class_weights = compute_class_weight(class_weight="balanced", classes=classes, y=y)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32)

# Alternative to oversampling: weight the loss instead.
# criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
criterion = nn.CrossEntropyLoss()

nb_epochs = 8
batch_size = 64  # you can adjust this based on memory
accs = []
f1_scores = []

for fold, (train_i, test_i) in enumerate(kf.split(X_seq), 1):
    print(f"\nFold {fold}")

    # Fresh model and optimizer per fold so no weights leak between folds.
    model = HybridTransformer(input_size=input_size)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    X_seq_train, X_seq_test = X_seq[train_i], X_seq[test_i]
    X_meta_train, X_meta_test = X_meta[train_i], X_meta[test_i]
    y_train, y_test = y[train_i], y[test_i]

    # The sampler works on one flat feature table, so join the PIR and
    # metadata columns, resample, then split them apart again by column name.
    X_seq_train = pd.DataFrame(X_seq_train, columns=pir_columns)
    X_meta_train = pd.DataFrame(X_meta_train, columns=meta_features)

    X_dummy = pd.concat([X_seq_train, X_meta_train], axis=1)

    # Random oversampling alternative:
    # ros = RandomOverSampler(random_state=0)
    # X_dummy, y_train = ros.fit_resample(X_dummy, y_train)

    # SMOTE
    smote = SMOTE(random_state=42)
    X_dummy, y_train = smote.fit_resample(X_dummy, y_train)

    X_seq_train = X_dummy[pir_columns].values
    X_meta_train = X_dummy[meta_features].values

    print("Train label distribution:", np.bincount(y_train))
    print("Test label distribution:", np.bincount(y_test))

    train_dataset = HybridTimeSeriesDataset(X_seq_train, X_meta_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(nb_epochs):
        total_loss = 0.0
        model.train()
        for sequences, metas, labels in train_loader:
            output = model(sequences, metas)  # Forward pass
            loss = criterion(output, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch average is
            # a true per-sample mean even when the last batch is smaller.
            total_loss += loss.item() * sequences.size(0)

        avg_loss = total_loss / len(train_dataset)
        print(f"epoch {epoch+1}, loss: {avg_loss:.4f}")

    # --- Evaluation ---
    # Run the held-out fold through the model in batches instead of one sample
    # per forward pass. Plain tensor slicing is used (not a DataLoader) so the
    # global RNG stream, and therefore the next fold's initialisation, is not
    # affected.
    model.eval()
    seq_test_tensor = torch.tensor(X_seq_test, dtype=torch.float32).unsqueeze(2)  # (N, 55, 1)
    meta_test_tensor = torch.tensor(X_meta_test, dtype=torch.float32)             # (N, n_meta)

    batch_predictions = []
    with torch.no_grad():
        for start in range(0, seq_test_tensor.size(0), batch_size):
            stop = start + batch_size
            output = model(seq_test_tensor[start:stop], meta_test_tensor[start:stop])
            batch_predictions.append(torch.argmax(output, dim=1))

    y_true = y_test
    y_pred = torch.cat(batch_predictions).numpy()

    is_correct = y_pred == y_true
    correct = int(is_correct.sum())
    total = len(y_true)
    class_counts = {cls: int(np.sum(y_true == cls)) for cls in (0, 1, 2)}
    class_correct = {cls: int(np.sum(is_correct & (y_true == cls))) for cls in (0, 1, 2)}

    accuracy = correct / total
    print(f"\nfold {fold} test accuracy: {accuracy:.4f}")
    accs.append(accuracy)

    print("\nPer-class accuracy:")
    for cls in class_counts:
        total_cls = class_counts[cls]
        correct_cls = class_correct[cls]
        acc_cls = correct_cls / total_cls if total_cls > 0 else 0.0
        print(f"  Class {cls}: {correct_cls}/{total_cls} correct ({acc_cls * 100:.2f}%)")

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, digits=4))

    macro_f1 = f1_score(y_true, y_pred, average='macro')
    f1_scores.append(macro_f1)
    print(f"Macro F1-Score: {macro_f1:.4f}")

    # Running averages over the folds completed so far.
    avg_acc = np.mean(accs)
    avg_f1 = np.mean(f1_scores)
    print(f"\nAverage accuracy until fold {fold} is : {avg_acc:.4f}")
    print(f"Average F1-Score until fold {fold} is : {avg_f1:.4f}")


Fold 1
Train label distribution: [10041 10041 10041]
Test label distribution: [2453  345  263]
epoch 1, loss: 0.2649
epoch 2, loss: 0.1392
epoch 3, loss: 0.1304
epoch 4, loss: 0.1269
epoch 5, loss: 0.1173
epoch 6, loss: 0.1126
epoch 7, loss: 0.1069
epoch 8, loss: 0.1035

fold 1 test accuracy: 0.9510

Per-class accuracy:
  Class 0: 2368/2453 correct (96.53%)
  Class 1: 280/345 correct (81.16%)
  Class 2: 263/263 correct (100.00%)

Classification Report:
              precision    recall  f1-score   support

           0     0.9733    0.9653    0.9693      2453
           1     0.7671    0.8116    0.7887       345
           2     1.0000    1.0000    1.0000       263

    accuracy                         0.9510      3061
   macro avg     0.9135    0.9256    0.9193      3061
weighted avg     0.9523    0.9510    0.9516      3061

Macro F1-Score: 0.9193

Average accuracy until fold 1 is : 0.9510
Average F1-Score until fold 1 is : 0.9193

Fold 2
Train label distribution: [9991 9991 9991]
Te

KeyboardInterrupt: 