<a href="https://colab.research.google.com/github/calmcreek/Multi-Task-Learning/blob/main/encoder_and_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from google.colab import files

# Step 1: Upload parquet file
uploaded = files.upload()  # this opens a dialog to choose your .parquet file

# Step 2: Get filename (first key of uploaded dict)
filename = list(uploaded.keys())[0]

# Step 3: Read parquet file into a DataFrame
df = pd.read_csv(filename)

# Step 4: Inspect structure
print(df.head())   # show first few rows
print(df.info())   # show column names + data types


Saving val.parquet to val.parquet
Saving train.parquet to train.parquet
Saving classifier_dataset.csv to classifier_dataset.csv
Saving ssl_dataset.csv to ssl_dataset.csv


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xee in position 20: invalid continuation byte

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np


In [4]:
# AffectNet features (CSV)
affectnet_df = pd.read_csv("ssl_dataset.csv", low_memory=False)

# DAPPER physiological features (Parquet)
dapper_df = pd.read_parquet("train.parquet")

print("AffectNet shape:", affectnet_df.shape)
print("DAPPER shape:", dapper_df.shape)
print("AffectNet dtypes (sample):\n", affectnet_df.dtypes.head())
print("DAPPER dtypes (sample):\n", dapper_df.dtypes.head())


AffectNet shape: (121, 718)
DAPPER shape: (341248, 63)
AffectNet dtypes (sample):
 frame           int64
face_id         int64
timestamp     float64
confidence    float64
success         int64
dtype: object
DAPPER dtypes (sample):
 participant_id      int64
window_id           int64
start_time          int64
center_time       float64
hr_mean           float64
dtype: object


In [5]:
class PretrainDataset(Dataset):
    def __init__(self, dapper_df, affectnet_df):
        # Work on copies to avoid modifying original dfs
        dapper = dapper_df.copy()
        affectnet = affectnet_df.copy()

        # Drop known label/id columns (if present)
        dapper = dapper.drop(columns=["participant_id", "window_id", "start_time", "center_time", "valence", "arousal", "panas_pos", "panas_neg"], errors="ignore")
        affectnet = affectnet.drop(columns=["frame", "face_id", "timestamp", "emotion", "stress", "valence", "arousal", "confidence", "success"], errors="ignore")

        # Keep only numeric columns (this drops object columns)
        dapper = dapper.select_dtypes(include=[np.number])
        affectnet = affectnet.select_dtypes(include=[np.number])

        # Drop columns that are entirely NaN (like skew/kurtosis columns in your DAPPER preview)
        dapper = dapper.dropna(axis=1, how="all")
        affectnet = affectnet.dropna(axis=1, how="all")

        # Fill remaining NaNs (choose mean or 0 — here we use column mean, fallback to 0)
        dapper = dapper.fillna(dapper.mean()).fillna(0.0)
        affectnet = affectnet.fillna(affectnet.mean()).fillna(0.0)

        # Convert to float32
        self.dapper = dapper.values.astype(np.float32)
        self.affectnet = affectnet.values.astype(np.float32)

        # Keep feature dims for later use
        self.dapper_dim = self.dapper.shape[1]
        self.affectnet_dim = self.affectnet.shape[1]

        # Align lengths: cut to the shorter dataset (simple strategy for paired SSL)
        min_len = min(len(self.dapper), len(self.affectnet))
        self.dapper = self.dapper[:min_len]
        self.affectnet = self.affectnet[:min_len]

    def __len__(self):
        return len(self.dapper)

    def __getitem__(self, idx):
        # returns (physio_tensor, image_features_tensor)
        d = torch.from_numpy(self.dapper[idx])
        a = torch.from_numpy(self.affectnet[idx])
        return d, a

# instantiate loader
pretrain_dataset = PretrainDataset(dapper_df, affectnet_df)
pretrain_loader = DataLoader(pretrain_dataset, batch_size=64, shuffle=True, drop_last=False)

# quick sanity prints
print("Pretrain dataset length:", len(pretrain_dataset))
print("DAPPER feature dim:", pretrain_dataset.dapper_dim)
print("AffectNet feature dim:", pretrain_dataset.affectnet_dim)


Pretrain dataset length: 121
DAPPER feature dim: 55
AffectNet feature dim: 709


In [6]:
class PhysiologicalEncoder(nn.Module):
    def __init__(self, input_dim, embed_dim=128):
        """
        input_dim: number of features per window (treated as sequence length for 1D conv)
        embed_dim: final embedding size
        """
        super().__init__()
        # We'll treat the features as a 1D 'signal' of length=input_dim with 1 channel
        self.input_dim = input_dim
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(2),  # halves length
            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1)  # collapse length -> 1
        )
        self.fc = nn.Linear(64, embed_dim)

    def forward(self, x):
        """
        x: (batch, input_dim) float tensor
        returns: (batch, embed_dim)
        """
        # ensure float and correct shape
        if x.dim() != 2:
            raise ValueError("Physio encoder expects input shape (batch, features)")
        x = x.unsqueeze(1)  # (batch, 1, features)
        x = self.encoder(x).squeeze(-1)  # (batch, 64)
        out = self.fc(x)
        return out


In [7]:
class ImageEncoder(nn.Module):
    def __init__(self, input_dim, embed_dim=128):
        """
        A simple MLP encoder for precomputed image features (AffectNet row features)
        input_dim: number of feature columns from affectnet after preprocessing
        """
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, embed_dim)
        )

    def forward(self, x):
        # x: (batch, input_dim)
        return self.encoder(x)


In [8]:
# =========================
# Cell 5 – Contrastive Pretraining (InfoNCE loss)
# =========================
def contrastive_loss(z_physio, z_image, temperature=0.1):
    # Normalize embeddings
    z_physio = F.normalize(z_physio, dim=1)
    z_image  = F.normalize(z_image, dim=1)

    # Similarity matrix
    logits = torch.matmul(z_physio, z_image.T) / temperature
    labels = torch.arange(logits.size(0), device=logits.device)

    # Loss physio->image
    loss_i = F.cross_entropy(logits, labels)

    # Loss image->physio (transpose logits)
    loss_t = F.cross_entropy(logits.T, labels)

    return (loss_i + loss_t) / 2.0


In [9]:
class PretrainModel(nn.Module):
    def __init__(self, physio_encoder, image_encoder):
        super().__init__()
        self.physio_encoder = physio_encoder
        self.image_encoder = image_encoder

    def forward(self, physio_x, image_x):
        return self.physio_encoder(physio_x), self.image_encoder(image_x)


In [10]:
# Use feature dims discovered from pretrain_dataset
dapper_dim   = pretrain_dataset.dapper_dim
affectnet_dim = pretrain_dataset.affectnet_dim

# Init encoders + model
physio_encoder = PhysiologicalEncoder(dapper_dim, embed_dim=128)
image_encoder  = ImageEncoder(affectnet_dim, embed_dim=128)
pretrain_model = PretrainModel(physio_encoder, image_encoder)

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrain_model.to(device)

# Optimizer
optimizer = optim.Adam(pretrain_model.parameters(), lr=1e-3)


In [11]:
# Training loop
num_epochs = 5  # increase later
for epoch in range(num_epochs):
    pretrain_model.train()
    epoch_loss = 0.0
    num_batches = 0

    for physio_batch, img_batch in pretrain_loader:
        physio_batch = physio_batch.to(device)
        img_batch    = img_batch.to(device)

        z_physio, z_image = pretrain_model(physio_batch, img_batch)
        loss = contrastive_loss(z_physio, z_image)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    avg_loss = epoch_loss / num_batches
    print(f"Epoch {epoch+1}/{num_epochs} | Avg Contrastive Loss: {avg_loss:.4f}")

# Save encoders for fine-tuning
torch.save(pretrain_model.physio_encoder.state_dict(), "physio_encoder.pt")
torch.save(pretrain_model.image_encoder.state_dict(), "image_encoder.pt")

print("✅ Pretraining finished. Encoders saved!")


Epoch 1/5 | Avg Contrastive Loss: 4.1687
Epoch 2/5 | Avg Contrastive Loss: 4.1340
Epoch 3/5 | Avg Contrastive Loss: 4.1173
Epoch 4/5 | Avg Contrastive Loss: 4.1078
Epoch 5/5 | Avg Contrastive Loss: 4.1045
✅ Pretraining finished. Encoders saved!


In [72]:
# =========================
# Stage 2 – Fine-tuning with Labels
# =========================
# Load labelled data
dapper_df = pd.read_parquet("val.parquet")
affectnet_df = pd.read_csv("classifier_dataset.csv")

print("DAPPER (labelled) shape:", dapper_df.shape)
print("AffectNet (labelled) shape:", affectnet_df.shape)


DAPPER (labelled) shape: (100620, 63)
AffectNet (labelled) shape: (34, 718)


In [73]:
# ---------------------- DAPPER Dataset ----------------------
from sklearn.preprocessing import StandardScaler
class DapperDataset(Dataset):
    def __init__(self, df):
        # Features
        self.X = df.drop(columns=["valence","arousal","panas_pos","panas_neg"], errors="ignore").values.astype("float32")

        # Standardize features
        self.scaler_X = StandardScaler()
        self.X = self.scaler_X.fit_transform(self.X)

        # Labels
        self.y_va = df[["valence","arousal"]].values.astype("float32")
        self.y_panas = df[["panas_pos","panas_neg"]].values.astype("float32")

        # Standardize labels
        self.scaler_va = StandardScaler()
        self.y_va = self.scaler_va.fit_transform(self.y_va)

        self.scaler_panas = StandardScaler()
        self.y_panas = self.scaler_panas.fit_transform(self.y_panas)

        # Replace any remaining NaN or Inf
        self.X = np.nan_to_num(self.X, nan=0.0, posinf=1e6, neginf=-1e6)
        self.y_va = np.nan_to_num(self.y_va, nan=0.0, posinf=1e6, neginf=-1e6)
        self.y_panas = np.nan_to_num(self.y_panas, nan=0.0, posinf=1e6, neginf=-1e6)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = torch.tensor(self.X[idx], dtype=torch.float32)
        labels = {
            "va": torch.tensor(self.y_va[idx], dtype=torch.float32),
            "panas": torch.tensor(self.y_panas[idx], dtype=torch.float32)
        }
        return x, labels

# ---------------------- AffectNet Dataset ----------------------
class AffectNetDataset(Dataset):
    def __init__(self, df, target_dim=709):
        from sklearn.preprocessing import LabelEncoder
        self.label_encoder = LabelEncoder()

        # Features
        self.X = df.drop(columns=["frame","face_id","timestamp","emotion","stress","valence","arousal"], errors="ignore").values.astype("float32")

        # Standardize features
        self.scaler_X = StandardScaler()
        self.X = self.scaler_X.fit_transform(self.X)

        # Pad / truncate to match target_dim
        if self.X.shape[1] > target_dim:
            self.X = self.X[:, :target_dim]
        elif self.X.shape[1] < target_dim:
            pad_width = target_dim - self.X.shape[1]
            self.X = np.pad(self.X, ((0,0),(0,pad_width)), mode="constant")

        # Labels
        self.y_emotion = self.label_encoder.fit_transform(df["emotion"]).astype("int64")
        self.y_va = df[["valence","arousal"]].values.astype("float32")

        # Standardize VA labels
        self.scaler_va = StandardScaler()
        self.y_va = self.scaler_va.fit_transform(self.y_va)

        # Replace any remaining NaN / Inf
        self.X = np.nan_to_num(self.X, nan=0.0, posinf=1e6, neginf=-1e6)
        self.y_va = np.nan_to_num(self.y_va, nan=0.0, posinf=1e6, neginf=-1e6)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = torch.tensor(self.X[idx], dtype=torch.float32)
        labels = {
            "emotion": torch.tensor(self.y_emotion[idx], dtype=torch.long),
            "va": torch.tensor(self.y_va[idx], dtype=torch.float32),
            "panas": torch.zeros(2, dtype=torch.float32)  # dummy PANAS
        }
        return x, labels


In [74]:
dapper = DapperDataset(dapper_df)
print("VA stats:", dapper.y_va.min(), dapper.y_va.max(), np.mean(dapper.y_va))
print("PANAS stats:", dapper.y_panas.min(), dapper.y_panas.max(), np.mean(dapper.y_panas))

affectnet = AffectNetDataset(affectnet_df)
print("VA stats (AffectNet):", affectnet.y_va.min(), affectnet.y_va.max(), np.mean(affectnet.y_va))


VA stats: -2.8387682 1.9938319 6.55118e-08
PANAS stats: -1.9258921 1.8173043 -1.9410903e-08
VA stats (AffectNet): -1.2135599 2.8 -4.2073868e-08


In [75]:
print("Dapper columns:", dapper_df.columns.tolist())
print("AffectNet columns:", affectnet_df.columns.tolist())

affectnet_dataset = AffectNetDataset(affectnet_df)
print("Emotion classes:", affectnet_dataset.label_encoder.classes_)

Dapper columns: ['participant_id', 'window_id', 'start_time', 'center_time', 'hr_mean', 'hr_std', 'hr_skew', 'hr_kurtosis', 'hr_median', 'hr_iqr', 'hr_min', 'hr_max', 'hr_pwr_total', 'hr_pwr_band_1', 'hr_pwr_band_2', 'gsr_mean', 'gsr_std', 'gsr_skew', 'gsr_kurtosis', 'gsr_median', 'gsr_iqr', 'gsr_min', 'gsr_max', 'gsr_pwr_total', 'gsr_pwr_band_1', 'gsr_pwr_band_2', 'acc_x_mean', 'acc_x_std', 'acc_x_skew', 'acc_x_kurtosis', 'acc_x_median', 'acc_x_iqr', 'acc_x_min', 'acc_x_max', 'acc_x_pwr_total', 'acc_x_pwr_band_1', 'acc_x_pwr_band_2', 'acc_y_mean', 'acc_y_std', 'acc_y_skew', 'acc_y_kurtosis', 'acc_y_median', 'acc_y_iqr', 'acc_y_min', 'acc_y_max', 'acc_y_pwr_total', 'acc_y_pwr_band_1', 'acc_y_pwr_band_2', 'acc_z_mean', 'acc_z_std', 'acc_z_skew', 'acc_z_kurtosis', 'acc_z_median', 'acc_z_iqr', 'acc_z_min', 'acc_z_max', 'acc_z_pwr_total', 'acc_z_pwr_band_1', 'acc_z_pwr_band_2', 'valence', 'arousal', 'panas_pos', 'panas_neg']
AffectNet columns: ['frame', 'face_id', 'timestamp', 'confidence'

In [76]:
dapper_dataset = DapperDataset(dapper_df)
affectnet_dataset = AffectNetDataset(affectnet_df)

dapper_loader = DataLoader(dapper_dataset, batch_size=64, shuffle=True)
affectnet_loader = DataLoader(affectnet_dataset, batch_size=64, shuffle=True)

In [77]:
def clean_features(df, drop_cols):
    df = df.drop(columns=drop_cols, errors="ignore")
    df = df.select_dtypes(include=[np.number])
    df = df.dropna(axis=1, how="all")
    df = df.fillna(df.mean()).fillna(0.0)
    return df.astype(np.float32)


In [78]:
# --- Clean features consistently with pretraining ---
dapper_clean = clean_features(
    dapper_df,
    drop_cols=["participant_id", "window_id", "start_time", "center_time",
               "valence", "arousal", "panas_pos", "panas_neg"]
)
affectnet_clean = clean_features(
    affectnet_df,
    drop_cols=["frame", "face_id", "timestamp", "emotion", "stress",
               "valence", "arousal", "confidence", "success"]
)

# Get feature dims (must match pretrained encoders)
dapper_dim = dapper_clean.shape[1]
affectnet_dim = affectnet_clean.shape[1]

print(f"DAPPER dim = {dapper_dim}, AffectNet dim = {affectnet_dim}")

# --- Load encoders ---
physio_encoder = PhysiologicalEncoder(dapper_dim)
physio_encoder.load_state_dict(torch.load("physio_encoder.pt", map_location=device))
physio_encoder.eval()  # eval mode

image_encoder = ImageEncoder(affectnet_dim)
image_encoder.load_state_dict(torch.load("image_encoder.pt", map_location=device))
image_encoder.eval()

# --- Multi-task classifier ---
class MultiTaskClassifier(nn.Module):
    def __init__(self, physio_encoder, image_encoder, embed_dim=128, num_emotions=7):
        super().__init__()
        self.physio_encoder = physio_encoder
        self.image_encoder = image_encoder

        # freeze encoders (no gradient updates)
        for p in self.physio_encoder.parameters():
            p.requires_grad = False
        for p in self.image_encoder.parameters():
            p.requires_grad = False

        # Task-specific heads
        self.emotion_head = nn.Linear(embed_dim, num_emotions)   # CE loss
        self.va_head      = nn.Linear(embed_dim, 2)              # MSE loss
        self.panas_head   = nn.Linear(embed_dim, 2)              # MSE loss

    def forward(self, x, modality="physio"):
        if modality == "physio":
            z = self.physio_encoder(x)
        else:
            z = self.image_encoder(x)

        return {
            "emotion": self.emotion_head(z),
            "va": self.va_head(z),
            "panas": self.panas_head(z)
        }

# --- Build model ---
model = MultiTaskClassifier(
    physio_encoder, image_encoder, embed_dim=128, num_emotions=7
).to(device)

DAPPER dim = 55, AffectNet dim = 709


In [79]:
# ---------------------- Loss Function ----------------------
def compute_loss(outputs, labels, modality):
    loss = 0.0
    if modality == "image" and "emotion" in labels:
        loss += nn.CrossEntropyLoss()(outputs["emotion"], labels["emotion"])
    if "va" in labels:
        loss += nn.MSELoss()(torch.nan_to_num(outputs["va"]), torch.nan_to_num(labels["va"]))
    if "panas" in labels:
        loss += nn.MSELoss()(torch.nan_to_num(outputs["panas"]), torch.nan_to_num(labels["panas"]))
    return loss

In [82]:
def train(model, dapper_loader, affectnet_loader, device, lr=1e-3, num_epochs=5):
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        total_loss, steps = 0.0, 0

        # --- DAPPER ---
        for physio_x, physio_labels in dapper_loader:
            physio_x = physio_x.to(device)
            labels = {k: v.to(device) for k, v in physio_labels.items()}

            outputs = model(physio_x, modality="physio")

            # Skip batch if NaN in input or output
            if torch.isnan(physio_x).any() or any(torch.isnan(v).any() for v in outputs.values()):
                print("NaN detected in DAPPER batch – skipping")
                continue

            loss = compute_loss(outputs, labels, modality="physio")
            optimizer.zero_grad()
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
            optimizer.step()

            total_loss += loss.item()
            steps += 1

        # --- AffectNet ---
        for image_x, image_labels in affectnet_loader:
            image_x = image_x.to(device)
            labels = {k: v.to(device) for k, v in image_labels.items()}

            outputs = model(image_x, modality="image")

            if torch.isnan(image_x).any() or any(torch.isnan(v).any() for v in outputs.values()):
                print("NaN detected in AffectNet batch – skipping")
                continue

            loss = compute_loss(outputs, labels, modality="image")
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
            optimizer.step()

            total_loss += loss.item()
            steps += 1

        print(f"Epoch {epoch+1}/{num_epochs} | Avg Loss: {total_loss/steps:.4f}")

# Instantiate datasets
dapper_dataset = DapperDataset(dapper_df)
affectnet_dataset = AffectNetDataset(affectnet_df, target_dim=pretrain_dataset.affectnet_dim)

# DataLoaders
dapper_loader = DataLoader(dapper_dataset, batch_size=32, shuffle=True)
affectnet_loader = DataLoader(affectnet_dataset, batch_size=32, shuffle=True)

# Move model to device
model.to(device)

# Call training
train(
    model=model,
    dapper_loader=dapper_loader,
    affectnet_loader=affectnet_loader,
    device=device,
    lr=1e-3,
    num_epochs=5
)


Epoch 1/5 | Avg Loss: 1.7870
Epoch 2/5 | Avg Loss: 1.6472
Epoch 3/5 | Avg Loss: 1.6016
Epoch 4/5 | Avg Loss: 1.5727
Epoch 5/5 | Avg Loss: 1.5550
