# Imports

In [19]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
import sys
sys.path.append('/home/hice1/mbibars3/scratch/vlm-debiasing/VLM-Debiasing-Project/scripts')

In [21]:
%autoreload 2
import pandas as pd
import numpy as np
import os
import model as m
import torch
import torch.nn as nn
import torch.optim as optim
import loaders
from torch.utils.data import Dataset, DataLoader

# Text-Audio-Visual

# 

- text: Roberta
- audio: AST
- visual: densenet features

In [22]:
features_csv = pd.read_csv("/home/hice1/mbibars3/scratch/vlm-debiasing/data/e-daic/audio_text_visual_paths.csv")
features_csv.head()

Unnamed: 0,split,PTSD_severity,PTSD_label,gender,audio,text,visual
0,train,22.0,0,female,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...
1,test,23.0,0,male,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...
2,train,19.0,0,male,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...
3,train,67.0,1,female,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...
4,dev,39.0,0,male,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...,/home/hice1/mbibars3/scratch/vlm-debiasing/dat...


In [23]:
train_loader = loaders.MultiModalityDataset(features_csv[features_csv["split"]=="train"], 
                                            modalities = {"audio"}, label = "PTSD_severity")
dataloader = DataLoader(train_loader, batch_size=16, collate_fn=loaders.collate_fn)

for batch in dataloader:
    modalities, labels = batch
    print(f"Modality 1 shape: {modalities[0].shape}")  # Expected shape: (batch_size, feature, 1)
    #print(f"Modality 2 shape: {modalities[1].shape}")
    #print(f"Modality 3 shape: {modalities[2].shape}")
    print(f"Labels shape: {labels}")
    break

Modality 1 shape: torch.Size([16, 768, 1])
Labels shape: tensor([22., 19., 67., 17., 25., 25., 50., 34., 19., 26., 28., 44., 61., 21.,
        67., 36.])


In [24]:
# Define input dimensions for each modality
input_dims = [768]  # These are the feature dimensions for each modality

# Initialize MultiModalPerceiver model
model = m.MultiModalPerceiver(
    input_dims=input_dims,
    input_channels=1,
    input_axis=1,
    projection_dim=256,
    num_latents=16,
    latent_dim=128,
    depth=8,
    cross_heads=8,
    latent_heads=8,
    cross_dim_head=32,
    latent_dim_head=32,
    attn_dropout=0.1,
    ff_dropout=0.0,
    output_dim=1,
    weight_tie_layers=True,
    fourier_encode_data=False,
    max_freq=10,
    num_freq_bands=4
)

In [27]:
learning_rate = 0.001
num_epochs = 50
# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        # Unpack the batch
        modalities, labels = batch
        modality_1 = modalities[0]  # Each has shape (batch_size, feature, 1)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        inputs = [modality_1]
        output = model(inputs)

        #print(output, labels)
        # Reshape labels to match the output shape if necessary
        labels = labels.view(output.shape)  # Ensures labels has shape (batch_size, 1)

        # Compute loss
        loss = criterion(output, labels)
        RMSE_loss = torch.sqrt(loss)

        # Backward pass and optimize
        RMSE_loss.backward()
        optimizer.step()

        # Print loss
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {RMSE_loss.item():.4f}")

Epoch [1/50], Loss: 39.2518
Epoch [1/50], Loss: 34.8764
Epoch [1/50], Loss: 34.2645
Epoch [1/50], Loss: 31.5906
Epoch [1/50], Loss: 37.2623
Epoch [1/50], Loss: 32.2111
Epoch [1/50], Loss: 26.6679
Epoch [1/50], Loss: 40.4853
Epoch [1/50], Loss: 38.8633
Epoch [1/50], Loss: 31.2483
Epoch [1/50], Loss: 15.6415
Epoch [2/50], Loss: 32.4840
Epoch [2/50], Loss: 31.0815
Epoch [2/50], Loss: 31.7376
Epoch [2/50], Loss: 29.9207
Epoch [2/50], Loss: 35.9818
Epoch [2/50], Loss: 31.0198
Epoch [2/50], Loss: 25.5247
Epoch [2/50], Loss: 39.2864
Epoch [2/50], Loss: 37.5775
Epoch [2/50], Loss: 30.0137
Epoch [2/50], Loss: 14.3279
Epoch [3/50], Loss: 31.2709
Epoch [3/50], Loss: 29.7913
Epoch [3/50], Loss: 30.4817
Epoch [3/50], Loss: 28.7427
Epoch [3/50], Loss: 34.7999
Epoch [3/50], Loss: 29.8129
Epoch [3/50], Loss: 24.3454
Epoch [3/50], Loss: 38.0369
Epoch [3/50], Loss: 36.2372
Epoch [3/50], Loss: 28.7366
Epoch [3/50], Loss: 12.9709
Epoch [4/50], Loss: 30.0034
Epoch [4/50], Loss: 28.4238
Epoch [4/50], Loss: 

In [28]:
dev_loader = loaders.MultiModalityDataset(features_csv[features_csv["split"]=="dev"], 
                                            modalities = {"audio"}, label = "PTSD_severity")
dev_dataloader = DataLoader(dev_loader, batch_size=16, collate_fn=loaders.collate_fn)

In [29]:
# Model prediction
model.eval()  # Set model to evaluation mode

predictions = []
with torch.no_grad():  # Disable gradient computation
    for batch in dev_dataloader:
        modalities, _ = batch  # Ignore labels if unavailable
        modality_1 = modalities[0]
        
        # Forward pass
        inputs = [modality_1]
        output = model(inputs)  # Model's prediction
        
        # Collect predictions
        predictions.append(output)

# Combine predictions into a single tensor
predictions = torch.cat(predictions, dim=0)
y_pred = np.array(predictions)
print(f"Predictions:\n{predictions}")

Predictions:
tensor([[34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],
        [34.1154],

In [30]:
y_test = np.array(features_csv[features_csv["split"]=="dev"]["PTSD_severity"].values)

In [42]:
features_csv['col_encoded'] = features_csv['gender'].map({'female': 0, 'male': 1, None:1})

In [43]:
np.unique(features_csv['col_encoded'])

array([ 0.,  1., nan])

In [44]:
from fairlearn.metrics import demographic_parity_difference

# Assuming `gender` corresponds to the original metadata
# Split the dataset into training and testing sets for `gender`
gender_train, gender_test = features_csv[features_csv["split"]=="train"]["col_encoded"], features_csv[features_csv["split"]=="dev"]["col_encoded"]

# Ensure that `gender_test`, `y_test`, and `y_pred` are aligned
print(f"Shape of gender_test: {gender_test.shape}")
print(f"Shape of y_test: {y_test.shape}")
print(f"Shape of y_pred: {y_pred.shape}")



Shape of gender_test: (56,)
Shape of y_test: (56,)
Shape of y_pred: (56, 1)


In [46]:
gender_test

4      1.0
5      NaN
6      0.0
11     1.0
14     1.0
15     1.0
24     1.0
29     1.0
30     0.0
32     1.0
36     1.0
43     0.0
46     1.0
54     1.0
61     0.0
66     1.0
72     0.0
74     0.0
76     1.0
77     0.0
80     0.0
81     1.0
89     1.0
94     1.0
103    0.0
111    0.0
119    0.0
124    1.0
126    1.0
138    1.0
143    0.0
147    0.0
149    0.0
150    1.0
158    1.0
163    1.0
164    0.0
167    1.0
172    1.0
184    0.0
189    1.0
190    0.0
193    1.0
197    1.0
206    0.0
208    1.0
212    1.0
223    1.0
234    1.0
238    1.0
239    1.0
247    0.0
249    0.0
253    1.0
268    1.0
269    1.0
Name: col_encoded, dtype: float64

In [47]:
# Calculate Demographic Parity Difference
demographic_parity = demographic_parity_difference(
    y_true=y_test,
    y_pred=y_pred[:,0],
    sensitive_features=gender_test
)

print(f"Demographic Parity Difference: {demographic_parity}")

Demographic Parity Difference: 0.0


In [33]:
from fairlearn.metrics import MetricFrame, selection_rate
metric_frame = MetricFrame(metrics=selection_rate, 
                           y_true=y_test, 
                           y_pred=y_pred[:,0], 
                           sensitive_features=gender_test)

# Get selection rates for each group
selection_rates = metric_frame.by_group
print("Selection Rates by Group:")
print(selection_rates)


Selection Rates by Group:
gender
female     0.0
male       0.0
unknown    0.0
Name: selection_rate, dtype: float64


In [34]:
group_a_rate = selection_rates[1]
group_b_rate = selection_rates[0]

if group_b_rate > 0:  # Avoid division by zero
    demographic_parity_ratio = group_a_rate / group_b_rate
    print(f"Demographic Parity Ratio: {demographic_parity_ratio}")

  group_a_rate = selection_rates[1]
  group_b_rate = selection_rates[0]
