# Imports

In [1]:
%load_ext autoreload

In [2]:
import sys
sys.path.append('/home/hice1/mbibars3/scratch/vlm-debiasing/VLM-Debiasing-Project/scripts')

In [3]:
%autoreload 2
import pandas as pd
import numpy as np
import os
import model as m
import torch
import torch.nn as nn
import torch.optim as optim
import loaders
from torch.utils.data import Dataset, DataLoader


# Check model training on random arrays 

In [31]:
# Assuming MultiModalPerceiver is defined as in the code above
# Define input dimensions for each modality
input_dims = [64, 128, 256]  # These could be the feature dimensions for each modality

# Initialize MultiModalPerceiver model
model = m.MultiModalPerceiver(
    input_dims=input_dims,
    input_channels=1,
    input_axis=1,
    projection_dim=256,
    num_latents=16,
    latent_dim=128,
    depth=8,
    cross_heads=8,
    latent_heads=8,
    cross_dim_head=32,
    latent_dim_head=32,
    attn_dropout=0.1,
    ff_dropout=0.0,
    output_dim=1,
    weight_tie_layers=True,
    fourier_encode_data=False,
    max_freq=10,
    num_freq_bands=4
)

# Create random data for each modality
batch_size = 10  # Number of samples in a batch
modality_1 = torch.randn(batch_size, input_dims[0])  # First modality with input dimension 64
modality_2 = torch.randn(batch_size, input_dims[1])  # Second modality with input dimension 128
modality_3 = torch.randn(batch_size, input_dims[2])  # Third modality with input dimension 256

# Combine modalities into a list and pass through the model
input_data = [modality_1, modality_2, modality_3]
output = model(input_data)

# Print output shape to verify
print("Output shape:", output.shape)  # Expected shape: [batch_size, output_dim]


torch.Size([10, 768, 1])
Output shape: torch.Size([10, 1])


In [32]:
print(output)

tensor([[-1.0926],
        [-1.1314],
        [-1.0889],
        [-1.1205],
        [-1.0704],
        [-1.0984],
        [-1.0807],
        [-1.0941],
        [-1.1047],
        [-1.1045]], grad_fn=<AddmmBackward0>)


In [None]:
# Dummy target output (regression target)
target = torch.randn(batch_size, 1)  # Shape: [batch_size, output_dim]

learning_rate = 0.001
num_epochs = 5
# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    # Zero the parameter gradients
    optimizer.zero_grad()
    
    # Forward pass
    inputs = [modality_1, modality_2, modality_3]
    output = model(inputs)
    
    # Compute loss
    loss = criterion(output, target)
    
    # Backward pass and optimize
    loss.backward()
    optimizer.step()
    
    # Print loss
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/5], Loss: 2.0485
Epoch [2/5], Loss: 16.5415
Epoch [3/5], Loss: 3.2676
Epoch [4/5], Loss: 1.2193
Epoch [5/5], Loss: 2.2094


In [38]:
import numpy as np
arr = np.load('/home/hice1/mbibars3/scratch/vlm-debiasing/data/e-daic/untarred/717_P/717_AUDIO_ast9_pooled.npy')
arr.shape

(768, 1)

# Example training the model using the dataloader

## Building the dataloader with dummy multimodal data

The following cell is optional to create and save dummy multimodal data

```py
# Define paths for saving the dummy data
os.makedirs("../dummy_data", exist_ok=True)

# Generate dummy data and save it to .npy files
num_samples = 10
feature_size = 5

data = {
    "label": [],
    "modality1_path": [],
    "modality2_path": [],
    "modality3_path": []
}

for i in range(num_samples):
    # Create dummy numpy arrays of shape (feature, 1)
    modality1 = np.random.rand(feature_size, 1)
    modality2 = np.random.rand(feature_size, 1)
    modality3 = np.random.rand(feature_size, 1)

    # Save each modality to a separate file
    modality1_path = f"../dummy_data/modality1_sample{i}.npy"
    modality2_path = f"../dummy_data/modality2_sample{i}.npy"
    modality3_path = f"../dummy_data/modality3_sample{i}.npy"
    np.save(modality1_path, modality1)
    np.save(modality2_path, modality2)
    np.save(modality3_path, modality3)
    
    # Populate data for the DataFrame
    data["label"].append(np.random.randint(0, 2))  # Random label (0 or 1)
    data["modality1_path"].append(modality1_path)
    data["modality2_path"].append(modality2_path)
    data["modality3_path"].append(modality3_path)

# Create the DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a CSV
df.to_csv("../dummy_data/data.csv", index=False)

print("Dummy data created and saved to 'dummy_data' folder.")
```

In [15]:
df = pd.read_csv("../dummy_data/data.csv")
df

Unnamed: 0,label,modality1_path,modality2_path,modality3_path
0,1,../dummy_data/modality1_sample0.npy,../dummy_data/modality2_sample0.npy,../dummy_data/modality3_sample0.npy
1,0,../dummy_data/modality1_sample1.npy,../dummy_data/modality2_sample1.npy,../dummy_data/modality3_sample1.npy
2,1,../dummy_data/modality1_sample2.npy,../dummy_data/modality2_sample2.npy,../dummy_data/modality3_sample2.npy
3,0,../dummy_data/modality1_sample3.npy,../dummy_data/modality2_sample3.npy,../dummy_data/modality3_sample3.npy
4,0,../dummy_data/modality1_sample4.npy,../dummy_data/modality2_sample4.npy,../dummy_data/modality3_sample4.npy
5,1,../dummy_data/modality1_sample5.npy,../dummy_data/modality2_sample5.npy,../dummy_data/modality3_sample5.npy
6,0,../dummy_data/modality1_sample6.npy,../dummy_data/modality2_sample6.npy,../dummy_data/modality3_sample6.npy
7,1,../dummy_data/modality1_sample7.npy,../dummy_data/modality2_sample7.npy,../dummy_data/modality3_sample7.npy
8,1,../dummy_data/modality1_sample8.npy,../dummy_data/modality2_sample8.npy,../dummy_data/modality3_sample8.npy
9,1,../dummy_data/modality1_sample9.npy,../dummy_data/modality2_sample9.npy,../dummy_data/modality3_sample9.npy


In [24]:
dataset = loaders.MultiModalityDataset(df, modalities = {"modality1_path", "modality2_path", "modality3_path"})
dataloader = DataLoader(dataset, batch_size=2, collate_fn=loaders.collate_fn)

for batch in dataloader:
    modalities, labels = batch
    print(f"Modality 1 shape: {modalities[0].shape}")  # Expected shape: (batch_size, feature, 1)
    print(f"Modality 2 shape: {modalities[1].shape}")
    print(f"Modality 3 shape: {modalities[2].shape}")
    print(f"Labels shape: {labels}")

Modality 1 shape: torch.Size([2, 5, 1])
Modality 2 shape: torch.Size([2, 5, 1])
Modality 3 shape: torch.Size([2, 5, 1])
Labels shape: tensor([1., 0.])
Modality 1 shape: torch.Size([2, 5, 1])
Modality 2 shape: torch.Size([2, 5, 1])
Modality 3 shape: torch.Size([2, 5, 1])
Labels shape: tensor([1., 0.])
Modality 1 shape: torch.Size([2, 5, 1])
Modality 2 shape: torch.Size([2, 5, 1])
Modality 3 shape: torch.Size([2, 5, 1])
Labels shape: tensor([0., 1.])
Modality 1 shape: torch.Size([2, 5, 1])
Modality 2 shape: torch.Size([2, 5, 1])
Modality 3 shape: torch.Size([2, 5, 1])
Labels shape: tensor([0., 1.])
Modality 1 shape: torch.Size([2, 5, 1])
Modality 2 shape: torch.Size([2, 5, 1])
Modality 3 shape: torch.Size([2, 5, 1])
Labels shape: tensor([1., 1.])


## Training the model

In [25]:
# Define input dimensions for each modality
input_dims = [5, 5, 5]  # These are the feature dimensions for each modality

# Initialize MultiModalPerceiver model
model = m.MultiModalPerceiver(
    input_dims=input_dims,
    input_channels=1,
    input_axis=1,
    projection_dim=256,
    num_latents=16,
    latent_dim=128,
    depth=8,
    cross_heads=8,
    latent_heads=8,
    cross_dim_head=32,
    latent_dim_head=32,
    attn_dropout=0.1,
    ff_dropout=0.0,
    output_dim=1,
    weight_tie_layers=True,
    fourier_encode_data=False,
    max_freq=10,
    num_freq_bands=4
)

In [1]:
learning_rate = 0.001
num_epochs = 5
# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        # Unpack the batch
        modalities, labels = batch
        modality_1, modality_2, modality_3 = modalities  # Each has shape (batch_size, feature, 1)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        inputs = [modality_1, modality_2, modality_3]
        output = model(inputs)

        #print(output, labels)
        # Reshape labels to match the output shape if necessary
        labels = labels.view(output.shape)  # Ensures labels has shape (batch_size, 1)

        # Compute loss
        loss = criterion(output, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Print loss
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

NameError: name 'nn' is not defined