# Multi-Input Fusion Model for 5-class Classification

This will combine the facial feature attributes with the image data itself.

Idea is as follows:

1) Transformer for Image Processing: Use a Vision Transformer (ViT) as the image processing component. ViTs divide the image into patches and process these through self-attention mechanisms, which can capture complex patterns and relationships within the image data.

2) Dense Network for Structured Data: Implement a deep neural network (DNN) with multiple dense layers to process the structured features from JSON array. This pathway will handle the attributes, labels, and conditions associated with each image.

3) Fusion Layer: After processing the image and structured data separately, combine their representations using a fusion layer.

4) Output Layer

## Prep

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
import os
from PIL import Image
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, recall_score, accuracy_score
from torch.utils.data import Dataset
from torchvision import transforms
import numpy as np
from sklearn.metrics import recall_score

In [7]:
!gdown "https://drive.google.com/uc?id=1P-ypjfLTZsEpBSyMdzoeThlWd-l1a5rN"

Downloading...
From (original): https://drive.google.com/uc?id=1P-ypjfLTZsEpBSyMdzoeThlWd-l1a5rN
From (redirected): https://drive.google.com/uc?id=1P-ypjfLTZsEpBSyMdzoeThlWd-l1a5rN&confirm=t&uuid=83ac9f1c-18ed-4403-9683-9c5bacd41958
To: c:\Users\larak\OneDrive\Documents\GitHub\COMS-4995-ACV-Project\models_v2\preprocessed_data-20240325T171740Z-001.zip

  0%|          | 0.00/415M [00:00<?, ?B/s]
  0%|          | 524k/415M [00:00<03:08, 2.20MB/s]
  0%|          | 1.05M/415M [00:00<04:25, 1.56MB/s]
  0%|          | 1.57M/415M [00:01<04:45, 1.45MB/s]
  1%|          | 2.10M/415M [00:01<04:17, 1.61MB/s]
  1%|          | 2.62M/415M [00:01<03:31, 1.95MB/s]
  1%|          | 3.15M/415M [00:01<03:52, 1.77MB/s]
  1%|          | 3.67M/415M [00:02<04:56, 1.39MB/s]
  1%|          | 4.19M/415M [00:02<05:14, 1.31MB/s]
  1%|          | 4.72M/415M [00:03<06:05, 1.12MB/s]
  1%|▏         | 5.24M/415M [00:03<05:31, 1.24MB/s]
  1%|▏         | 5.77M/415M [00:04<04:47, 1.42MB/s]
  2%|▏         | 6.29M/415M [00:

In [10]:
ls

 Volume in drive C is OS
 Volume Serial Number is 8400-BE27

 Directory of c:\Users\larak\OneDrive\Documents\GitHub\COMS-4995-ACV-Project\models_v2

04-10-2024  01:56 PM    <DIR>          .
04-10-2024  11:39 AM    <DIR>          ..
04-10-2024  11:39 AM           333,082 multiclass_model on age_dataset.ipynb
04-10-2024  01:49 PM             3,383 multi-input_fusion_model.ipynb
04-10-2024  01:56 PM       415,293,511 preprocessed_data-20240325T171740Z-001.zip
04-10-2024  11:39 AM         3,041,033 VIT.ipynb
               4 File(s)    418,671,009 bytes
               2 Dir(s)   9,111,248,896 bytes free


In [21]:
!unzip preprocessed_data-20240325T171740Z-001.zip

'unzip' is not recognized as an internal or external command,
operable program or batch file.


In [5]:
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [22]:
!ls preprocessed_data

'ls' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
!ls preprocessed_data/spoof

In [None]:
import torch
from torch.utils.data import Dataset
import json
from PIL import Image
import os

class SpoofDataset(Dataset):
    def __init__(self, data_dir, label_file, transform=None):
        """
        Args:
            data_dir: Directory with all the images, separated into 'live' and 'spoof' subdirectories.
            label_file: Path to JSON file with labels, where label 0 indicates 'live' and others indicate 'spoof'.
            transform: Optional transform.
        """
        self.data_dir = data_dir
        self.transform = transform
        with open(label_file, 'r') as f:
            self.labels = json.load(f)

        self.img_paths = []
        self.img_labels = []
        for img_name, label in self.labels.items():
            # Correct the file extension from .png to .jpg
            img_name = img_name.replace('.png', '.jpg')
            subfolder = 'live' if label == 0 else 'spoof'
            full_path = os.path.join(self.data_dir, subfolder, img_name)
            if os.path.exists(full_path):
                self.img_paths.append(full_path)
                self.img_labels.append(label)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.img_labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


In [None]:
data_dir = 'preprocessed_data'
label_file = 'train_label_complete.json'

dataset = SpoofDataset(data_dir=data_dir, label_file=label_file, transform=data_transforms)

# Split dataset into train and validation sets
num_train = int(len(dataset) * 0.8)
num_val = len(dataset) - num_train
train_dataset, val_dataset = random_split(dataset, [num_train, num_val])

dataloaders = {
    'train': DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True),
    'val': DataLoader(val_dataset, batch_size=32, shuffle=False, pin_memory=True)
}

model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
device

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize best metrics tracking
best_metrics = {
    'epoch': 0,
    'val_loss': float('inf'),
    'val_accuracy': 0,
    'val_precision': 0,
    'val_recall': 0,
    'val_f1': 0,
}

In [None]:
# Training loop
for epoch in range(10):
    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        all_preds = []
        all_labels = []

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_accuracy = accuracy_score(all_labels, all_preds)
        epoch_precision = precision_score(all_labels, all_preds, zero_division=0, average='macro')
        epoch_recall = recall_score(all_labels, all_preds, zero_division=0, average='macro')
        epoch_f1 = f1_score(all_labels, all_preds, zero_division=0, average='macro')

        print(f'Epoch {epoch+1} {phase.upper()} Loss: {epoch_loss:.4f} Accuracy: {epoch_accuracy:.4f} Precision: {epoch_precision:.4f} Recall: {epoch_recall:.4f} F1: {epoch_f1:.4f}')

        # Update best metrics for validation phase
        if phase == 'val' and (epoch_loss < best_metrics['val_loss'] or epoch_recall > best_metrics['val_recall']):
            best_metrics.update({
                'epoch': epoch + 1,
                'val_loss': epoch_loss,
                'val_accuracy': epoch_accuracy,
                'val_precision': epoch_precision,
                'val_recall': epoch_recall,
                'val_f1': epoch_f1,
            })

print(f"Best Metrics at Epoch {best_metrics['epoch']}:")
for metric, value in best_metrics.items():
    if metric != 'epoch':
        print(f"{metric.capitalize()}: {value:.4f}")
