In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

In [None]:
# Environment setup (the captured output suggests Google Colab): install gdown,
# download the CollabDiff archive from Google Drive, and unpack it.
!pip install gdown
from pathlib import PosixPath
# CollabDiff archive, fetched by Google Drive file id; -c continues a partial download.
!gdown --id 1GpGvkxQ7leXqCnfnEAsgY_DXFnJwIbO4 -c
# -q: quiet; -n: never overwrite files that already exist, so re-running the cell is cheap.
!unzip -q -n CollabDiff.zip
# NOTE(review): this is a relative path (no leading "/") under a Drive mount, while the
# archive above is unpacked into the working directory. It is not referenced by the
# visible cells -- TODO confirm the intended location before relying on it.
image_path = PosixPath("content/drive/MyDrive/GenAI/CollabDiff")

Downloading...
From (original): https://drive.google.com/uc?id=1GpGvkxQ7leXqCnfnEAsgY_DXFnJwIbO4
From (redirected): https://drive.google.com/uc?id=1GpGvkxQ7leXqCnfnEAsgY_DXFnJwIbO4&confirm=t&uuid=41f62b3e-9168-4f65-92e0-605f176a2b62
To: /content/CollabDiff.zip
100% 354M/354M [00:06<00:00, 58.6MB/s]


In [None]:
# Fix the global NumPy and PyTorch random seeds so weight initialisation,
# dropout masks and DataLoader shuffling are repeatable between runs.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x795557653ef0>

In [None]:
def get_req_set(path, batch_size=32, shuffle=True):
  """Load a feature CSV and wrap it in a DataLoader.

  The CSV must contain a 'features' column whose cells hold a bracketed,
  comma-separated list of numbers (for example "[0.1, 0.2, 0.3]") and a
  'label' column of integer class ids. The number of loaded rows is printed.

  Args:
    path: Location of the CSV file, forwarded to pandas.read_csv.
    batch_size: Number of samples per batch. Defaults to 32, the value that
      used to be hard-coded.
    shuffle: Whether the loader reshuffles the rows on every pass. Defaults
      to True (the previous fixed behaviour); pass False for evaluation sets
      to get a stable batch order.

  Returns:
    A DataLoader over a TensorDataset of (float32 features, int64 labels).

  Raises:
    KeyError: If the 'features' or 'label' column is missing.
    ValueError: If a feature cell contains text that cannot be parsed as a number.
  """
  df = pd.read_csv(path)

  # "[a, b, c]" -> one float column per vector component.
  feature_frame = (
      df['features']
      .str.strip('[]')
      .str.split(',', expand=True)
      .astype(float)
  )

  X_tensor = torch.tensor(feature_frame.to_numpy(), dtype=torch.float32)
  y_tensor = torch.tensor(df['label'].to_numpy(), dtype=torch.long)

  dataset = TensorDataset(X_tensor, y_tensor)
  print(len(dataset))
  return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [None]:
# JPEGs taken from all folders

In [None]:
# Build one DataLoader per split, in train / val / test order.
# Each get_req_set call prints the number of rows it loaded.
train_loader, val_loader, test_loader = (
    get_req_set(csv_path)
    for csv_path in (
        '/content/train_features.csv',
        '/content/val_features.csv',
        '/content/test_features.csv',
    )
)

1400
200
400


In [None]:
class DNN(nn.Module):
    """Feed-forward classifier with two hidden layers.

    Layer order: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    The last layer returns raw scores; no softmax is applied here.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim_1: Width of the first hidden layer.
        hidden_dim_2: Width of the second hidden layer.
        output_dim: Number of output scores per sample.
        dropout_prob: Drop probability used by both dropout layers.
    """

    def __init__(self, input_dim, hidden_dim_1, hidden_dim_2, output_dim, dropout_prob=0.2):
        super().__init__()
        # First hidden stage; the single ReLU module is reused by both stages.
        self.fc1 = nn.Linear(input_dim, hidden_dim_1)
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_prob)
        # Second hidden stage.
        self.fc2 = nn.Linear(hidden_dim_1, hidden_dim_2)
        self.dropout2 = nn.Dropout(p=dropout_prob)
        # Output projection.
        self.fc3 = nn.Linear(hidden_dim_2, output_dim)

    def forward(self, x):
        """Map a batch of feature vectors to a batch of output scores."""
        hidden_1 = self.dropout1(self.relu(self.fc1(x)))
        hidden_2 = self.dropout2(self.relu(self.fc2(hidden_1)))
        return self.fc3(hidden_2)


In [None]:
# Train the DNN classifier and, after every epoch, report metrics on the
# training set and on the two held-out sets (validation and test).
input_dim = 768      # Number of features in the produced dataset
hidden_dim_1 = 128
hidden_dim_2 = 256
output_dim = 2 # Number of classes -- 2
model = DNN(input_dim, hidden_dim_1, hidden_dim_2, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
# Per-epoch history. Only the train_* lists are appended to in this cell;
# test_losses / test_accuracies are kept defined but are not filled here.
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []
num_epochs = 10

# Held-out sets to evaluate after each epoch. Built once, before the epoch loop,
# and iterated with a dedicated loop variable below: reusing the name
# `val_loader` as the loop target rebinds the global, so from the second epoch
# on the "Validation" entry would silently point at the test loader.
val_loaders = [val_loader, test_loader]
val_names = ['Validation','Testing']

for epoch in range(num_epochs):
    # ---- optimisation pass (dropout active) ----
    model.train()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == batch_y).sum().item()
        total_samples += batch_y.size(0)

    # Running train loss (mean of the per-batch losses) and running accuracy,
    # both measured while the weights were still being updated.
    train_loss = running_loss / len(train_loader)
    train_accuracy = correct_predictions / total_samples * 100
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    # ---- second pass over the training data with dropout disabled ----
    # Precision / recall / F1 therefore describe the end-of-epoch weights and
    # can look better than the running accuracy printed next to them.
    model.eval()
    with torch.no_grad():
        y_true_train = []
        y_pred_train = []
        for batch_X, batch_y in train_loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            y_true_train.extend(batch_y.cpu().numpy())
            y_pred_train.extend(predicted.cpu().numpy())

    # average='binary' scores the positive class only (label 1 by scikit-learn's default).
    train_precision = precision_score(y_true_train, y_pred_train, average='binary')
    train_recall = recall_score(y_true_train, y_pred_train, average='binary')
    train_f1 = f1_score(y_true_train, y_pred_train, average='binary')

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Train Precision: {train_precision:.2f}, Train Recall: {train_recall:.2f}, Train F1: {train_f1:.2f}")

    # ---- held-out evaluation (model is still in eval mode) ----
    for eval_loader, val_name in zip(val_loaders, val_names):
        y_true_val = []
        y_pred_val = []
        val_loss = 0.0

        with torch.no_grad():
            for batch_X, batch_y in eval_loader:
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                y_true_val.extend(batch_y.cpu().numpy())
                y_pred_val.extend(predicted.cpu().numpy())

        # Mean of the per-batch losses, plus classification metrics for this set.
        val_loss /= len(eval_loader)
        val_accuracy = accuracy_score(y_true_val, y_pred_val) * 100
        val_precision = precision_score(y_true_val, y_pred_val, average='binary')
        val_recall = recall_score(y_true_val, y_pred_val, average='binary')
        val_f1 = f1_score(y_true_val, y_pred_val, average='binary')

        print(f"{val_name} - "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, "
              f"Val Precision: {val_precision:.2f}, Val Recall: {val_recall:.2f}, Val F1: {val_f1:.2f}")

    print("------------------------------------------------")

Epoch [1/10], Train Loss: 0.2822, Train Accuracy: 90.93%, Train Precision: 1.00, Train Recall: 1.00, Train F1: 1.00
Validation - Val Loss: 0.0060, Val Accuracy: 100.00%, Val Precision: 1.00, Val Recall: 1.00, Val F1: 1.00
Testing - Val Loss: 0.0039, Val Accuracy: 100.00%, Val Precision: 1.00, Val Recall: 1.00, Val F1: 1.00
------------------------------------------------
Epoch [2/10], Train Loss: 0.0022, Train Accuracy: 100.00%, Train Precision: 1.00, Train Recall: 1.00, Train F1: 1.00
Validation - Val Loss: 0.0020, Val Accuracy: 100.00%, Val Precision: 1.00, Val Recall: 1.00, Val F1: 1.00
Testing - Val Loss: 0.0022, Val Accuracy: 100.00%, Val Precision: 1.00, Val Recall: 1.00, Val F1: 1.00
------------------------------------------------
Epoch [3/10], Train Loss: 0.0006, Train Accuracy: 100.00%, Train Precision: 1.00, Train Recall: 1.00, Train F1: 1.00
Validation - Val Loss: 0.0013, Val Accuracy: 100.00%, Val Precision: 1.00, Val Recall: 1.00, Val F1: 1.00
Testing - Val Loss: 0.0013, 