In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive

drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Locations of the training and test splits on the mounted Drive.
train_path = "/content/drive/MyDrive/part_A_final/train_data"
test_path = "/content/drive/MyDrive/part_A_final/test_data"

In [None]:
from scipy.io import loadmat

# Load one ground-truth annotation file and list its top-level keys.
# Point the path at any other GT_*.mat file to inspect that one instead.
mat = loadmat(
    "/content/drive/MyDrive/part_A_final/test_data/ground_truth/GT_IMG_1.mat"
)
print(mat.keys())


dict_keys(['__header__', '__version__', '__globals__', 'image_info'])


In [None]:
# Reload the same annotation file so this cell runs on its own, then dump the
# raw 'image_info' entry to see how the annotations are nested.
mat = loadmat(
    "/content/drive/MyDrive/part_A_final/test_data/ground_truth/GT_IMG_1.mat"
)
image_info = mat["image_info"]
print(image_info)


In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from scipy.io import loadmat
from torchvision import transforms
import cv2
from scipy.ndimage import gaussian_filter


def generate_density_map(image_shape, points, sigma=15):
    """Build a Gaussian-smoothed density map from point annotations.

    Each annotated point adds 1 to the pixel it falls in; the resulting
    impulse image is then blurred with a Gaussian kernel.

    Args:
        image_shape: ``(height, width)`` of the map to create.
        points: Iterable of annotations where ``point[0]`` is the x (column)
            coordinate and ``point[1]`` is the y (row) coordinate. Values are
            truncated to integers.
        sigma: Standard deviation passed to ``gaussian_filter``. Defaults to
            15, the value that was previously hard-coded.

    Returns:
        A ``float32`` array of shape ``image_shape``.
    """
    height, width = image_shape[0], image_shape[1]
    density_map = np.zeros(image_shape, dtype=np.float32)
    for point in points:
        x, y = int(point[0]), int(point[1])
        # Skip annotations outside the image. The lower bound matters:
        # a negative index would silently wrap around to the opposite edge
        # and place a head where there is none.
        if not (0 <= x < width and 0 <= y < height):
            continue
        density_map[y, x] += 1
    return gaussian_filter(density_map, sigma=sigma)


Loading datasets...


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/part_A_final/train_data/images'

In [None]:

class CrowdDataset(Dataset):
    """Dataset of crowd images paired with density-map targets.

    Every ``*.jpg`` in ``image_dir`` is matched with ``GT_<stem>.mat`` in
    ``gt_dir``. The annotation points are turned into a density map with
    ``generate_density_map``; image and map are then resized to
    ``output_size``.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        gt_dir: Directory containing the ``GT_*.mat`` annotation files.
        transform: Optional callable applied to the resized RGB image.
        output_size: Target size handed to ``cv2.resize`` as ``dsize``, which
            OpenCV interprets as ``(width, height)``.
    """

    def __init__(self, image_dir, gt_dir, transform=None, output_size=(256, 256)):
        self.image_dir = image_dir
        self.gt_dir = gt_dir
        # os.listdir returns entries in arbitrary order; sort so that an index
        # always refers to the same image across runs and machines.
        self.image_files = sorted(
            f for f in os.listdir(image_dir) if f.endswith('.jpg')
        )
        self.transform = transform
        self.output_size = output_size

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.image_files)

    @staticmethod
    def _rescale_to_count(density_map, target_count):
        """Scale ``density_map`` so that it sums to ``target_count``.

        A map whose sum is not positive is returned unchanged, which avoids
        the 0/0 division that would fill it with NaN.
        """
        current_count = float(np.sum(density_map))
        if current_count <= 0.0:
            return density_map
        return density_map * np.float32(target_count / current_count)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(img, density_map)`` where ``img`` is the resized RGB image
            (after ``transform`` if one was given) and ``density_map`` is a
            ``float32`` tensor with a leading channel dimension of size 1.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        gt_path = os.path.join(self.gt_dir, f"GT_{self.image_files[idx][:-4]}.mat")

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        mat = loadmat(gt_path)
        image_info = mat['image_info']
        # Unwrap the nested MATLAB struct down to the array of annotation points.
        locations = image_info[0][0][0][0][0]

        density_map = generate_density_map(img.shape[:2], locations)
        # Remember the count at full resolution: interpolation during the
        # resize changes the sum of the map, and the sum is the crowd count.
        target_count = float(np.sum(density_map))

        img = cv2.resize(img, self.output_size)
        density_map = cv2.resize(density_map, self.output_size)
        density_map = self._rescale_to_count(density_map, target_count)

        if self.transform:
            img = self.transform(img)

        density_map = torch.tensor(density_map, dtype=torch.float32).unsqueeze(0)
        return img, density_map


In [None]:

class DepthEmbeddedLCDnet(nn.Module):
    """Small fully-convolutional network that fuses image and depth features.

    A 3-channel image goes through five convolutions; a 1-channel depth input
    goes through a two-layer encoder. The 64 image channels and 16 depth
    channels are concatenated (80 channels) and reduced by two 1x1
    convolutions to a single-channel map. Every convolution uses stride 1 with
    padding that matches its kernel, so the output keeps the input's height
    and width. The final layer has no activation.
    """

    def __init__(self):
        super().__init__()

        # Depth branch: two 3x3 convolutions, each followed by ReLU.
        depth_layers = [
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        self.depth_encoder = nn.Sequential(*depth_layers)

        # Image branch.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Fusion head: 64 image channels + 16 depth channels -> 128 -> 1.
        self.conv6 = nn.Conv2d(64 + 16, 128, kernel_size=1)
        self.output = nn.Conv2d(128, 1, kernel_size=1)

    def forward(self, img, depth):
        """Return the single-channel prediction for ``img`` and ``depth``.

        Args:
            img: Image batch with 3 channels.
            depth: Depth batch with 1 channel and the same spatial size as
                ``img`` (required for the channel-wise concatenation).
        """
        depth_feats = self.depth_encoder(depth)

        feats = img
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            feats = torch.relu(conv(feats))

        fused = torch.cat((feats, depth_feats), dim=1)
        hidden = torch.relu(self.conv6(fused))
        return self.output(hidden)


In [None]:

def train_model(model, dataloader, optimizer, criterion, device, num_epochs=10):
    """Train ``model`` on ``dataloader`` and report the loss as it goes.

    The depth input fed to the model is the per-pixel mean over the image's
    channel dimension; no separate depth data is loaded.

    Args:
        model: Module called as ``model(img, depth)``.
        dataloader: Sized iterable yielding ``(img, gt_density)`` batches.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss called as ``criterion(pred_density, gt_density)``.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        A list with the average batch loss of each epoch, in order.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        # Without this guard the epoch summary divides by zero.
        raise ValueError("train_model received an empty dataloader; nothing to train on.")

    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        print(f"Starting epoch {epoch+1}/{num_epochs}...")
        for batch_idx, (img, gt_density) in enumerate(dataloader):
            img, gt_density = img.to(device), gt_density.to(device)

            optimizer.zero_grad()
            # Pseudo-depth: channel-wise mean, kept as a 1-channel tensor.
            depth = torch.mean(img, dim=1, keepdim=True)
            pred_density = model(img, depth)

            loss = criterion(pred_density, gt_density)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            if batch_idx % 10 == 0:
                print(f"Batch {batch_idx+1}/{num_batches}, Loss: {loss.item():.4f}")

        average_loss = epoch_loss / num_batches
        epoch_losses.append(average_loss)
        print(f"Epoch {epoch+1} completed. Average Loss: {average_loss:.4f}")

    return epoch_losses


In [None]:

def test_model(model, dataloader, device):
    print("Starting testing...")
    model.eval()
    mae = 0.0
    with torch.no_grad():
        for batch_idx, (img, gt_density) in enumerate(dataloader):
            img, gt_density = img.to(device), gt_density.to(device)
            depth = torch.mean(img, dim=1, keepdim=True)
            pred_density = model(img, depth)
            mae += torch.abs(pred_density.sum() - gt_density.sum()).item()

            if batch_idx % 10 == 0:
                print(f"Processed batch {batch_idx+1}/{len(dataloader)}...")

    print("Testing completed.")
    return mae / len(dataloader)


In [None]:

def main(
    data_root="/content/drive/MyDrive/part_A_final",
    num_epochs=3,
    batch_size=4,
    learning_rate=1e-4,
    output_size=(256, 256),
    checkpoint_path="depth_embedded_lcdnet.pth",
):
    """Train DepthEmbeddedLCDnet, save its weights, and report the test MAE.

    All arguments default to the values that were previously hard-coded, so
    calling ``main()`` with no arguments behaves as before.

    Args:
        data_root: Folder containing ``train_data`` and ``test_data``, each
            with ``images`` and ``ground_truth`` sub-folders.
        num_epochs: Number of training epochs.
        batch_size: Batch size for both the train and test loaders.
        learning_rate: Learning rate for the Adam optimizer.
        output_size: Size every image and density map is resized to.
        checkpoint_path: Where the trained ``state_dict`` is written.

    Raises:
        FileNotFoundError: If any of the four dataset directories is missing.
    """
    train_img_dir = os.path.join(data_root, "train_data", "images")
    train_gt_dir = os.path.join(data_root, "train_data", "ground_truth")
    test_img_dir = os.path.join(data_root, "test_data", "images")
    test_gt_dir = os.path.join(data_root, "test_data", "ground_truth")

    print("Loading datasets...")
    # Fail fast and name every missing directory at once, rather than
    # stopping at the first os.listdir call inside the dataset.
    missing_dirs = [
        path
        for path in (train_img_dir, train_gt_dir, test_img_dir, test_gt_dir)
        if not os.path.isdir(path)
    ]
    if missing_dirs:
        raise FileNotFoundError(
            "Dataset directories not found (is Drive mounted and data_root correct?): "
            + ", ".join(missing_dirs)
        )

    transform = transforms.Compose([transforms.ToTensor()])
    train_dataset = CrowdDataset(train_img_dir, train_gt_dir, transform, output_size=output_size)
    test_dataset = CrowdDataset(test_img_dir, test_gt_dir, transform, output_size=output_size)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    print("Datasets loaded successfully.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DepthEmbeddedLCDnet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    print("Starting training...")
    train_model(model, train_loader, optimizer, criterion, device, num_epochs=num_epochs)
    print("Training completed.")

    torch.save(model.state_dict(), checkpoint_path)
    print(f"Model saved to {checkpoint_path}")

    mae = test_model(model, test_loader, device)
    print(f"Mean Absolute Error on Test Dataset: {mae:.2f}")

if __name__ == "__main__":
    main()


In [None]:

# Build a fresh network and count the parameters that receive gradients.
model = DepthEmbeddedLCDnet()

total_params = 0
for param in model.parameters():
    if param.requires_grad:
        total_params += param.numel()

print(f"Total Trainable Parameters in DepthEmbeddedLCDnet: {total_params}")


Total Trainable Parameters in DepthEmbeddedLCDnet: 100977
