In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


Now to import the datasets to be used as labels.

In [None]:
import os

# Define paths
BASE_DIR = "/content/drive/My Drive/Precog Task/task1/same_on_same"

# EASY_IMAGES_DIR = os.path.join(BASE_DIR, "easy_variations")
# HARD_IMAGES_DIR = os.path.join(BASE_DIR, "hard_variations")
EASY_IMAGES_DIR = BASE_DIR
HARD_IMAGES_DIR = BASE_DIR

EASY_LABELS_CSV = os.path.join(BASE_DIR, "easy_variations.csv")
HARD_LABELS_CSV = os.path.join(BASE_DIR, "hard_variations.csv")
print(EASY_IMAGES_DIR)
print(HARD_IMAGES_DIR)
print(EASY_LABELS_CSV)
print(HARD_LABELS_CSV)


/content/drive/My Drive/Precog Task/task1/same_on_same
/content/drive/My Drive/Precog Task/task1/same_on_same
/content/drive/My Drive/Precog Task/task1/same_on_same/easy_variations.csv
/content/drive/My Drive/Precog Task/task1/same_on_same/hard_variations.csv


Now that we have loaded the dataset, we will make the dataset.


In [None]:
# Transformation function will be put here.
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to PyTorch tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1,1] for stable training
])

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np

label_encoder = LabelEncoder()
# Read all words from CSV files
easy_df = pd.read_csv(EASY_LABELS_CSV)
hard_df = pd.read_csv(HARD_LABELS_CSV)
all_words = np.concatenate([easy_df['Word'].values, hard_df['Word'].values])
label_encoder.fit(all_words)

class WordImageDataset(Dataset):
    def __init__(self, csv_file, image_folder, transform=None):
        self.data = pd.read_csv(csv_file)  # Read CSV file
        print(csv_file)
        # print(image_folder)
        # for filename in os.listdir(image_folder):
        #     print(filename)
        # for index, row in self.data.iterrows():
        #     print(row['Image_Path'])
        self.image_folder = image_folder  # Folder where images are stored
        self.transform = transform  # Image transformations (optional)
        self.label_encoder = label_encoder

    def __len__(self):
        return len(self.data)  # Returns number of images in dataset

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_folder, self.data.iloc[idx, 1])  # Image filename
        Image_Value = Image.open(img_name).convert("RGB")  # Convert to RGB
        Word = self.data.iloc[idx, 0]  # Word label
        Image_Path = os.path.join(self.image_folder, img_name)

        # if self.transform:
        #     image = self.transform(image)

        # return image, label
        try:
            image = Image.open(Image_Path).convert("RGB")

            if self.transform:
                image = self.transform(image)



            # return Word, image
            if self.label_encoder:
                label = self.label_encoder.transform([Word])[0]
                return image, torch.tensor(label, dtype=torch.long)
            return Word, image

        except Exception as e:
            raise RuntimeError(f"Error loading image {Image_Path}: {str(e)}")




easy_dataset = WordImageDataset(EASY_LABELS_CSV, EASY_IMAGES_DIR, transform)
hard_dataset = WordImageDataset(HARD_LABELS_CSV, HARD_IMAGES_DIR, transform)


/content/drive/My Drive/Precog Task/task1/same_on_same/easy_variations.csv
/content/drive/My Drive/Precog Task/task1/same_on_same/hard_variations.csv


We will split the dataset we have created into Training and Testing sets.
There are 1000 images per easy and hard datasets. We will take 20% for the testing dataset.


In [None]:
from torch.utils.data import random_split

# Defining train-test partition (80% train, 20% test)
train_size = int(0.8 * len(easy_dataset))
test_size = len(easy_dataset) - train_size

easy_train, easy_test = random_split(easy_dataset, [train_size, test_size])
hard_train, hard_test = random_split(hard_dataset, [train_size, test_size])


# Creating DataLoaders
batch_size = 32
train_loader = DataLoader(easy_train + hard_train, batch_size=batch_size, shuffle=True)

test_loader = DataLoader(easy_test + hard_test, batch_size=batch_size, shuffle=False)


In [None]:
# for batch_idx, (target, data) in enumerate(train_loader):
#     print(f"Batch Index: {batch_idx}")
#     print(f"Data: {data}")  # Print the data
#     print(f"Target (Labels): {target}")  # Print the labels

#     # You might want to limit the number of batches printed for brevity
#     if batch_idx == 2:  # Print only 3 batches
#         break

Now to define the architecture of the CNN.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class CNNClassifier(nn.Module):
    def __init__(self, num_classes=100):
        super(CNNClassifier, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)  # (32, 100, 400)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)  # (64, 100, 400)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Downsamples (50x200)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)  # (128, 50, 200)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)  # (256, 50, 200)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # Downsamples (25x100)

        # Fully Connected Layers
        self.fc1 = nn.Linear(256 * 25 * 100, 512)  # Flatten and connect to 512 neurons
        self.dropout = nn.Dropout(0.5)  # Dropout for regularization
        self.fc2 = nn.Linear(512, num_classes)  # Output layer

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = self.pool2(F.relu(self.conv4(x)))

        x = x.view(x.size(0), -1)  # Flatten
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout
        x = self.fc2(x)  # Output layer
        return x

# Instantiate the model
model = CNNClassifier(num_classes=100)


In [None]:
import torch.nn as nn
import torch.optim as optim

loss_function = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [12]:
for images, labels in train_loader:
    print("Sample Labels:", labels[:10])
    break  # Print once and exit loop

Sample Labels: tensor([29, 16, 71,  2, 86, 79, 43, 19, 18, 74])


Now to train the model.

In [None]:
num_of_epochs = 10
total_steps = len(train_loader)



for epoch in range(num_of_epochs):
    model.train()
    running_loss = 0.0

    for batch_idx, (images, labels) in enumerate(train_loader):
        # Move to device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print progress
        if batch_idx % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_of_epochs}], Step [{batch_idx+1}/{total_steps}], Loss: {loss.item():.4f}')

    # Print epoch stats
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_of_epochs}], Average Loss: {epoch_loss:.4f}')

Epoch [1/10], Step [1/50], Loss: 4.6038
Epoch [1/10], Step [11/50], Loss: 4.6152
Epoch [1/10], Step [21/50], Loss: 4.6030
Epoch [1/10], Step [31/50], Loss: 4.6030
Epoch [1/10], Step [41/50], Loss: 4.6033
Epoch [1/10], Average Loss: 5.0939
Epoch [2/10], Step [1/50], Loss: 4.6070
Epoch [2/10], Step [11/50], Loss: 4.6018
Epoch [2/10], Step [21/50], Loss: 4.6022
Epoch [2/10], Step [31/50], Loss: 4.6046
Epoch [2/10], Step [41/50], Loss: 4.6062
Epoch [2/10], Average Loss: 4.6049
Epoch [3/10], Step [1/50], Loss: 4.6040
Epoch [3/10], Step [11/50], Loss: 4.5984
Epoch [3/10], Step [21/50], Loss: 4.6009
Epoch [3/10], Step [31/50], Loss: 4.6026
Epoch [3/10], Step [41/50], Loss: 4.6112
Epoch [3/10], Average Loss: 4.6047
Epoch [4/10], Step [1/50], Loss: 4.6082
Epoch [4/10], Step [11/50], Loss: 4.6093
Epoch [4/10], Step [21/50], Loss: 4.6000
Epoch [4/10], Step [31/50], Loss: 4.6015
Epoch [4/10], Step [41/50], Loss: 4.6082
Epoch [4/10], Average Loss: 4.6042
Epoch [5/10], Step [1/50], Loss: 4.6072
Epoc

Testing phase.

In [None]:
# Saving the model
torch.save(model.state_dict(), "cnn_model.pth")

In [None]:
model.eval() #Setting to eval mode
right=0
count=0

with torch.no_grad():
  for images, labels in test_loader:
    iamges, labels = images.to(device), labels.to(device)

    outputs = model(images)
    _, predicted = torch.max(outputs, 1)

    count += labels.size(0)
    # right += (predicted == labels).sum().item()

    print(right)

accuracy = 100 * right/count
print(f"Test Accuracy: {accuracy:.2f}%")



RuntimeError: Boolean value of Tensor with more than one value is ambiguous

In [10]:
model = CNNClassifier(num_classes=100)  # Make sure to define CNNClassifier in the notebook

# Load model weights
model.load_state_dict(torch.load("/content/drive/My Drive/cnn_model.pth", weights_only=True))
model.to(device)  # Move model to GPU/CPU
model.eval()

CNNClassifier(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=640000, out_features=512, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out_features=100, bias=True)
)

In [11]:
model.eval()  # Set model to evaluation mode

right = 0
count = 0

with torch.no_grad():  # No gradients needed for testing
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move to device

        outputs = model(images)  # Forward pass
        _, predicted = torch.max(outputs, 1)  # Get predicted class

        count += labels.size(0)
        right += (predicted == labels).sum().item()  # Count correct predictions

        # Print actual vs predicted labels
        print(f"Actual: {labels.tolist()}, Predicted: {predicted.tolist()}")

# Compute and print final accuracy
accuracy = 100 * right / count
print(f"Test Accuracy: {accuracy:.2f}%")


Actual: [16, 53, 46, 81, 58, 5, 4, 94, 46, 87, 75, 13, 35, 44, 4, 45, 55, 62, 70, 63, 17, 58, 96, 58, 27, 40, 58, 75, 24, 63, 52, 38], Predicted: [93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93]
Actual: [31, 80, 94, 0, 94, 17, 87, 86, 18, 0, 74, 96, 22, 84, 38, 79, 2, 35, 4, 35, 75, 52, 94, 50, 15, 58, 15, 63, 24, 55, 73, 45], Predicted: [93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93]
Actual: [94, 79, 87, 33, 49, 81, 83, 99, 70, 33, 2, 44, 64, 53, 15, 17, 33, 35, 42, 49, 83, 54, 15, 38, 88, 16, 86, 51, 83, 84, 88, 52], Predicted: [93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93]
Actual: [54, 64, 15, 24, 79, 96, 15, 54, 33, 53, 94, 44, 84, 24, 73, 50, 87, 45, 16, 96, 62, 38, 80, 33, 5, 96, 84, 83, 0, 0, 63, 81], Predicted: [93, 93, 93, 93, 93, 93, 93,