In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every data and model path used in this
# notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
import zipfile
import os

# Archive to read from and the Drive folder it is unpacked into.
zip_file_path = '/content/drive/MyDrive/data/Assignment_1.zip'
extract_folder = '/content/drive/MyDrive/data'
target_folder = 'Train'

# Make sure the destination exists before unpacking.
os.makedirs(extract_folder, exist_ok=True)

# Unpack only the archive entries whose path begins with the training folder name.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    train_members = [
        entry
        for entry in zip_ref.namelist()
        if entry.startswith(target_folder)
    ]
    zip_ref.extractall(extract_folder, members=train_members)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import random_split

# Define the CNN architecture
class Net(nn.Module):
    """CNN image classifier: three conv/ReLU/max-pool stages and a four-layer linear head.

    An adaptive average pool forces the feature map to 5x5 before flattening, so the
    size of the linear head does not depend on the input resolution. The input must
    still be large enough to pass through three 5x5 convolutions and 2x2 poolings.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that was
            previously hard-coded, so ``Net()`` builds exactly the same model and
            existing ``state_dict`` files (same layer names and shapes) still load.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Feature extractor: 3 input channels (RGB) -> 16 -> 64 -> 256.
        self.conv1 = nn.Conv2d(3, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 64, 5)
        self.conv3 = nn.Conv2d(64, 256, 5)
        # Fixes the spatial size at 5x5 regardless of the incoming feature-map size.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((5, 5))
        # Classifier head.
        self.fc1 = nn.Linear(256 * 5 * 5, 240)
        self.fc2 = nn.Linear(240, 120)
        self.fc3 = nn.Linear(120, 64)
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.adaptive_pool(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No softmax here: nn.CrossEntropyLoss (used for training) takes raw logits.
        x = self.fc4(x)
        return x

# Determine the device to use
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Initialize the CNN and move it to the device
net = Net().to(device)

# Define a Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)  # Use Adam optimizer

# Preprocessing applied to every image (also reused by the test-prediction cell):
# convert to a tensor, then normalize each of the three channels with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load the dataset from the 'Train' directory (one sub-folder per class)
dataset = datasets.ImageFolder(root='/content/drive/MyDrive/data/Train', transform=transform)

# Determine the lengths of your training and validation sets
train_len = int(0.8 * len(dataset))  # 80% for training
val_len = len(dataset) - train_len   # 20% for validation

# Split the dataset. A seeded generator keeps the train/validation membership the
# same on every run, so validation numbers are comparable between runs.
split_generator = torch.Generator().manual_seed(42)
trainset, valset = random_split(dataset, [train_len, val_len], generator=split_generator)

# Create data loaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=4, shuffle=False)

# Initialize variables for early stopping
patience = 5  # Number of epochs to wait for improvement before stopping
best_loss = float('inf')  # Best (summed per-batch) validation loss achieved so far
epochs_no_improve = 0  # Number of epochs with no improvement
best_state = None  # Copy of the weights that produced best_loss

# Train the network
for epoch in range(40):  # loop over the dataset multiple times
    net.train()  # training mode for the optimisation pass
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 20 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 20))
            running_loss = 0.0

    # After each epoch, calculate the accuracy and loss on the validation set
    net.eval()  # evaluation mode, matching what the prediction cell does
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():
        for data in valloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            val_loss += criterion(outputs, labels).item()
        print('Accuracy of the network on the validation images: %d %%' % (
            100 * correct / total))

    # Check for improvement
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_no_improve = 0
        # Snapshot the best weights. state_dict() returns references to the live
        # parameters, so each tensor is cloned to stop later updates overwriting it.
        best_state = {name: tensor.detach().clone()
                      for name, tensor in net.state_dict().items()}
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print('Early stopping!')
            break

# Restore the weights from the best validation epoch. Without this, the model that
# gets saved afterwards is the one from the last epoch, which by construction is up
# to `patience` epochs past the best one whenever early stopping fires.
if best_state is not None:
    net.load_state_dict(best_state)

print('Finished Training')

In [None]:
# Save the trained model's parameters (the state_dict only, not the module object)
torch.save(net.state_dict(), '/content/drive/MyDrive/data/model.pth')

# Load the saved model: rebuild the architecture, then load the stored parameters.
# map_location remaps the stored tensors onto the current device, so a checkpoint
# written on a GPU runtime can still be loaded on a CPU-only runtime.
net = Net().to(device)
net.load_state_dict(torch.load('/content/drive/MyDrive/data/model.pth', map_location=device))

In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

class TestDataset(Dataset):
    """Unlabelled image dataset over the files directly inside one directory.

    Each item is ``(image, path)``: the loaded image (after ``transform`` if one was
    given) and the path it was read from (``root_dir`` joined with the file name).

    Args:
        root_dir: Directory containing the image files.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order and includes sub-directories.
        # Sort for a stable, reproducible item order and keep regular files only so a
        # stray folder is never handed to Image.open.
        self.image_files = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isfile(os.path.join(root_dir, entry))
        )

    def __len__(self):
        """Return the number of files found in ``root_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(image, full_path)``."""
        img_name = os.path.join(self.root_dir, self.image_files[idx])
        # The context manager releases the file handle once the pixels are copied.
        # convert('RGB') guarantees three channels: grayscale or RGBA files would
        # otherwise break the 3-channel Normalize / first convolution used in this
        # notebook.
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, img_name

def predict_test_images(model, test_dir, device):
    """Classify every image in ``test_dir`` and return the predictions as a DataFrame.

    Relies on two notebook-level globals created in the training cell: ``transform``
    (the preprocessing pipeline) and ``dataset`` (its ``classes`` list maps a predicted
    index to a class name). Run that cell first.

    Args:
        model: Network called on each image batch; it is switched to eval mode here.
        test_dir: Directory handed to ``TestDataset``.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        pandas.DataFrame with columns ``Image`` (the identifier ``TestDataset`` yields
        alongside each image) and ``Label`` (the predicted class name), one row per
        image in loader order.
    """
    # Load the test dataset
    test_dataset = TestDataset(root_dir=test_dir, transform=transform)
    testloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

    # Collect rows in a plain list and build the DataFrame once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame per row.
    records = []

    # Make predictions on the test images
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        for images, image_files in testloader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs, 1)
            for image_name, label_index in zip(image_files, predicted.tolist()):
                records.append({
                    'Image': image_name,
                    'Label': dataset.classes[label_index],  # index -> class name
                })

    # Passing columns explicitly keeps the two headers even when there are no rows.
    return pd.DataFrame(records, columns=['Image', 'Label'])

# Use the function after loading the model
model_path = '/content/drive/MyDrive/data/model.pth'  # Replace with your model file path
test_dir = '/content/drive/MyDrive/data/test_assignment_1'  # Replace with your test directory
results = predict_test_images(net, test_dir, device)
print(results)
# to_csv is a DataFrame method; pandas has no module-level `pd.to_csv`, so the previous
# call raised AttributeError and never wrote the predictions. index=False leaves out the
# meaningless 0..n-1 row index so the file contains only the Image and Label columns.
results.to_csv('/content/drive/MyDrive/data/final.csv', index=False)