### Homework 3

In [31]:
# Lib
import os
import shutil
from sklearn.model_selection import train_test_split
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from torchvision import models

#### Data Preparation

In [2]:
!pip install kaggle



In [3]:
# Colab-only cell: prompts for a file upload, then copies kaggle.json into
# ~/.kaggle/ and restricts it to owner read/write. The file holds API
# credentials, so it should never be committed alongside the notebook.
from google.colab import files
files.upload()  # Upload the Kaggle API token file (kaggle.json) into the working directory
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json  # Permissions


Saving kaggle.json to kaggle.json


#### Downloading Dataset







In [4]:
# Create the download directory; exist_ok=True keeps this cell re-runnable
os.makedirs('intel_image_classification', exist_ok=True)

# Download the Intel Image Classification archive from Kaggle into that directory
# (uses the kaggle.json token copied to ~/.kaggle/ by the cell above)
!kaggle datasets download -d puneet6060/intel-image-classification -p intel_image_classification/

Dataset URL: https://www.kaggle.com/datasets/puneet6060/intel-image-classification
License(s): copyright-authors
Downloading intel-image-classification.zip to intel_image_classification
 98% 339M/346M [00:02<00:00, 106MB/s]
100% 346M/346M [00:02<00:00, 127MB/s]


In [5]:
# Extract every member of the downloaded archive next to the zip file

import zipfile

with zipfile.ZipFile('intel_image_classification/intel-image-classification.zip') as archive:
    archive.extractall(path='intel_image_classification/')

print("Dataset downloaded and extracted successfully!")


Dataset downloaded and extracted successfully!


In [6]:
# source_dir: training images inside the extracted archive, one sub-folder per category
# target_dir: where the selected categories are copied to build the custom dataset
source_dir = 'intel_image_classification/seg_train/seg_train'
target_dir = 'intel_custom_dataset'

In [7]:
# Create the custom dataset directory; exist_ok=True makes re-running this cell a no-op
os.makedirs(target_dir, exist_ok=True)

#### STEP 1

In [8]:
# Categories kept for this assignment (sub-folder names under source_dir)
categories = ['buildings', 'forest', 'mountain']

# Copy each selected category folder into the custom dataset directory.
# shutil.copytree raises FileExistsError when the destination already exists,
# so categories copied by a previous run are skipped to keep the cell re-runnable.
for category in categories:
    src = os.path.join(source_dir, category)
    dest = os.path.join(target_dir, category)
    if not os.path.isdir(dest):
        shutil.copytree(src, dest)

print("Selected categories have been organized in the custom_dataset directory!")

Selected categories have been organized in the custom_dataset directory!


In [9]:
# dataset_dir: the three-category dataset built above (same path as target_dir)
# train_dir / test_dir: destinations of the train/test split, one sub-folder per category
dataset_dir = 'intel_custom_dataset'
train_dir = 'intel_custom_dataset_split/train'
test_dir = 'intel_custom_dataset_split/test'

### STEP 2

In [10]:
# Create the root folders for the train and test splits (no-op if they already exist)
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

In [11]:
# Fraction of each category held out for testing: 0.2 gives an 80/20 train/test split
split_ratio = 0.2

In [12]:
# Function to split the data
def split_data(category, random_state=42):
    """Copy one category's images into the train and test split folders.

    Reads the file names in ``dataset_dir/<category>``, partitions them with
    ``train_test_split`` using the notebook-level ``split_ratio`` as the test
    fraction, and copies (not moves) each file into ``train_dir/<category>``
    or ``test_dir/<category>``.

    The listing is sorted and the split is seeded so that re-running the cell
    reproduces exactly the same partition. Without this, every re-run would
    copy a different random partition into the same folders and leak test
    images into the training set. Files left behind by earlier unseeded runs
    are not removed.

    Args:
        category: Name of the category sub-folder to split.
        random_state: Seed forwarded to ``train_test_split``.
    """
    category_path = os.path.join(dataset_dir, category)
    # os.listdir returns entries in arbitrary order; sort for a reproducible split.
    images = sorted(os.listdir(category_path))

    # Splitting the images
    train_images, test_images = train_test_split(
        images, test_size=split_ratio, random_state=random_state
    )

    # Create the category folder in each split directory and copy its images
    for split_dir, split_images in ((train_dir, train_images), (test_dir, test_images)):
        out_dir = os.path.join(split_dir, category)
        os.makedirs(out_dir, exist_ok=True)
        for img in split_images:
            shutil.copy(os.path.join(category_path, img), os.path.join(out_dir, img))

In [13]:
# Split every selected category into train/test folders.
# `categories` is re-declared here so this cell does not depend on the copy cell above.
categories = ['buildings', 'forest', 'mountain']
for category in categories:
    split_data(category)

print("Data split into training and test sets successfully!")

Data split into training and test sets successfully!


### STEP 3

In [14]:
# Preprocessing pipelines for the two splits. Both end with the same
# tensor conversion and ImageNet mean/std normalization; only the training
# pipeline applies random augmentation.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training: random crop resized to 224x224 plus random horizontal flip
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Testing: deterministic resize to 256 followed by a 224x224 center crop
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

data_transforms = {'train': train_transform, 'test': test_transform}

In [15]:
# Build one ImageFolder per split; each reads <data_dir>/<split>/<class>/<image>
# and applies the transform pipeline registered under the same split name.
data_dir = 'intel_custom_dataset_split'
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'test')
}


In [16]:
# Batch both splits in groups of 32; only the training split is shuffled
dataloaders = {
    split: DataLoader(image_datasets[split], batch_size=32, shuffle=(split == 'train'))
    for split in ('train', 'test')
}

In [17]:
# Number of images per split, and the class names ImageFolder derived from the
# training sub-folder names
dataset_sizes = {split: len(image_datasets[split]) for split in ('train', 'test')}
class_names = image_datasets['train'].classes

print(f"Classes: {class_names}")
print(f"Dataset sizes: {dataset_sizes}")


Classes: ['buildings', 'forest', 'mountain']
Dataset sizes: {'train': 5577, 'test': 1397}


### STEP 4

In [18]:
# Defining the CNN architecture
class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages followed by a two-layer classifier.

    Every pooling stage halves the spatial size, so a square ``input_size``
    image reaches the classifier as a 64-channel map of side ``input_size // 8``.

    Args:
        num_classes: Number of output logits (3: buildings, forest, mountain).
        input_size: Height and width, in pixels, of the square input images.
            The default of 224 gives the original ``64 * 28 * 28`` flattened
            feature size.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.
    """

    def __init__(self, num_classes=3, input_size=224):
        super().__init__()

        # Three 2x2 poolings shrink each spatial side by a factor of 8.
        feature_side = input_size // 8
        if feature_side < 1:
            raise ValueError(f"input_size must be at least 8, got {input_size}")

        # Convolutional layers (3x3 kernels, padding=1 keeps the spatial size)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # Fully connected layers
        self.fc1 = nn.Linear(64 * feature_side * feature_side, 512)
        self.fc2 = nn.Linear(512, num_classes)

        # Pooling layer (shared by all three stages; it has no parameters)
        self.pool = nn.MaxPool2d(2, 2)

        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch."""
        # Convolutional layers with ReLU and pooling
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        # Flatten everything except the batch dimension. Unlike view(-1, N),
        # this never folds surplus features into extra "samples": a wrongly
        # sized input fails loudly in fc1 instead of changing the batch size.
        x = torch.flatten(x, 1)

        # Fully connected layers
        x = self.relu(self.fc1(x))
        x = self.fc2(x)

        return x


In [19]:
# Build the custom CNN with one output logit per category and show its layers
model = SimpleCNN(num_classes=3)
print(model)

SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=50176, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=3, bias=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
)


In [20]:
# Prefer the first CUDA GPU when one is available, otherwise run on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Move the model's parameters to the selected device
model = model.to(device)

In [21]:
# Cross-entropy loss on the raw logits; Adam updates every SimpleCNN parameter
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

#### Training the model

In [22]:
def train_model(model, criterion, optimizer, dataloaders, num_epochs=10):
    """Train ``model`` and print loss/accuracy for both splits after every epoch.

    Each epoch runs a 'train' phase (gradients enabled, optimizer stepped)
    followed by a 'test' phase (evaluation mode, no gradients) that is used
    only for monitoring. The model is left in evaluation mode on return.

    Args:
        model: Network to optimize; batches are moved to the device its
            parameters live on (CPU if it has no parameters).
        criterion: Loss taking ``(outputs, labels)``. The epoch loss assumes
            it returns the batch mean.
        optimizer: Optimizer bound to the parameters that should be updated.
        dataloaders: Mapping with 'train' and 'test' keys, each a DataLoader
            yielding ``(inputs, labels)`` batches.
        num_epochs: Number of passes over the training split.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Use the device the model already lives on instead of a notebook-level
    # global, so the function also works when called from a fresh scope.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training phase and an evaluation phase on the test split
        for phase in ['train', 'test']:
            is_training = phase == 'train'
            model.train(is_training)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0

            # Iterating over data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # Gradients only exist (and need resetting) in the train phase
                if is_training:
                    optimizer.zero_grad()

                # Forward pass; autograd is disabled during evaluation
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward pass + optimize only in train phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Statistics: undo the batch mean so the epoch loss is per-sample
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            num_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    print('Training complete.')

In [23]:
# Train SimpleCNN for 10 epochs; per-epoch train/test loss and accuracy are printed below
train_model(model, criterion, optimizer, dataloaders, num_epochs=10)

Epoch 1/10
----------
train Loss: 0.6367 Acc: 0.7361
test Loss: 0.3301 Acc: 0.8676
Epoch 2/10
----------
train Loss: 0.3779 Acc: 0.8571
test Loss: 0.2433 Acc: 0.9220
Epoch 3/10
----------
train Loss: 0.2977 Acc: 0.8928
test Loss: 0.2120 Acc: 0.9306
Epoch 4/10
----------
train Loss: 0.2850 Acc: 0.8938
test Loss: 0.1685 Acc: 0.9420
Epoch 5/10
----------
train Loss: 0.2417 Acc: 0.9170
test Loss: 0.1866 Acc: 0.9377
Epoch 6/10
----------
train Loss: 0.2150 Acc: 0.9261
test Loss: 0.1820 Acc: 0.9442
Epoch 7/10
----------
train Loss: 0.2328 Acc: 0.9213
test Loss: 0.1395 Acc: 0.9506
Epoch 8/10
----------
train Loss: 0.2024 Acc: 0.9320
test Loss: 0.1155 Acc: 0.9685
Epoch 9/10
----------
train Loss: 0.1941 Acc: 0.9337
test Loss: 0.1185 Acc: 0.9656
Epoch 10/10
----------
train Loss: 0.1787 Acc: 0.9351
test Loss: 0.1237 Acc: 0.9592
Training complete.


#### STEP 5

#### Testing

In [29]:
def evaluate_model(model, dataloaders):
    """Print a classification report and confusion matrix for the test split.

    Puts ``model`` in evaluation mode, predicts every batch of
    ``dataloaders['test']`` without tracking gradients, and prints the
    scikit-learn classification report (rows labelled with the notebook-level
    ``class_names``) followed by the confusion matrix.

    Args:
        model: Trained network; batches are moved to the device its
            parameters live on (CPU if it has no parameters).
        dataloaders: Mapping with a 'test' key whose DataLoader yields
            ``(inputs, labels)`` batches.

    Returns:
        None. Results are reported through ``print``.
    """
    model.eval()  # Set model to evaluation mode

    # Use the device the model already lives on instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    all_preds = []
    all_labels = []

    # Iterating over test data (no gradient tracking for evaluation)
    with torch.no_grad():
        for inputs, labels in dataloaders['test']:
            outputs = model(inputs.to(model_device))
            _, preds = torch.max(outputs, 1)

            # Collect per-batch predictions and labels as NumPy arrays on the CPU
            all_preds.append(preds.cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    # Flattening the lists
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    # Classification report
    print("Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=class_names))

    # Confusion matrix (rows: true class, columns: predicted class)
    print("Confusion Matrix:")
    print(confusion_matrix(all_labels, all_preds))


In [30]:
# Evaluate the trained SimpleCNN on the test split (prints report and confusion matrix)
evaluate_model(model, dataloaders)

Classification Report:
              precision    recall  f1-score   support

   buildings       0.95      0.94      0.94       439
      forest       0.96      0.98      0.97       455
    mountain       0.97      0.96      0.96       503

    accuracy                           0.96      1397
   macro avg       0.96      0.96      0.96      1397
weighted avg       0.96      0.96      0.96      1397

Confusion Matrix:
[[411  17  11]
 [  2 447   6]
 [ 19   2 482]]


### STEP 6

In [32]:
# Loading the pre-trained GoogleNet model.
# torchvision >= 0.13 deprecates `pretrained=True` in favour of an explicit
# `weights=` enum; fall back to the legacy flag on releases without the enum.
_googlenet_weights = getattr(models, 'GoogLeNet_Weights', None)
if _googlenet_weights is not None:
    googlenet = models.googlenet(weights=_googlenet_weights.IMAGENET1K_V1)
else:
    googlenet = models.googlenet(pretrained=True)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth
100%|██████████| 49.7M/49.7M [00:01<00:00, 40.1MB/s]


In [33]:
# Freeze the pretrained network: switch off gradient tracking on every parameter
for frozen_param in googlenet.parameters():
    frozen_param.requires_grad_(False)

In [34]:
# Replace the final classifier with a fresh 3-class linear layer. The new layer
# is created after the freeze above, so its parameters remain trainable.
num_ftrs = googlenet.fc.in_features
googlenet.fc = nn.Linear(in_features=num_ftrs, out_features=3)

In [35]:
# Move GoogLeNet, including the newly attached fc layer, to the device selected earlier
googlenet = googlenet.to(device)

In [36]:
# Defining the loss function and optimizer for GoogLeNet.
# NOTE: this rebinds the `criterion` and `optimizer` names created for SimpleCNN above;
# only the parameters of the replaced fc layer are handed to Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(googlenet.fc.parameters(), lr=0.001)

### STEP 7

In [37]:
def train_googlenet_model(model, criterion, optimizer, dataloaders, num_epochs=10):
    """Train the GoogLeNet model with the notebook's shared training loop.

    The loop needed here is the same as the one in ``train_model`` (train
    phase, then evaluation on the 'test' split, with per-epoch loss and
    accuracy printed), so this wrapper delegates instead of keeping a second
    copy that could drift out of sync.

    Args:
        model: Network to optimize.
        criterion: Loss taking ``(outputs, labels)``.
        optimizer: Optimizer bound to the parameters that should be updated.
        dataloaders: Mapping with 'train' and 'test' DataLoaders.
        num_epochs: Number of passes over the training split.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Not returned on purpose: a bare call keeps the notebook cell output limited to the logs.
    train_model(model, criterion, optimizer, dataloaders, num_epochs=num_epochs)

In [39]:
# Train the modified GoogleNet model for 10 epochs (only the new fc layer is passed to the optimizer)
train_googlenet_model(googlenet, criterion, optimizer, dataloaders, num_epochs=10)

Epoch 1/10
----------
train Loss: 0.3206 Acc: 0.9267
test Loss: 0.0928 Acc: 0.9800
Epoch 2/10
----------
train Loss: 0.1441 Acc: 0.9579
test Loss: 0.0629 Acc: 0.9871
Epoch 3/10
----------
train Loss: 0.1251 Acc: 0.9647
test Loss: 0.0549 Acc: 0.9835
Epoch 4/10
----------
train Loss: 0.1123 Acc: 0.9627
test Loss: 0.0741 Acc: 0.9792
Epoch 5/10
----------
train Loss: 0.1036 Acc: 0.9681
test Loss: 0.0427 Acc: 0.9885
Epoch 6/10
----------
train Loss: 0.1013 Acc: 0.9666
test Loss: 0.0392 Acc: 0.9900
Epoch 7/10
----------
train Loss: 0.1105 Acc: 0.9627
test Loss: 0.0381 Acc: 0.9878
Epoch 8/10
----------
train Loss: 0.1060 Acc: 0.9670
test Loss: 0.0360 Acc: 0.9900
Epoch 9/10
----------
train Loss: 0.1090 Acc: 0.9658
test Loss: 0.0335 Acc: 0.9900
Epoch 10/10
----------
train Loss: 0.0931 Acc: 0.9684
test Loss: 0.0298 Acc: 0.9907
Training complete.


In [40]:
# Evaluate the fine-tuned GoogleNet model on the test split (prints report and confusion matrix)
evaluate_model(googlenet, dataloaders)


Classification Report:
              precision    recall  f1-score   support

   buildings       0.99      0.99      0.99       439
      forest       0.99      0.99      0.99       455
    mountain       0.99      0.99      0.99       503

    accuracy                           0.99      1397
   macro avg       0.99      0.99      0.99      1397
weighted avg       0.99      0.99      0.99      1397

Confusion Matrix:
[[434   3   2]
 [  0 451   4]
 [  3   1 499]]
