In [4]:
!pip install opencv-python glob2 keras numpy pandas matplotlib scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB)
Collecting glob2
  Downloading glob2-0.7.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: glob2
  Building wheel for glob2 (setup.py) ... [?25ldone
[?25h  Created wheel for glob2: filename=glob2-0.7-py2.py3-none-any.whl size=9320 sha256=b8545653cec1baf260730a6560a5bf0336d34ae62f98cd74c71d2d648974d55a
  Stored in directory: /home/aditya_sridhar/.cache/pip/wheels/37/07/ce/cbe8d31ad93224571b49fa03f8a5da11cdb31d3845ff73e0f3
Successfully built glob2
Installing collected packages: glob2, opencv-python
Successfully installed glob2-0.7 opencv-python-4.10.0.84


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torchvision import transforms
import numpy as np
import pandas as pd
import os
import cv2
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras
from glob import glob

2024-11-12 03:02:11.281156: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-12 03:02:12.481789: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
class CustomDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

In [None]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1) # output size: 16 x 224 x 224
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1) # output size: 32 x 224 x 224
        self.pool1 = nn.MaxPool2d(kernel_size=3) # output size: 32 x 74 x 74

        self.conv3 = nn.Conv2d(32, 32, kernel_size=3, padding=1) # output size: 32 x 74 x 74
        self.conv4 = nn.Conv2d(32, 64, kernel_size=3, padding=1)    # output size: 64 x 74 x 74
        self.pool2 = nn.MaxPool2d(kernel_size=3) # output size: 64 x 24 x 24

        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)   # output size: 128 x 24 x 24
        self.conv6 = nn.Conv2d(128, 256, kernel_size=3, padding=1) # output size: 256 x 24 x 24
        self.pool3 = nn.MaxPool2d(kernel_size=3) # output size: 256 x 8 x 8

        # Batch normalization
        self.batch_norm = nn.BatchNorm2d(256)

        # Fully connected layers
        self.fc1 = nn.Linear(256 * 2 * 2, 512)
        self.fc2 = nn.Linear(512, 201)  # Output size is 201 (number of classes)
        
        # Dropout layer
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # Convolutions and pooling
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)

        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool3(x)

        # Batch normalization
        x = self.batch_norm(x)

        # Flatten and fully connected layers
        x = x.view(x.size(0), -1)  # Flatten the output
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)  # This should output a vector of length 201

        return x


In [8]:
labels_dict = {}

with open('/home/es21btech11007/xml/CUB_200_2011/classes.txt', 'r') as f:
    for line in f:
        v, k = line.split(' ', 1)  # Split on the first space
        labels_dict[k.strip()] = int(v.strip())

labels_dict


FileNotFoundError: [Errno 2] No such file or directory: '/home/es21btech11007/xml/CUB_200_2011/classes.txt'

In [11]:
def load_data():
    images = []
    labels = []
    size = (64, 64)
    data_path = '/home/es21btech11007/xml/CUB_200_2011/images'
    
    print('Loading Data from File...', end='')
    for folder in os.listdir(data_path):
        fol = folder.strip('._')
        
        # Check if folder is in the labels_dict
        if fol in labels_dict:
            path = os.path.join(data_path, fol)
            print(f"Processing folder: {fol}", end='|')
            for image in os.listdir(path):
                try:
                    temp_img = cv2.imread(os.path.join(path, image))
                    temp_img = cv2.resize(temp_img, size, interpolation=cv2.INTER_AREA)
                    images.append(temp_img)
                    labels.append(labels_dict[fol])
                    
                    # Data augmentation (flipping the image)
                    temp_img = cv2.flip(temp_img, flipCode=1)
                    images.append(temp_img)
                    labels.append(labels_dict[fol])
                except Exception as e:
                    print(f"Error processing {image}: {e}")
        else:
            print(f"Folder {fol} not found in labels_dict")
    
    # Check if images and labels lists are not empty
    if not images or not labels:
        print(f"Error: No images or labels found. Images length: {len(images)}, Labels length: {len(labels)}")
        return None, None, None, None

    images = np.array(images)
    images = images.astype('float32') / 255.0
    labels = np.array(labels)  # Leave as integer class indices

    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2)
    print(f'\nLoaded {len(X_train)} images for training, Train data shape: {X_train.shape}')
    print(f'Loaded {len(X_test)} images for testing, Test data shape: {X_test.shape}')

    return X_train, X_test, y_train, y_test


In [12]:
X_train, X_test, y_train, y_test = load_data()

Loading Data from File...Processing folder: 001.Black_footed_Albatross|Processing folder: 002.Laysan_Albatross|Processing folder: 003.Sooty_Albatross|Processing folder: 004.Groove_billed_Ani|Processing folder: 005.Crested_Auklet|Processing folder: 006.Least_Auklet|Processing folder: 007.Parakeet_Auklet|Processing folder: 008.Rhinoceros_Auklet|Processing folder: 009.Brewer_Blackbird|Processing folder: 010.Red_winged_Blackbird|Processing folder: 011.Rusty_Blackbird|Processing folder: 012.Yellow_headed_Blackbird|Processing folder: 013.Bobolink|Processing folder: 014.Indigo_Bunting|Processing folder: 015.Lazuli_Bunting|Processing folder: 016.Painted_Bunting|Processing folder: 017.Cardinal|Processing folder: 018.Spotted_Catbird|Processing folder: 019.Gray_Catbird|Processing folder: 020.Yellow_breasted_Chat|Processing folder: 021.Eastern_Towhee|Processing folder: 022.Chuck_will_Widow|Processing folder: 023.Brandt_Cormorant|Processing folder: 024.Red_faced_Cormorant|Processing folder: 025.Pel

In [13]:
train_transform = transforms.Compose([transforms.ToTensor()])
train_dataset = CustomDataset(X_train, y_train, transform=train_transform)
test_dataset = CustomDataset(X_test, y_test, transform=train_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model, loss function, and optimizer
model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()  # Zero the gradients
        
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)
        
        loss.backward()  # Backward pass
        optimizer.step()  # Optimization step
        
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / total
    train_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss /= total
    val_accuracy = 100 * correct / total
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%\n")

# Save the model
torch.save(model.state_dict(), 'cub_classifier.pth')

Epoch [1/20], Train Loss: 5.1206, Train Accuracy: 1.52%
Validation Loss: 4.8505, Validation Accuracy: 2.59%

Epoch [2/20], Train Loss: 4.5728, Train Accuracy: 4.13%
Validation Loss: 4.4157, Validation Accuracy: 5.77%

Epoch [3/20], Train Loss: 4.2032, Train Accuracy: 7.80%
Validation Loss: 3.9405, Validation Accuracy: 11.20%

Epoch [4/20], Train Loss: 3.8681, Train Accuracy: 11.85%
Validation Loss: 3.6868, Validation Accuracy: 14.93%

Epoch [5/20], Train Loss: 3.5830, Train Accuracy: 16.38%
Validation Loss: 3.5636, Validation Accuracy: 16.69%

Epoch [6/20], Train Loss: 3.3264, Train Accuracy: 20.28%
Validation Loss: 3.2201, Validation Accuracy: 22.01%

Epoch [7/20], Train Loss: 3.1197, Train Accuracy: 23.66%
Validation Loss: 3.2772, Validation Accuracy: 22.14%

Epoch [8/20], Train Loss: 2.8926, Train Accuracy: 27.78%
Validation Loss: 3.1018, Validation Accuracy: 25.08%

Epoch [9/20], Train Loss: 2.7086, Train Accuracy: 31.21%
Validation Loss: 2.9295, Validation Accuracy: 28.05%

Epoch 