In [1]:
import os
import pandas as pd
import numpy as np

# plot photos from dataset
from matplotlib import pyplot
from matplotlib.image import imread

#### Setting up the directory to data

In [None]:
base_dir = os.getcwd()
print("Base directory: ", base_dir)
data_dir = os.path.join(base_dir, 'dataset')
print("Data directory: ", data_dir)

#### The different class of data is identified and stored as Categories

In [3]:
Categories = os.listdir(data_dir)
print("Different categores: ", Categories)

Different categores:  ['accordion', 'bass', 'camera', 'crocodile', 'crocodile_head', 'cup', 'dollar_bill', 'emu', 'gramophone', 'hedgehog', 'nautilus', 'pizza', 'pyramid', 'sea_horse', 'windsor_chair']


#### Read the data for each category and split into Train and Test

In [4]:
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input

X_img_train=[]
Y_label_train=[]

X_img_test=[]
Y_label_test=[]

for c in Categories:
    img_cnt = 0
    directory_path = os.path.join(data_dir, c)
    files = os.listdir(directory_path)
    files.sort()
    for file in files:
        img_cnt = img_cnt + 1
        img_path = os.path.join(directory_path, file)
        img = image.load_img(img_path, target_size=(224, 224))
        X_img = image.img_to_array(img)
        X_img = np.expand_dims(X_img, axis=0)
        X_img = preprocess_input(X_img)
        if img_cnt <= 40:                       # first 40 image is taken as Train
            X_img_train.append(X_img)
            Y_label_train.append(c)
        else:                                   # after the first 40 image is for Test
            X_img_test.append(X_img)
            Y_label_test.append(c)

### Getting the data ready

In [5]:
X_img_train = np.array(X_img_train)
print ("X_img_train shape: ", X_img_train.shape)

X_img_test = np.array(X_img_test)
print ("X_img_test shape: ", X_img_test.shape)

X_img_train shape:  (600, 1, 224, 224, 3)
X_img_test shape:  (205, 1, 224, 224, 3)


In [6]:
X_train = np.squeeze(X_img_train)  # Remove the singleton dimension
print("X_train shape: ",X_train.shape)

X_test = np.squeeze(X_img_test)    # Remove the singleton dimension
print("X_test shape: ",X_test.shape)

X_train shape:  (600, 224, 224, 3)
X_test shape:  (205, 224, 224, 3)


In [7]:
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import keras.utils as utils

# convert class labels to categorical (one hot encoding)
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(Y_label_train)
Y_train = tf.keras.utils.to_categorical(encoded_labels, 15)
print("Y_train shape: ", Y_train.shape)

encoded_labels = label_encoder.fit_transform(Y_label_test)
Y_test = tf.keras.utils.to_categorical(encoded_labels, 15)
print("Y_test shape: ", Y_test.shape)

Y_train shape:  (600, 15)
Y_test shape:  (205, 15)


In [8]:
import torch

# Convert to PyTorch tensors
# Change shape to (N, C, H, W)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).permute(0, 3, 1, 2)  
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).permute(0, 3, 1, 2)

# Convert labels to tensors
y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

In [10]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

# Create dataset objects
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

## Loading the pretrained ResNet-18 model

In [12]:
import torchvision
import torchvision.models as models

# Load pretrained ResNet-18 model
resnet18_model = models.resnet18(pretrained=True)

# Modify the final layer to match the number of classes
num_classes = 15
resnet18_model.fc = torch.nn.Linear(resnet18_model.fc.in_features, num_classes)

#### Training the ResNet-18 model with the data

In [14]:
import torch.optim as optim
import torch.nn as nn

# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()  # Since the labels are one-hot encoded
optimizer = optim.Adam(resnet18_model.parameters(), lr=0.001)

# Training loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet18_model = resnet18_model.to(device)

num_epochs = 5
for epoch in range(num_epochs):
    resnet18_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward + backward + optimize
        outputs = resnet18_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

print('Finished Training')


Epoch [1/5], Loss: 0.16558562748526273
Epoch [2/5], Loss: 0.03604080978977053
Epoch [3/5], Loss: 0.022243670114365063
Epoch [4/5], Loss: 0.016197001953658304
Epoch [5/5], Loss: 0.014117272987373565
Finished Training


#### Evaluate the model with scores

In [15]:
from sklearn.metrics import precision_score, recall_score

# Evaluation loop
resnet18_model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = resnet18_model(inputs)
        preds = torch.sigmoid(outputs).cpu().numpy()  # Apply sigmoid to get probabilities
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())

# Convert predictions to binary (0 or 1)
threshold = 0.5
all_preds = np.array(all_preds) > threshold
all_labels = np.array(all_labels)

# Calculate precision and recall for each class
precision = precision_score(all_labels, all_preds, average=None)
recall = recall_score(all_labels, all_preds, average=None)

for i in range(num_classes):
    print(f"Class {i}: Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}")

Class 0: Precision: 1.0000, Recall: 0.9333
Class 1: Precision: 0.8182, Recall: 0.6429
Class 2: Precision: 1.0000, Recall: 0.9000
Class 3: Precision: 0.5882, Recall: 1.0000
Class 4: Precision: 1.0000, Recall: 0.2727
Class 5: Precision: 0.8824, Recall: 0.8824
Class 6: Precision: 1.0000, Recall: 1.0000
Class 7: Precision: 1.0000, Recall: 0.8462
Class 8: Precision: 1.0000, Recall: 0.7273
Class 9: Precision: 0.9231, Recall: 0.8571
Class 10: Precision: 0.8750, Recall: 0.9333
Class 11: Precision: 0.9167, Recall: 0.8462
Class 12: Precision: 0.9412, Recall: 0.9412
Class 13: Precision: 1.0000, Recall: 0.9412
Class 14: Precision: 1.0000, Recall: 0.9375


## Loading pretrained DenseNet-121 model

In [None]:
import torchvision.models as models

# Load pretrained DenseNet-121 model
densenet121_model = models.densenet121(pretrained=True)

# Modify the classifier to match the number of classes
num_classes = 15
densenet121_model.classifier = torch.nn.Linear(densenet121_model.classifier.in_features, num_classes)

#### Training the model with our data

In [19]:
import torch.optim as optim
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()  # Since labels are one-hot encoded
optimizer = optim.Adam(densenet121_model.parameters(), lr=0.001)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
densenet121_model = densenet121_model.to(device)

num_epochs = 3
for epoch in range(num_epochs):
    densenet121_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = densenet121_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

print('Finished Training')

Epoch [1/3], Loss: 0.0644
Epoch [2/3], Loss: 0.0324
Epoch [3/3], Loss: 0.0296
Finished Training


#### Evaluate the model with scores

In [20]:
from sklearn.metrics import precision_score, recall_score

densenet121_model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = densenet121_model(inputs)
        preds = torch.sigmoid(outputs).cpu().numpy()  # Apply sigmoid to get probabilities
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())

threshold = 0.5
all_preds = np.array(all_preds) > threshold
all_labels = np.array(all_labels)

precision = precision_score(all_labels, all_preds, average=None)
recall = recall_score(all_labels, all_preds, average=None)

for i in range(num_classes):
    print(f"Class {i}: Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}")


Class 0: Precision: 1.0000, Recall: 0.9333
Class 1: Precision: 0.9286, Recall: 0.9286
Class 2: Precision: 0.7273, Recall: 0.8000
Class 3: Precision: 0.8750, Recall: 0.7000
Class 4: Precision: 0.8571, Recall: 0.5455
Class 5: Precision: 1.0000, Recall: 0.3529
Class 6: Precision: 0.5714, Recall: 1.0000
Class 7: Precision: 1.0000, Recall: 0.7692
Class 8: Precision: 0.8182, Recall: 0.8182
Class 9: Precision: 1.0000, Recall: 0.7857
Class 10: Precision: 0.9333, Recall: 0.9333
Class 11: Precision: 0.7857, Recall: 0.8462
Class 12: Precision: 1.0000, Recall: 0.5294
Class 13: Precision: 0.9091, Recall: 0.5882
Class 14: Precision: 1.0000, Recall: 0.8125


In [None]:
import torchvision.models as models

# Load pretrained VGG-19 model
vgg19_model = models.vgg19(pretrained=True)

# Modify the classifier to match the number of classes
# classifier[6] is the final Linear layer that outputs 1000 classes for the ImageNet dataset. 
# We need to modify this layer to output 15 classes instead
num_classes = 15
vgg19_model.classifier[6] = torch.nn.Linear(vgg19_model.classifier[6].in_features, num_classes)

In [None]:
import torch.optim as optim
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()  # Since labels are one-hot encoded
optimizer = optim.Adam(vgg19_model.parameters(), lr=0.001)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
vgg19_model = vgg19_model.to(device)

num_epochs = 2
for epoch in range(num_epochs):
    vgg19_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = vgg19_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

print('Finished Training')

Epoch [1/2], Loss: 1.2876


In [None]:
from sklearn.metrics import precision_score, recall_score

vgg19_model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = vgg19_model(inputs)
        preds = torch.sigmoid(outputs).cpu().numpy()  # Apply sigmoid to get probabilities
        all_preds.extend(preds)
        all_labels.extend(labels.cpu().numpy())

threshold = 0.5
all_preds = np.array(all_preds) > threshold
all_labels = np.array(all_labels)

precision = precision_score(all_labels, all_preds, average=None)
recall = recall_score(all_labels, all_preds, average=None)

for i in range(num_classes):
    print(f"Class {i}: Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}")
