In [72]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, datasets
import os
from PIL import Image
from torchvision import models

In [73]:
cheetah="Cheetah"
crocodile="Crocodile"
elephants="Elephants"
tiger="Tiger"

In [74]:
class Animals(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for label, sub_dir in enumerate(os.listdir(root_dir)):
            sub_dir_path = os.path.join(root_dir, sub_dir)
            for img_name in os.listdir(sub_dir_path):
                self.image_paths.append(os.path.join(sub_dir_path, img_name))
                self.labels.append(label)
                
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, label

In [75]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])


In [76]:
dataset = Animals(root_dir='datafiles', transform=transform)

train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


Use of pretrained model made the accuracy good and reasonable . I tried using normal neural network but it was taking a lot of time.

In [82]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(64 * 64 * 3, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 4)
    
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # No activation function here
        return x

normalmodel = SimpleNN()



In [100]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(normalmodel.parameters(), lr=0.00001)

First we will look for normal model and then we will use pretrained model to see the effects

In [101]:
num_epochs = 200
for epoch in range(num_epochs):
    normalmodel.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = normalmodel(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

Epoch 1, Loss: 1.1765692436948736
Epoch 2, Loss: 1.1701892360727837
Epoch 3, Loss: 1.1626416051641424
Epoch 4, Loss: 1.1612188270751467
Epoch 5, Loss: 1.156519590540135
Epoch 6, Loss: 1.146679255556553
Epoch 7, Loss: 1.1472146561805239
Epoch 8, Loss: 1.14705099450781
Epoch 9, Loss: 1.1417436029048675
Epoch 10, Loss: 1.1325724530727306
Epoch 11, Loss: 1.1287063918215163
Epoch 12, Loss: 1.1237654571837568
Epoch 13, Loss: 1.1252775927807421
Epoch 14, Loss: 1.1194862393622702
Epoch 15, Loss: 1.1148719635415585
Epoch 16, Loss: 1.1085471485523468
Epoch 17, Loss: 1.1088402778544324
Epoch 18, Loss: 1.105796900201351
Epoch 19, Loss: 1.095882438598795
Epoch 20, Loss: 1.0945431577398421
Epoch 21, Loss: 1.0855987896310522
Epoch 22, Loss: 1.085392158082191
Epoch 23, Loss: 1.0783882838614443
Epoch 24, Loss: 1.0748787243315514
Epoch 25, Loss: 1.0687637887102492
Epoch 26, Loss: 1.0667651105434337
Epoch 27, Loss: 1.0618072545274775
Epoch 28, Loss: 1.0524711101613147
Epoch 29, Loss: 1.0510685202923227
E

In [102]:
from sklearn.metrics import confusion_matrix

normalmodel.eval()
correct = 0
total = 0
all_labels = []
all_predictions = []
total_loss = 0.0

criterion = nn.CrossEntropyLoss()  # Define your loss function

with torch.no_grad():
    for images, labels in test_loader:
        # images, labels = images.to(device), labels.to(device)
        outputs = normalmodel(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)
conf_matrix = confusion_matrix(all_labels, all_predictions)
# print(all_labels)
# print(all_predictions)
print(f'Test Accuracy: {accuracy:.2f}%')
print(f'Test Loss: {average_loss:.4f}')
print('Confusion Matrix:')
print(conf_matrix)

Test Accuracy: 58.08%
Test Loss: 1.0275
Confusion Matrix:
[[27  5  5 11]
 [ 9 27  4  8]
 [ 7  1 18  1]
 [11  5  3 25]]


I will now show the pretrained model effect which I found very impressive .

In [93]:
model = models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 4)

In [103]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)

In [79]:
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

Epoch 1, Loss: 1.3818558378422514
Epoch 2, Loss: 0.9493148022509635
Epoch 3, Loss: 0.7353040335026193
Epoch 4, Loss: 0.5752794882084461
Epoch 5, Loss: 0.47486936855823436
Epoch 6, Loss: 0.3797557791496845
Epoch 7, Loss: 0.29349237395093797
Epoch 8, Loss: 0.25419414550700087
Epoch 9, Loss: 0.2060940994861278
Epoch 10, Loss: 0.17979888554583204
Epoch 11, Loss: 0.14561256052965812
Epoch 12, Loss: 0.1338170684398489
Epoch 13, Loss: 0.0993003988678151
Epoch 14, Loss: 0.09962723594396672
Epoch 15, Loss: 0.08607058294434497
Epoch 16, Loss: 0.07561213507297192
Epoch 17, Loss: 0.06630370476619994
Epoch 18, Loss: 0.059379578587856696
Epoch 19, Loss: 0.05761527979469046
Epoch 20, Loss: 0.04576109595438267
Epoch 21, Loss: 0.05056770252896116
Epoch 22, Loss: 0.04106190467768527
Epoch 23, Loss: 0.032671037684888285
Epoch 24, Loss: 0.03462227495347566
Epoch 25, Loss: 0.03621509839641921
Epoch 26, Loss: 0.03144850732481226
Epoch 27, Loss: 0.030429314059066646
Epoch 28, Loss: 0.03174481512186058
Epoch 

In [81]:
model.eval()
correct = 0
total = 0
all_labels = []
all_predictions = []
total_loss = 0.0

criterion = nn.CrossEntropyLoss()  # Define your loss function

with torch.no_grad():
    for images, labels in test_loader:
        # images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

accuracy = 100 * correct / total
average_loss = total_loss / len(test_loader)
conf_matrix = confusion_matrix(all_labels, all_predictions)
# print(all_labels)
# print(all_predictions)
print(f'Test Accuracy: {accuracy:.2f}%')
print(f'Test Loss: {average_loss:.4f}')
print('Confusion Matrix:')
print(conf_matrix)


Test Accuracy: 88.62%
Test Loss: 0.2767
Confusion Matrix:
[[42  2  1  3]
 [ 5 41  0  2]
 [ 1  0 26  0]
 [ 4  0  1 39]]
