In [2]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import os
from tqdm import tqdm  # Progress bar

In [3]:
# Pick the compute device once: use CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Per-split preprocessing pipelines for the MobileNetV2 classifier.
#
# Both splits resize to 224x224, convert to a tensor and normalize each channel
# with mean 0.5 / std 0.5. Only the 'train' split additionally applies a random
# horizontal flip (data augmentation), inserted right after the resize.
#
# NOTE(review): torchvision documents ImageNet statistics
# (mean [0.485, 0.456, 0.406], std [0.229, 0.224, 0.225]) for its pretrained
# weights. The 0.5 values are kept here because the inference cell uses the
# same values -- confirm before changing either one.
data_transforms = {
    split: transforms.Compose(
        [transforms.Resize((224, 224))]
        + ([transforms.RandomHorizontalFlip()] if split == 'train' else [])
        + [
            transforms.ToTensor(),
            transforms.Normalize([0.5] * 3, [0.5] * 3),
        ]
    )
    for split in ('train', 'test')
}

In [5]:
# Paths to the train and test folders.
#
# The dataset root can be overridden with the DOGS_VS_CATS_DATA_DIR environment
# variable so the notebook runs on machines other than the author's; when the
# variable is not set, the original local path is used unchanged.
data_dir = os.environ.get(
    'DOGS_VS_CATS_DATA_DIR',
    r'C:\Users\T460\OneDrive - BTH Student\Documents\Dev\1. LTU\4 BIP\Assignment\dataset_dogs_vs_cats',
)
train_dir = os.path.join(data_dir, 'train')  # one sub-folder per class is expected by ImageFolder
test_dir = os.path.join(data_dir, 'test')

In [6]:
# Build one ImageFolder dataset per split, each with its matching transform
# pipeline from `data_transforms`.
image_datasets = {
    split: datasets.ImageFolder(root, transform=data_transforms[split])
    for split, root in (('train', train_dir), ('test', test_dir))
}

# Wrap each dataset in a DataLoader. Only the training split is shuffled;
# the test split keeps its on-disk order.
batch_size = 8
dataloaders = {
    split: DataLoader(dataset, batch_size=batch_size, shuffle=(split == 'train'))
    for split, dataset in image_datasets.items()
}

I first tried the VGG16 model, but it took a very long time to train, so I switched to MobileNetV2 instead.

In [9]:
# Load the ImageNet-pretrained MobileNetV2 backbone.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; the original argument is kept for compatibility with the
# installed version -- confirm before switching.
model = models.mobilenet_v2(pretrained=True)

# Transfer learning: freeze every pretrained parameter so that only the new
# classification head is trained.
for param in model.parameters():
    param.requires_grad = False

# Replace the final classifier layer with a fresh 2-way head (cats vs. dogs).
# A newly constructed nn.Linear has requires_grad=True, so it is the only
# trainable part of the network.
num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, 2)

# Move model to device
model = model.to(device)

# Define loss function and optimizer.
# Only the trainable (unfrozen) parameters are handed to Adam; the frozen
# backbone never receives gradients, so there is no reason to register it.
criterion = nn.CrossEntropyLoss()
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

In [10]:
# Training loop.
#
# For every epoch the model is first trained on the 'train' split and then
# evaluated on the 'test' split. Loss and accuracy are accumulated as plain
# Python numbers (not tensors), and the weights are written to disk whenever
# the test accuracy improves on the best value seen so far.
#
# NOTE(review): the 'test' split is used to choose which checkpoint to keep, so
# the reported best accuracy is measured on the same data used for model
# selection; a separate validation split would give an unbiased estimate.
num_epochs = 2
best_acc = 0.0
checkpoint_path = 'best_model_mobilenet_dogs_vs_cats.pth'

for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    print('-' * 10)

    # Each epoch has a training and testing phase
    for phase in ['train', 'test']:
        is_train = phase == 'train'
        model.train(is_train)  # train(True) == train mode, train(False) == eval mode

        running_loss = 0.0
        running_corrects = 0

        # The description is constant for the whole phase, so set it once
        # instead of on every batch.
        progress_desc = f"{phase.capitalize()} Epoch {epoch+1}/{num_epochs}"
        with tqdm(dataloaders[phase], desc=progress_desc, unit="batch") as tepoch:
            for inputs, labels in tepoch:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Track gradients only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward + optimize only if in training phase
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Statistics: the criterion returns the batch mean, so weight it
                # by the batch size to get a per-sample sum (the last batch may
                # be smaller than the others).
                batch_loss = loss.item()
                running_loss += batch_loss * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

                # Update progress bar with current loss
                tepoch.set_postfix(loss=batch_loss)

        num_samples = len(image_datasets[phase])
        epoch_loss = running_loss / num_samples
        epoch_acc = running_corrects / num_samples

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # Save the model if it has the best accuracy on the test set
        if phase == 'test' and epoch_acc > best_acc:
            best_acc = epoch_acc
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Model saved with accuracy: {best_acc:.4f}')

    print()

print('Training complete')
print(f'Best test accuracy: {best_acc:.4f}')

Epoch 1/2
----------


Train Epoch 1/2: 100%|██████████| 1860/1860 [20:00<00:00,  1.55batch/s, loss=0.00486] 


train Loss: 0.1933 Acc: 0.9228


Test Epoch 1/2: 100%|██████████| 628/628 [05:32<00:00,  1.89batch/s, loss=0.0199]  


test Loss: 0.0610 Acc: 0.9783
Model saved with accuracy: 0.9783

Epoch 2/2
----------


Train Epoch 2/2: 100%|██████████| 1860/1860 [22:06<00:00,  1.40batch/s, loss=0.772]   


train Loss: 0.1894 Acc: 0.9279


Test Epoch 2/2: 100%|██████████| 628/628 [06:43<00:00,  1.56batch/s, loss=0.0131]  

test Loss: 0.0615 Acc: 0.9773

Training complete
Best test accuracy: 0.9783





In [13]:
from PIL import Image

# Single-image inference with the fine-tuned MobileNetV2 checkpoint.

# Define the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the architecture used for training. The ImageNet weights are not
# requested here because every parameter is overwritten by the checkpoint
# loaded below, so downloading them would be wasted work.
model = models.mobilenet_v2()
num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, 2)  # 2 classes: cats and dogs

# Load the saved model state_dict.
# map_location lets a checkpoint holding CUDA tensors be loaded on a CPU-only
# machine as well.
checkpoint_path = r'C:\Users\T460\OneDrive - BTH Student\Documents\Dev\1. LTU\4 BIP\Assignment\best_model_mobilenet_dogs_vs_cats.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Move the model to the appropriate device
model = model.to(device)
model.eval()  # Set the model to evaluation mode

# Same preprocessing as the 'test' pipeline used during training.
# NOTE: this rebinds `data_transforms` (a dict of pipelines in the training
# cells) to a single Compose; re-run the transforms cell before re-running the
# data-loading cell.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # Same input size as used during training
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Same normalization as training
])

# Load a sample image (change img_path to the image you want to classify).
# The image is converted to RGB so that grayscale, palette or RGBA files still
# yield the 3 channels that Normalize and the model expect; the context manager
# closes the underlying file once the pixel data has been copied.
img_path = r'C:\Users\T460\OneDrive - BTH Student\Documents\Dev\1. LTU\4 BIP\Assignment\dog 1.jfif'
with Image.open(img_path) as img_file:
    img = img_file.convert('RGB')

# Apply the transformations to the image
img_tensor = data_transforms(img).unsqueeze(0)  # Add a batch dimension (1, 3, 224, 224)
img_tensor = img_tensor.to(device)

# Make a prediction
with torch.no_grad():  # No need to calculate gradients during inference
    outputs = model(img_tensor)
    _, preds = torch.max(outputs, 1)  # Get the predicted class (0 or 1)

# Display the prediction.
# NOTE(review): assumes class index 0 is "cat" and 1 is "dog"; verify against
# image_datasets['train'].class_to_idx from the training cells.
if preds.item() == 0:
    print("Prediction: Cat")
else:
    print("Prediction: Dog")

Prediction: Dog
