In [None]:
# Imports

In [None]:
import time
import matplotlib.pyplot as plt
import torch
from torch import flatten
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torch.amp import GradScaler, autocast
from PIL import Image

In [None]:
# Show the installed PyTorch version as this cell's output.
torch.__version__

In [None]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Parameters
batch_size = 64
learning_rate = 0.001
num_epochs = 30
img_height, img_width = 224, 224
split_seed = 42  # fixed seed so the train/test split is the same on every run

norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training transformations: includes random rotation as augmentation.
transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),
    transforms.RandomRotation(degrees=30),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std)
])

# Evaluation transformations: same resize/normalize but no random augmentation,
# so the reported test accuracy is deterministic for a given model.
eval_transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std)
])

# Two views over the same folder: they list the same samples in the same order
# and differ only in the transform applied when an item is read.
dataset = datasets.ImageFolder('data', transform=transform)
eval_dataset = datasets.ImageFolder('data', transform=eval_transform)

# Split the dataset into train and validation sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, held_out_split = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)
# random_split hands back subsets of the *augmented* dataset; re-point the
# held-out indices at the un-augmented view so evaluation sees clean images.
test_dataset = torch.utils.data.Subset(eval_dataset, held_out_split.indices)

print(len(train_dataset))
print(len(test_dataset))

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, prefetch_factor=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True, prefetch_factor=2)

# Define the CNN architecture
class CNN(torch.nn.Module):
    """Five-stage convolutional image classifier.

    Every stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2x2, stride 2)``,
    so each stage halves the spatial size while the channels grow
    3 -> 16 -> 32 -> 64 -> 128 -> 256. The feature map is then pooled to 7x7,
    flattened, and passed through two fully connected layers.

    Args:
        num_classes: Number of output logits produced per image.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.layer1 = self._conv_stage(3, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.layer3 = self._conv_stage(32, 64)
        self.layer4 = self._conv_stage(64, 128)
        self.layer5 = self._conv_stage(128, 256)
        # For 224x224 inputs layer5 already yields 7x7 maps, where this pooling
        # is the identity. For other input sizes it brings the map to the 7x7
        # that fc1 expects. It has no parameters, so checkpoints keep their keys.
        self.pool = torch.nn.AdaptiveAvgPool2d((7, 7))
        self.fc1 = torch.nn.Linear(256*7*7, 512)
        self.fc2 = torch.nn.Linear(512, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d -> ReLU -> MaxPool2d stage that halves height and width."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3), padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch with 3 channels, laid out as ``(batch, 3, height, width)``.
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.pool(out)
        out = torch.flatten(out, 1)
        # Without a non-linearity here fc1 and fc2 would compose into a single
        # linear map, making the 512-unit hidden layer pointless.
        out = torch.nn.functional.relu(self.fc1(out))
        out = self.fc2(out)
        return out

# Initialize the model, loss function, and optimizer
model = CNN(num_classes=len(dataset.classes)).to(device)  # Move model to the selected device
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Mixed precision is only used when running on CUDA. On CPU both autocast and
# the scaler are explicitly disabled, so the loop runs in full precision.
use_amp = device.type == 'cuda'
scaler = GradScaler(device.type, enabled=use_amp)

log_interval = 100  # number of training steps between progress reports

start_time = time.time()
# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)  # Move data to the selected device
        optimizer.zero_grad()
        with autocast(device.type, enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Update running loss (criterion already averages over the batch)
        running_loss += loss.item()

        # Calculate accuracy: the largest logit is also the most probable class,
        # so no softmax is needed to pick the prediction.
        predicted = outputs.detach().argmax(dim=1)
        total_preds += labels.size(0)  # Number of labels in the batch
        correct_preds += (predicted == labels).sum().item()  # Count correct predictions

        # Print loss and accuracy
        if (i + 1) % log_interval == 0:
            # running_loss sums one mean loss per step, so average over the
            # number of steps in the window, not over the batch size.
            avg_loss = running_loss / log_interval
            accuracy = (correct_preds / total_preds) * 100  # Calculate accuracy percentage
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')
            running_loss = 0.0  # Reset running loss for the next set of steps
            correct_preds = 0
            total_preds = 0


print("--- %s seconds ---" % (time.time() - start_time))

# Testing loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to the selected device
        outputs = model(images)
        # The largest logit is also the most probable class; softmax is not needed.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(correct)
print(total)
if total:
    print(f'Accuracy of the model on the test images: {100 * correct / total:.2f}%')
else:
    # An empty test split must not abort the cell before the weights are saved.
    print('Accuracy of the model on the test images: undefined (no test images)')
torch.save(model.state_dict(), 'pytorch_simple_model.pth')

In [None]:
def image_classification(model, image_path):
    """Classify a single image file with ``model`` and print the result.

    The image is converted to RGB, resized to 224x224, turned into a tensor and
    normalized, then run through the model as a batch of one on the
    notebook-level ``device``.

    Args:
        model: A ``torch.nn.Module`` whose output for a batch of one image is
            a ``(1, num_classes)`` tensor of logits.
        image_path: Path of the image file to classify.

    Returns:
        None. The predicted class index, and up to five top class indices with
        their softmax probabilities, are printed.
    """
    img_height, img_width = 224, 224

    transform = transforms.Compose([
        transforms.Resize((img_height, img_width)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Use a context manager so the file handle is released; convert() returns a
    # fully loaded copy that stays valid after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Add a batch dimension and move the input to the appropriate device
    input_batch = transform(image).unsqueeze(0).to(device)

    # Set the model to evaluation mode
    model.eval()

    # Disable gradient calculation for inference
    with torch.no_grad():
        output = model(input_batch)

    # Get the predicted class (one class index per image)
    predicted_class = output.argmax(dim=1)

    # Convert the single row of logits to probabilities and take the top
    # entries; clamp k so models with fewer than five classes do not crash.
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    top_count = min(5, probabilities.size(0))
    topk_prob, topk_indices = torch.topk(probabilities, k=top_count)

    # Print the results
    print(f"Predicted class: {predicted_class.item()}")
    print(f"Top-5 classes: {topk_indices}")
    print(f"Top-5 probabilities: {topk_prob}")


In [None]:
print(dataset.class_to_idx)
model = CNN(num_classes=len(dataset.classes)).to(device)
# map_location lets a checkpoint saved on a GPU be restored on whatever device
# this session is using (e.g. a CPU-only machine).
model.load_state_dict(torch.load('pytorch_simple_model.pth', map_location=device, weights_only=True))

# Sample images to classify with the trained CNN.
sample_image_paths = [
    "data/butterfly_test.jpeg",
    "data/cat_test.jpeg",
    "data/chicken_test.jpeg",
    "data/cow_test.jpg",
    "data/dog_test.jpeg",
    "data/elephant_test.jpeg",
    "data/horse_test.jpg",
    "data/sheep_test.jpg",
    "data/spider_test.jpg",
    "data/squirrel_test.jpeg",
]
for sample_path in sample_image_paths:
    image_classification(model, sample_path)

In [None]:
import torchvision.models as models
from torchsummary import summary

# Hyperparameters for the MobileNetV2 run
batch_size = 64
num_epochs = 20
learning_rate = 0.001
img_height, img_width = 224, 224

# Deterministic preprocessing. The resize already yields img_height x img_width,
# so the following center crop of the same size leaves the image unchanged.
preprocess = transforms.Compose([
    transforms.Resize((img_height, img_width)),
    transforms.CenterCrop((img_height, img_width)),
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # per-channel normalization
])

dataset = datasets.ImageFolder('data', transform=preprocess)

num_classes = len(dataset.classes)

# 80% of the samples go to training; whatever is left becomes the test split.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

print(len(train_dataset))
print(len(test_dataset))

# Both loaders share every option except shuffling.
loader_options = dict(batch_size=batch_size, num_workers=4, pin_memory=True, prefetch_factor=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Start from MobileNetV2 with the library's default pretrained weights.
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)

# Freeze every existing parameter so only the new head below is trained.
model.requires_grad_(False)

# Swap in a fresh single-layer classification head sized for this dataset.
# It is created after the freeze, so its parameters remain trainable.
head_in_features = model.classifier[1].in_features
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(in_features=head_in_features, out_features=num_classes)
)

model.to(device)

summary(model, input_size=(3, 224, 224))

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Mixed precision is only used when running on CUDA. On CPU both autocast and
# the scaler are explicitly disabled, so the loop runs in full precision.
use_amp = device.type == 'cuda'
scaler = GradScaler(device.type, enabled=use_amp)

log_interval = 100  # number of training steps between progress reports

start_time = time.time()
# Training loop
for epoch in range(num_epochs):
    # NOTE(review): train() applies to the whole model, including any
    # normalization/dropout layers in the frozen backbone - confirm this is intended.
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)  # Move data to the selected device
        optimizer.zero_grad()
        with autocast(device.type, enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Update running loss (criterion already averages over the batch)
        running_loss += loss.item()

        # Calculate accuracy: the largest logit is also the most probable class,
        # so no softmax is needed to pick the prediction.
        predicted = outputs.detach().argmax(dim=1)
        total_preds += labels.size(0)  # Number of labels in the batch
        correct_preds += (predicted == labels).sum().item()  # Count correct predictions

        # Print loss and accuracy
        if (i + 1) % log_interval == 0:
            # running_loss sums one mean loss per step, so average over the
            # number of steps in the window, not over the batch size.
            avg_loss = running_loss / log_interval
            accuracy = (correct_preds / total_preds) * 100  # Calculate accuracy percentage
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')
            running_loss = 0.0  # Reset running loss for the next set of steps
            correct_preds = 0
            total_preds = 0


print("--- %s seconds ---" % (time.time() - start_time))

# Testing loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to the selected device
        outputs = model(images)
        # The largest logit is also the most probable class; softmax is not needed.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(correct)
print(total)
if total:
    print(f'Accuracy of the model on the test images: {100 * correct / total:.2f}%')
else:
    # An empty test split must not abort the cell before the weights are saved.
    print('Accuracy of the model on the test images: undefined (no test images)')

torch.save(model.state_dict(), 'pytorch_mobilenet_model.pth')

In [None]:
print(dataset.class_to_idx)
# Build the bare architecture only: every weight is overwritten by the
# checkpoint loaded below, so fetching the pretrained weights would be wasted work.
model = models.mobilenet_v2(weights=None)
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(in_features=model.classifier[1].in_features, out_features=num_classes)
)
model.to(device)
# map_location lets a checkpoint saved on a GPU be restored on whatever device
# this session is using (e.g. a CPU-only machine).
model.load_state_dict(torch.load('pytorch_mobilenet_model.pth', map_location=device, weights_only=True))

# Sample images to classify with the fine-tuned MobileNetV2.
sample_image_paths = [
    "data/butterfly_test.jpeg",
    "data/cat_test.jpeg",
    "data/chicken_test.jpeg",
    "data/cow_test.jpg",
    "data/dog_test.jpeg",
    "data/elephant_test.jpeg",
    "data/horse_test.jpg",
    "data/sheep_test.jpg",
    "data/spider_test.jpg",
    "data/squirrel_test.jpeg",
]
for sample_path in sample_image_paths:
    image_classification(model, sample_path)