In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this installs a blanket "ignore" filter for every warning
# category for the rest of the kernel session, so deprecation notices emitted
# by the libraries imported above are hidden too. Consider narrowing it.
warnings.filterwarnings('ignore')

# Runtime setup: fix the random seed, then pick the compute device.
# Seeding torch's global generator makes torch-driven randomness (e.g. batch
# shuffling and the initialisation of newly created layers) repeatable when the
# notebook is restarted and re-run. Some CUDA kernels remain non-deterministic,
# so GPU runs can still differ slightly.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using {device} for training')

# Preprocessing pipelines for the two splits. They differ only in the geometric
# stage; both finish with the same tensor conversion and channel normalisation.
# The mean/std values are the per-channel statistics torchvision documents for
# its ImageNet-pretrained models.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Shared tail of both pipelines: PIL image -> float tensor -> normalised tensor.
tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]

# Training: random-area crop resized to 224x224, plus a random horizontal flip.
train_transforms = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
    ]
    + tensor_and_normalize
)

# Validation: deterministic resize to 256 followed by a central 224x224 crop.
val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
    ]
    + tensor_and_normalize
)

# Load both splits with ImageFolder, which expects one sub-directory per class
# under each root and applies the given transform to every loaded image.
train_dataset = datasets.ImageFolder('train', transform=train_transforms)
val_dataset = datasets.ImageFolder('val', transform=val_transforms)

# ImageFolder assigns class indices separately for each root directory. If the
# two roots do not contain the same class folders, an index means a different
# class in each split and the validation accuracy is silently meaningless, so
# fail loudly here instead.
if train_dataset.class_to_idx != val_dataset.class_to_idx:
    raise ValueError(
        "train/val class folders differ: "
        f"train={train_dataset.classes}, val={val_dataset.classes}"
    )

# Shuffle only the training split; validation order does not affect accuracy.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load an ImageNet-pretrained ResNet-50 and swap in a new classification head.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` argument. IMAGENET1K_V1 is the checkpoint the legacy flag maps to,
# so the starting weights are unchanged. Older torchvision releases that lack
# the weights enum keep using the legacy flag.
if hasattr(models, "ResNet50_Weights"):
    resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    resnet50 = models.resnet50(pretrained=True)

# Replace the final fully connected layer so it emits one logit per class
# folder found in the training set.
num_ftrs = resnet50.fc.in_features
resnet50.fc = nn.Linear(num_ftrs, len(train_dataset.classes))

# Move all parameters and buffers to the selected device.
resnet50 = resnet50.to(device)

# Objective and optimiser: cross-entropy on the class logits, with Adam
# updating every parameter of the network (pretrained layers and the new head).
LEARNING_RATE = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=resnet50.parameters(), lr=LEARNING_RATE)

# Training loop: each epoch makes one optimisation pass over the training set
# and then measures top-1 accuracy on the validation set.
num_epochs = 10

for epoch in range(num_epochs):
    # ---- training pass ----
    resnet50.train()
    running_loss = 0.0  # sum of per-sample losses seen so far this epoch
    seen = 0            # number of training samples seen so far this epoch
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # `loss` is the mean over the current batch. Weight it by the batch
        # size so that a short final batch does not count as much as a full
        # one in the reported epoch average.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        seen += n_batch

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / seen}')

    # ---- validation pass ----
    resnet50.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet50(inputs)
            # Predicted class = index of the largest logit in each row.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy on val set: {100 * correct / total}%')

# Persist only the learned parameters (the state dict), not the whole module
# object, to a file in the current working directory.
trained_weights = resnet50.state_dict()
torch.save(trained_weights, 'resnet50_custom.pth')
print('Model saved to resnet50_custom.pth')

Using cuda for training
Epoch 1/10, Loss: 1.186361270106357
Accuracy on val set: 58.11878664287535%
Epoch 2/10, Loss: 1.0012009525150987
Accuracy on val set: 54.93244965587561%
Epoch 3/10, Loss: 0.9393645013156144
Accuracy on val set: 65.81697680346673%
Epoch 4/10, Loss: 0.8904208975919286
Accuracy on val set: 66.09737445832272%
Epoch 5/10, Loss: 0.8585899582375651
Accuracy on val set: 66.73464185572266%
Epoch 6/10, Loss: 0.8280266004134409
Accuracy on val set: 64.59342340045883%
Epoch 7/10, Loss: 0.8032763455965504
Accuracy on val set: 70.71119041549834%
Epoch 8/10, Loss: 0.7688463548078793
Accuracy on val set: 72.29161356105021%
Epoch 9/10, Loss: 0.7572251469143676
Accuracy on val set: 72.85240887076218%
Epoch 10/10, Loss: 0.7370958935091461
Accuracy on val set: 73.36222278868213%
Model saved to resnet50_custom.pth
