# Image Scene Classification

Library Import

In [None]:
import torch
import torchvision
import tqdm
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

Image Transformation

In [None]:
# Per-channel mean / std used to normalize the RGB tensors
# (the standard ImageNet statistics expected by torchvision's pretrained models).
channel_mean = (0.485, 0.456, 0.406)
channel_std = (0.229, 0.224, 0.225)

# Preprocessing + augmentation steps, applied in order to every loaded image.
pipeline_steps = [
    # Bring every image to a common 512x512 size first.
    transforms.Resize((512, 512)),
    # Random crop (random scale / aspect ratio) resized to the 224x224 network input.
    transforms.RandomResizedCrop(224),
    # Random left-right mirroring.
    transforms.RandomHorizontalFlip(),
    # Random rotation of up to +/-10 degrees.
    transforms.RandomRotation(10),
    # Random photometric perturbation.
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    # PIL image -> float tensor.
    transforms.ToTensor(),
    # Channel-wise normalization.
    transforms.Normalize(channel_mean, channel_std),
]
transform = transforms.Compose(pipeline_steps)

Load the image-folder dataset

In [None]:
# ImageFolder expects one sub-directory per class under the root directory and
# derives the class labels from the sub-directory names.
data_root = 'data'
dataset = torchvision.datasets.ImageFolder(root=data_root, transform=transform)

Split the dataset into training and validation sets

In [None]:
# Hold out 20% of the sample indices for validation; the fixed seed keeps the
# split reproducible across runs.
train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size=0.2, random_state=42)
train_dataset = Subset(dataset, train_idx)

# Validation images must not go through the random training augmentations
# (random crop / flip / rotation / colour jitter) attached to `dataset`,
# otherwise the validation metrics are noisy and not comparable between epochs.
# A second ImageFolder over the same root lists the files in the same sorted
# order, so the indices from the split above address the same images.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
val_source = torchvision.datasets.ImageFolder(root=dataset.root, transform=eval_transform)
val_dataset = Subset(val_source, val_idx)

Create data loaders for both sets

In [None]:
# Both loaders use the same batch size; only the training loader reshuffles
# its samples every epoch.
loader_batch_size = 128
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=loader_batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=loader_batch_size,
    shuffle=False,
)


load pretrained model

In [None]:
# Load ResNet-18 with ImageNet weights.
# The `pretrained=True` flag is deprecated in newer torchvision releases in
# favour of the `weights=` argument; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` used to select. Older torchvision versions without the
# weights enum still go through the legacy flag.
resnet18_weights = getattr(torchvision.models, 'ResNet18_Weights', None)
if resnet18_weights is not None:
    model = torchvision.models.resnet18(weights=resnet18_weights.IMAGENET1K_V1)
else:
    model = torchvision.models.resnet18(pretrained=True)

Freeze the backbone and replace the final layer to match the number of classes in the dataset

In [None]:
# Freeze every existing parameter so the pretrained weights are not updated.
model.requires_grad_(False)

# Swap the final fully connected layer for a fresh one sized to this dataset.
# The new layer is created after the freeze, so its parameters stay trainable.
num_classes = len(dataset.classes)
head_in_features = model.fc.in_features
model.fc = nn.Linear(in_features=head_in_features, out_features=num_classes)

loss function and optimizer definition

In [None]:
# Multi-class classification loss computed on the raw logits.
criterion = nn.CrossEntropyLoss()

# Only the parameters of the replaced head (model.fc) are handed to the optimizer.
head_parameters = model.fc.parameters()
optimizer = optim.SGD(
    head_parameters,
    lr=0.001,
    momentum=0.9,
)

Training hyperparameters and device selection

In [None]:
# Number of full passes over the training set.
num_epochs = 25

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Move the model's parameters and buffers to the selected device.
model.to(device)

fine-tuning the model

In [None]:
# Train the classifier head: one training pass and one validation pass per epoch,
# printing the sample-weighted mean loss and the accuracy of each pass.
#
# NOTE: model.train() also puts the backbone's BatchNorm layers into training
# mode, so their running statistics keep updating even when the backbone
# weights themselves are frozen.
for epoch in range(num_epochs):
    print(f'Starting epoch {epoch+1}/{num_epochs}...')

    # Training phase
    model.train()
    running_loss = 0.0
    running_corrects = 0
    train_seen = 0  # samples that actually contributed to the running totals
    for images, labels in tqdm.tqdm(train_loader):
        try:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            preds = outputs.argmax(dim=1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        except OSError as err:
            # Best-effort: skip the batch, but report it instead of hiding it.
            # PIL's UnidentifiedImageError is a subclass of OSError, so it is
            # covered here as well. Errors raised while the DataLoader fetches
            # a batch surface in the `for` statement above and are NOT caught
            # by this handler.
            tqdm.tqdm.write(f'Skipping training batch: {err!r}')
            continue

        n_batch = images.size(0)
        # loss.item() is the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even with a smaller last batch.
        running_loss += loss.item() * n_batch
        running_corrects += (preds == labels).sum().item()
        train_seen += n_batch

    # Average over the samples actually processed, so skipped batches do not
    # deflate the metrics; max(..., 1) avoids dividing by zero.
    epoch_loss = running_loss / max(train_seen, 1)
    epoch_acc = running_corrects / max(train_seen, 1)

    print(f'Training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    # Validation phase
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    val_seen = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in tqdm.tqdm(val_loader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            preds = outputs.argmax(dim=1)
            loss = criterion(outputs, labels)

            n_batch = images.size(0)
            val_loss += loss.item() * n_batch
            val_corrects += (preds == labels).sum().item()
            val_seen += n_batch

    val_loss = val_loss / max(val_seen, 1)
    val_acc = val_corrects / max(val_seen, 1)

    print(f'Validation Loss: {val_loss:.4f} Acc: {val_acc:.4f}')

print('Training finished.')

Export the model

In [None]:
# Serialize the whole model object (not just its state_dict) to disk.
export_path = 'model.pth'
torch.save(model, export_path)