# Split the Data

In [None]:
import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

import os
import shutil
from sklearn.model_selection import train_test_split

# List of classes
classes = ['cloudy', 'desert', 'green_area', 'water']

# Paths
base_dir = "data"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "validate")
test_dir = os.path.join(base_dir, "test")

# Loop over classes
for class_name in classes:
    class_source_dir = os.path.join(base_dir, "source", class_name)  # Path to class folder

    # Get the list of images in the class folder
    images = os.listdir(class_source_dir)

    # Split into train and test sets
    train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)

    # Split test set into validation and test sets
    val_images, test_images = train_test_split(test_images, test_size=0.5, random_state=42)

    # Create destination directories if they don't exist
    train_class_dir = os.path.join(train_dir, class_name)
    val_class_dir = os.path.join(val_dir, class_name)
    test_class_dir = os.path.join(test_dir, class_name)

    os.makedirs(train_class_dir, exist_ok=True)
    os.makedirs(val_class_dir, exist_ok=True)
    os.makedirs(test_class_dir, exist_ok=True)

    # Move training images
    for image in train_images:
        source_path = os.path.join(class_source_dir, image)
        destination_path = os.path.join(train_class_dir, image)
        shutil.move(source_path, destination_path)

    # Move validation images
    for image in val_images:
        source_path = os.path.join(class_source_dir, image)
        destination_path = os.path.join(val_class_dir, image)
        shutil.move(source_path, destination_path)

    # Move test images
    for image in test_images:
        source_path = os.path.join(class_source_dir, image)
        destination_path = os.path.join(test_class_dir, image)
        shutil.move(source_path, destination_path)

# The Model

## Training

In [None]:
import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, models, transforms
import os

# Define transforms for the training and validation sets
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'validate': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Load the datasets with ImageFolder
data_dir = "data"
image_datasets = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
    for x in ['train', 'validate']
}

# Define the dataloaders
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=4)
    for x in ['train', 'validate']
}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the pre-trained ResNet18 model from torchvision.models
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 4)  # Set the number of output classes

# Move the model to GPU if available
model = model.to(device)

# Define the criterion
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001)


# Define number of epochs
num_epochs = 25

# Train the model
best_loss = float('inf')  # Initialize with a large value
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'validate']:
        if phase == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()   # Set model to evaluation mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

            # Backward pass and optimization only in training phase
            if phase == 'train':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(image_datasets[phase])
        epoch_acc = running_corrects.double() / len(image_datasets[phase])

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

        # Check if validation loss is higher than training loss
        if phase == 'validate' and epoch_loss > running_loss:
            print("Validation loss is higher than training loss. Stopping training.")
            break

        # Update best loss if validation loss is lower
        if phase == 'validate' and epoch_loss < best_loss:
            best_loss = epoch_loss

    print()

    # Check if validation loss hasn't improved for the last 3 epochs
    if epoch > 2 and epoch_loss > best_loss:
        print("Validation loss hasn't improved for the last 3 epochs. Stopping training.")
        break

print('Training complete')

Epoch 0/24
----------
train Loss: 0.5118 Acc: 0.8244
validate Loss: 0.1118 Acc: 0.9822

Epoch 1/24
----------
train Loss: 0.2554 Acc: 0.9194
validate Loss: 0.0858 Acc: 0.9787

Epoch 2/24
----------
train Loss: 0.1932 Acc: 0.9438
validate Loss: 0.0544 Acc: 0.9822

Epoch 3/24
----------
train Loss: 0.1634 Acc: 0.9512
validate Loss: 0.1691 Acc: 0.9396

Validation loss hasn't improved for the last 3 epochs. Stopping training.
Training complete


## Validation

In [None]:
#Transform the test and validation data
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the validation dataset
validation_dataset = datasets.ImageFolder(os.path.join(data_dir, 'validate'), test_transforms)
validation_dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=4, shuffle=True, num_workers=4)


def validate(model, dataloader):
    model.eval()  # Set model to evaluate mode
    running_corrects = 0
    total_samples = 0

    # Iterate over data
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        total_samples += labels.size(0)

        # Forward pass
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

        # Statistics
        running_corrects += torch.sum(preds == labels.data)

    accuracy = running_corrects.double() / total_samples

    print('Validation Acc: {:.4f}'.format(accuracy))

# Call the validation function
print("Validating the model...")
validate(model, validation_dataloader)

Validating the model...
Validation Acc: 0.9396


## Test

In [None]:
# Load the test dataset
test_dataset = datasets.ImageFolder(os.path.join(data_dir, 'test'), test_transforms)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=True, num_workers=4)

def test(model, dataloader):
    model.eval()  # Set model to evaluate mode
    running_corrects = 0
    total_samples = 0

    # Iterate over data
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        total_samples += labels.size(0)

        # Forward pass
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

        # Statistics
        running_corrects += torch.sum(preds == labels.data)

    accuracy = running_corrects.double() / total_samples

    print('Test Acc: {:.4f}'.format(accuracy))

# Call the test function
print("Testing the model...")
test(model, test_dataloader)

Testing the model...
Test Acc: 0.9096


## Preformance

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

def compute_metrics(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []

    # Iterate over data
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

        # Save all predictions and true labels
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    # Compute confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    print('Confusion Matrix:')
    print(cm)

    # Compute classification report
    cr = classification_report(all_labels, all_preds, target_names=dataloader.dataset.classes)
    print('Classification Report:')
    print(cr)

# Compute metrics on test set
print("Computing metrics on test set...")
compute_metrics(model, test_dataloader)

Computing metrics on test set...
Confusion Matrix:
[[150   0   0   0]
 [  0 114   0   0]
 [  0   0 101  49]
 [  1   0   1 148]]
Classification Report:
              precision    recall  f1-score   support

      cloudy       0.99      1.00      1.00       150
      desert       1.00      1.00      1.00       114
  green_area       0.99      0.67      0.80       150
       water       0.75      0.99      0.85       150

    accuracy                           0.91       564
   macro avg       0.93      0.92      0.91       564
weighted avg       0.93      0.91      0.91       564

