In [3]:
import torch
import torch.backends.mps

# Setup device-agnostic code 
if torch.cuda.is_available():
    device = torch.device("cuda") # NVIDIA GPU
elif torch.backends.mps.is_available():
    device = torch.device("mps") # Apple GPU
else:
    device = torch.device("cpu") # Defaults to CPU if NVIDIA GPU/Apple GPU aren't available

print(f"Using device: {device}")

Using device: mps


In [4]:
from pathlib import Path
from torchvision import transforms, datasets

image_path = Path("/Users/alextsagkas/Document/Office/solar_panels/data/")

train_dir = image_path / "train"
test_dir = image_path / "test"

train_data_transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.RandomHorizontalFlip(p=0.5), 
    transforms.ToTensor()
])

train_dataset = datasets.ImageFolder(
    root=str(train_dir),
    transform=train_data_transform, 
    target_transform=None
)

In [21]:
class_dict = train_dataset.class_to_idx
class_dict

{'clean': 0, 'soiled': 1}

In [5]:
import torch
from torch import nn


class TinyVGG(nn.Module):
    """
    Model architecture copying TinyVGG from: 
    https://poloclub.github.io/cnn-explainer/
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int) -> None:
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=input_shape,
                out_channels=hidden_units,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_units,
                out_channels=hidden_units,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=2,
                stride=2
            )
        )

        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=hidden_units * 16 * 16,
                out_features=output_shape
            )
        )

    def forward(self, x: torch.Tensor):
        return self.classifier(self.conv_block_2(self.conv_block_1(x)))


In [6]:
from torchinfo import summary

HIDDEN_UNITS = 32

model = TinyVGG(
    input_shape=3,
    hidden_units=HIDDEN_UNITS,
    output_shape=2
).to(device)

img, _ = train_dataset[0]

summary(model, input_size=(img.unsqueeze(0).shape))

Layer (type:depth-idx)                   Output Shape              Param #
TinyVGG                                  [1, 2]                    --
├─Sequential: 1-1                        [1, 32, 32, 32]           --
│    └─Conv2d: 2-1                       [1, 32, 64, 64]           896
│    └─ReLU: 2-2                         [1, 32, 64, 64]           --
│    └─Conv2d: 2-3                       [1, 32, 64, 64]           9,248
│    └─ReLU: 2-4                         [1, 32, 64, 64]           --
│    └─MaxPool2d: 2-5                    [1, 32, 32, 32]           --
├─Sequential: 1-2                        [1, 32, 16, 16]           --
│    └─Conv2d: 2-6                       [1, 32, 32, 32]           9,248
│    └─ReLU: 2-7                         [1, 32, 32, 32]           --
│    └─Conv2d: 2-8                       [1, 32, 32, 32]           9,248
│    └─ReLU: 2-9                         [1, 32, 32, 32]           --
│    └─MaxPool2d: 2-10                   [1, 32, 16, 16]           --
├─Seq

In [22]:
from torch import nn
import torch.utils.data
from torchmetrics.classification import MulticlassAccuracy, MulticlassPrecision, MulticlassRecall, MulticlassFBetaScore

# Define the training function
def train(
    model: torch.nn.Module,
    device: torch.device, 
    train_loader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module, 
    optimizer: torch.optim.Optimizer
) -> tuple[float, float, float, float, float]:

    model.train()

    train_loss, train_acc = 0, 0

    # Classification Metrics
    accuracy_fn = MulticlassAccuracy(
        num_classes=2
    ).to(device)
    precision_fn = MulticlassPrecision(
        num_classes=2
    ).to(device)
    recall_fn = MulticlassRecall(
        num_classes=2
    ).to(device)
    f_score_fn = MulticlassFBetaScore(
        beta=2.0, # count more on recall
        num_classes=2
    ).to(device)

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)

        loss = loss_fn(output, target)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pred_classs = output.argmax(dim=1)

        accuracy_fn.update(pred_classs, target)
        precision_fn.update(pred_classs, target)
        recall_fn.update(pred_classs, target)
        f_score_fn.update(pred_classs, target)

    train_loss = train_loss / len(train_loader)

    train_acc = accuracy_fn.compute().item()
    train_pr = precision_fn.compute().item()
    train_rc = recall_fn.compute().item()
    train_fscore = f_score_fn.compute().item()

    return train_loss, train_acc, train_pr, train_rc, train_fscore

In [23]:
import torch
from torch.utils.data import DataLoader
import torch.utils.data
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import KFold
from torch import optim
import numpy as np
from tqdm import tqdm

# Define the number of folds and batch size
K_FOLDS = 2 
BATCH_SIZE = 32
LEARNING_RATE = 0.001
NUM_EPOCHS = 2

# Loss function 
loss_fn = nn.CrossEntropyLoss()
kf = KFold(n_splits=K_FOLDS, shuffle=True)

# Loop through each fold
results = {}

train_indecies = np.arange(len(train_dataset))

for fold, (train_idx, test_idx) in enumerate(kf.split(train_indecies)):
    print(f"Fold {fold + 1}")
    print("-------")

    # Define the data loaders for the current fold
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        sampler=SubsetRandomSampler(train_idx.tolist()),
    )
    test_loader = DataLoader(
        dataset=train_dataset,
        batch_size=BATCH_SIZE,
        sampler=SubsetRandomSampler(test_idx.tolist()),
    )

    # Initialize the model and optimizer
    model = TinyVGG(
        input_shape=3,
        hidden_units=HIDDEN_UNITS,
        output_shape=2
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Train the model on the current fold
    for epoch in tqdm(range(NUM_EPOCHS)):
        train_loss, train_acc, train_pr, train_rc, train_fscore = train(
            model, 
            device, 
            train_loader, 
            loss_fn, 
            optimizer
        )
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"train_pr: {train_pr:.4f} | "
            f"train_rc: {train_rc:.4f} |"
            f"train_fscore: {train_fscore:.4f} | "
        )

    # Evaluate the model on the test set
    model.eval()

    test_loss = 0
    correct = 0

    with torch.inference_mode():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            test_loss += loss_fn(output, target).item()

            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()

    test_loss = test_loss / len(test_loader)
    test_acc = correct / (len(test_loader) * BATCH_SIZE)

    results[fold] = test_acc

    # Print the results for the current fold
    print(f"test_loss: {test_loss:.4f}, test_acc: {test_acc:.2f}%\n")

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {K_FOLDS} FOLDS')
print('-----------------------------------------')

sum = 0.0

for key, value in results.items():
    print(f'Fold {key + 1}: {value * 100:.2f} %')
    sum += value

print(f'Average: {sum/len(results.items()) * 100:.2f} %')

Fold 1
-------


 50%|█████     | 1/2 [00:32<00:32, 32.12s/it]

Epoch: 1 | train_loss: 0.2159 | train_acc: 0.8960 | train_pr: 0.9057 | train_rc: 0.8960 |train_fscore: 0.8977 |


100%|██████████| 2/2 [01:01<00:00, 30.83s/it]

Epoch: 2 | train_loss: 0.0834 | train_acc: 0.9662 | train_pr: 0.9672 | train_rc: 0.9662 |train_fscore: 0.9664 |





test_loss: 0.0497, test_acc: 0.98%

Fold 2
-------


 50%|█████     | 1/2 [00:30<00:30, 30.38s/it]

Epoch: 1 | train_loss: 0.2873 | train_acc: 0.8409 | train_pr: 0.8620 | train_rc: 0.8409 |train_fscore: 0.8440 |


100%|██████████| 2/2 [01:00<00:00, 30.12s/it]

Epoch: 2 | train_loss: 0.0863 | train_acc: 0.9651 | train_pr: 0.9667 | train_rc: 0.9651 |train_fscore: 0.9654 |





test_loss: 0.0575, test_acc: 0.98%

K-FOLD CROSS VALIDATION RESULTS FOR 2 FOLDS
-----------------------------------------
Fold 1: 98.13 %
Fold 2: 97.76 %
Average: 97.94 %
