In [1]:
# Imports and global configuration for the model-evaluation notebook.
# NOTE(review): torch, os and numpy are each imported twice below; the
# duplicates are harmless no-ops.
import torch
import os
import numpy as np
from glob import glob


import torch 
from torch.utils.data import Dataset,Subset, DataLoader, TensorDataset, ConcatDataset
import torchvision
import os
from PIL import Image, ImageFile
from torchvision import transforms, datasets
from pathlib import Path
# train_test_split is used to split the validation folder into a new
# train set and a new validation set.
from sklearn.model_selection import train_test_split
# Plotting (examples).
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import torch.nn as nn
# Seed the torch and NumPy global random number generators with a fixed value.
torch.manual_seed(42)
np.random.seed(42)
import copy

# NOTE(review): BaselineModel, used in the evaluation cell, is not defined in
# this notebook, so it presumably comes from this star import - confirm.
# Because this import runs last, it can also silently override any name
# imported above.
from baselineCNN import *

# Ask PIL to load truncated image files instead of raising an error on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True



In [2]:
# Root folder of the image dataset, relative to the notebook's working directory.
dataset_path = Path('./../wildfire-prediction-dataset')

# One sub-folder per split.
pretrain_path, val_path, test_path = (
    dataset_path.joinpath(split_dir) for split_dir in ('train', 'valid', 'test')
)

# Test-split images, converted to tensors with no further preprocessing.
dataset = datasets.ImageFolder(
    test_path,
    transform=transforms.ToTensor(),
)

def get_all_datasets(pretrain_path, val_path, test_path, transforms):
    """Build the pretrain, train, validation and test datasets.

    The images under ``val_path`` are split 80/20 into the returned train and
    validation subsets. The split is shuffled, stratified by class label and
    uses a fixed ``random_state`` of 42. The pretrain and test folders are
    loaded unchanged.

    Args:
        pretrain_path: Folder handed to ``ImageFolder`` for the pretrain set.
        val_path: Folder whose images are split into train and validation.
        test_path: Folder handed to ``ImageFolder`` for the test set.
        transforms: Mapping with the keys ``'pretrain'``, ``'valid'`` and
            ``'test'``. Each value is used as the ``transform`` of the
            matching ``ImageFolder``. Both the train and validation subsets
            use ``transforms['valid']``. Inside this function the parameter
            shadows the ``torchvision.transforms`` module.

    Returns:
        Tuple ``(pretrain_dataset, train_dataset, val_dataset, test_dataset)``.
        ``train_dataset`` and ``val_dataset`` are ``Subset`` views over the
        same ``ImageFolder`` built from ``val_path``.
    """
    pretrain_dataset = datasets.ImageFolder(pretrain_path, transform=transforms['pretrain'])
    val_folder = datasets.ImageFolder(val_path, transform=transforms['valid'])
    test_dataset = datasets.ImageFolder(test_path, transform=transforms['test'])

    # Stratify on the class labels so both subsets keep the class balance.
    train_idx, validation_idx = train_test_split(
        np.arange(len(val_folder)),
        test_size=0.2,
        random_state=42,
        shuffle=True,
        stratify=val_folder.targets,
    )
    train_dataset = Subset(val_folder, train_idx)
    val_dataset = Subset(val_folder, validation_idx)

    return pretrain_dataset, train_dataset, val_dataset, test_dataset


# This assignment used to be indented under the function's `return`, which
# made it unreachable, so `num_epochs` was never defined. It is a notebook
# level setting and belongs at module level.
num_epochs = 10
batch_size = 32


# Data transformations: every split only converts the image to a tensor.
# Each split gets its own Compose instance.
data_transforms = {
    split_name: transforms.Compose([transforms.ToTensor()])
    for split_name in ('pretrain', 'valid', 'test')
}

# Only the test split is needed in this notebook; the other three datasets
# returned by get_all_datasets are discarded.
_, _, _, test_dataset = get_all_datasets(
    pretrain_path,
    val_path,
    test_path,
    data_transforms,
)

# No shuffling: evaluation order does not matter.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=6,
)


In [5]:
def validate(model, data_loader, loss_fn, device):
    """Run one evaluation pass of ``model`` over ``data_loader``.

    On a CUDA device the inputs are cast to float16 and the forward pass and
    loss run under CUDA autocast, exactly as before. On any other device the
    inputs keep their dtype and autocast is disabled. Previously the
    unconditional ``.half()`` fed float16 inputs to a float32 model on the
    CPU fallback, where autocast is inactive.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(x, y)`` batches.
        loss_fn: Callable ``loss_fn(y_hat, y)`` returning a scalar tensor.
        device: ``torch.device`` or device string the batches are moved to.
            The model is expected to live on that device already.

    Returns:
        Tuple ``(losses, correct_predictions)``. ``losses`` is a list with one
        float per batch, as returned by ``loss_fn``. ``correct_predictions``
        is the total number of samples whose ``argmax(1)`` prediction equals
        the label.
    """
    device = torch.device(device)
    # Mixed precision is only used on CUDA.
    use_amp = device.type == 'cuda'

    model.eval()
    losses = []
    correct_predictions = 0
    with torch.no_grad():
        for x, y in tqdm(data_loader):
            x = x.to(device)
            y = y.to(device)
            if use_amp:
                x = x.half()  # Convert to float16 (kept so CUDA results are unchanged)
            # With enabled=False this context manager is a no-op, so the same
            # code path also works on CPU.
            with torch.amp.autocast('cuda', enabled=use_amp):
                y_hat = model(x)
                loss = loss_fn(y_hat, y)
            losses.append(loss.item())
            correct_predictions += (y == y_hat.argmax(1)).sum().item()
    return losses, correct_predictions

In [6]:

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Get all model files from the folder.
# glob returns files in arbitrary, filesystem-dependent order; sorting makes
# the evaluation order (and the order of tied models in the ranking)
# reproducible between runs and machines.
folder_path = "saved_models"
model_files = sorted(glob(os.path.join(folder_path, "*.pth")))
if not model_files:
    print(f"No .pth checkpoints found in '{folder_path}'")

# Dictionary to store results: file name -> {'accuracy', 'loss', 'path'}
results = {}

# Evaluate each model with the same loss function
criterion = nn.CrossEntropyLoss()

for model_path in model_files:
    model_name = os.path.basename(model_path)
    print(f"\nEvaluating model: {model_name}")

    # Initialize model and load weights.
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source. Consider passing weights_only=True once it is
    # confirmed that the checkpoints contain nothing but tensors and plain
    # Python containers.
    model = BaselineModel()  # Using your existing baseline model
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)

    # Evaluate
    model.eval()
    test_loss, correct_predictions = validate(model, test_data_loader, criterion, device)
    accuracy = correct_predictions / len(test_dataset)
    # NOTE(review): this is the mean of the per-batch losses, so a smaller
    # final batch is weighted slightly more per sample than the others.
    avg_loss = np.mean(test_loss)

    # Store results
    results[model_name] = {
        'accuracy': accuracy,
        'loss': avg_loss,
        'path': model_path
    }

    print(f'Test Loss: {avg_loss:.4f} Test Accuracy: {accuracy:.4f}')

# Rank models by accuracy, best first. Each entry is (file name, accuracy).
ranked_by_accuracy = sorted(
    ((name, data['accuracy']) for name, data in results.items()),
    key=lambda entry: entry[1],
    reverse=True,
)

# Print rankings
print("\nModel Rankings (by accuracy):")
print("-" * 50)
for i, (model_name, accuracy) in enumerate(ranked_by_accuracy, 1):
    print(f"{i}. {model_name}")
    print(f"   Accuracy: {accuracy:.4f}")
    print(f"   Loss: {results[model_name]['loss']:.4f}")
print("-" * 50)

# Print best model details.
# Guard against an empty ranking (no checkpoints were evaluated); indexing
# ranked_by_accuracy[0] would otherwise raise IndexError.
if ranked_by_accuracy:
    best_model_name = ranked_by_accuracy[0][0]
    best_model = results[best_model_name]
    print(f"\nBest model: {best_model_name}")
    print(f"Path: {best_model['path']}")
    print(f"Accuracy: {best_model['accuracy']:.4f}")
    print(f"Loss: {best_model['loss']:.4f}")
else:
    best_model_name = None
    print("\nNo models were evaluated; nothing to rank.")

Using device: cuda

Evaluating model: finetuned_model_iter0.pth


  checkpoint = torch.load(model_path, map_location=device)
100%|██████████| 197/197 [00:14<00:00, 13.25it/s]


Test Loss: 0.2455 Test Accuracy: 0.9330

Evaluating model: finetuned_model_iter1.pth


100%|██████████| 197/197 [00:14<00:00, 13.44it/s]


Test Loss: 0.2708 Test Accuracy: 0.8879

Evaluating model: finetuned_model_iter2.pth


100%|██████████| 197/197 [00:14<00:00, 13.32it/s]


Test Loss: 0.4745 Test Accuracy: 0.7814

Evaluating model: baseline.pth


100%|██████████| 197/197 [00:14<00:00, 13.49it/s]


Test Loss: 0.1906 Test Accuracy: 0.9417

Evaluating model: finetuned_model2_iter1.pth


100%|██████████| 197/197 [00:14<00:00, 13.34it/s]


Test Loss: 0.2548 Test Accuracy: 0.9121

Evaluating model: finetuned_model2_iter0.pth


100%|██████████| 197/197 [00:14<00:00, 13.15it/s]

Test Loss: 0.2390 Test Accuracy: 0.9379

Model Rankings (by accuracy):
--------------------------------------------------
1. baseline.pth
   Accuracy: 0.9417
   Loss: 0.1906
2. finetuned_model2_iter0.pth
   Accuracy: 0.9379
   Loss: 0.2390
3. finetuned_model_iter0.pth
   Accuracy: 0.9330
   Loss: 0.2455
4. finetuned_model2_iter1.pth
   Accuracy: 0.9121
   Loss: 0.2548
5. finetuned_model_iter1.pth
   Accuracy: 0.8879
   Loss: 0.2708
6. finetuned_model_iter2.pth
   Accuracy: 0.7814
   Loss: 0.4745
--------------------------------------------------

Best model: baseline.pth
Path: saved_models/baseline.pth
Accuracy: 0.9417
Loss: 0.1906



