In [61]:
import os
import shutil
import random

def partition_images(base_dir, train_dir, test_dir, train_ratio, seed=None):
    """Randomly split the files of ``base_dir`` into a train and a test folder.

    Every regular file directly inside ``base_dir`` is copied (not moved) into
    either ``train_dir`` or ``test_dir``. Sub-directories of ``base_dir`` are
    ignored, because ``shutil.copyfile`` can only copy regular files.

    Args:
        base_dir: Directory holding the files to split.
        train_dir: Destination directory for the training share (created if missing).
        test_dir: Destination directory for the remaining files (created if missing).
        train_ratio: Fraction in [0, 1] of the files that goes to ``train_dir``;
            the count is truncated with ``int()``.
        seed: Optional seed for a private random generator. When given, the
            same directory content always yields the same split. When ``None``
            the module-level ``random`` state is used, as before.

    Raises:
        ValueError: If ``train_ratio`` is outside [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order and includes directories;
    # keep regular files only and sort so a seeded shuffle is reproducible.
    all_images = sorted(
        name
        for name in os.listdir(base_dir)
        if os.path.isfile(os.path.join(base_dir, name))
    )

    rng = random if seed is None else random.Random(seed)
    rng.shuffle(all_images)

    train_count = int(len(all_images) * train_ratio)
    destinations = (
        (train_dir, all_images[:train_count]),
        (test_dir, all_images[train_count:]),
    )

    for target_dir, names in destinations:
        for name in names:
            shutil.copyfile(os.path.join(base_dir, name), os.path.join(target_dir, name))

# Copy 80% of the files found in 'images' into 'train' and the rest into 'test'.
partition_images(
    base_dir='images',
    train_dir='train',
    test_dir='test',
    train_ratio=0.8,
)


In [66]:
# Model evaluation function
def evaluate_model(model, test_dataset, batch_size=1, num_outputs=2):
    """Compute per-output MAE, MAPE, SMAPE and RMSE of ``model`` on ``test_dataset``.

    The model is switched to eval mode and run without gradient tracking.
    Inputs are moved to the device the model's parameters live on (CPU when
    the model has no parameters), so the function does not depend on a
    module-level ``device`` variable.

    Args:
        model: ``torch.nn.Module`` mapping a batch of images to predictions.
        test_dataset: Dataset yielding ``(image, label)`` pairs.
            Assumes predictions and labels have shape ``(batch, num_outputs)``
            once batched -- TODO confirm against the dataset class.
        batch_size: Batch size of the evaluation loader. Defaults to 1,
            matching the previous behaviour; larger values only speed things up.
        num_outputs: Number of regression targets per sample (default 2).

    Returns:
        Tuple ``(mae, mape, smape, rmse)`` of NumPy arrays with one entry per
        output. MAPE is averaged over labels whose magnitude exceeds 1e-8
        only; an output whose labels are all (near) zero yields NaN for MAPE.
        An empty dataset yields NaN for every metric.
    """
    model.eval()
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Evaluate on the model's own device so inputs and weights always match.
    try:
        eval_device = next(model.parameters()).device
    except StopIteration:
        eval_device = torch.device("cpu")

    total_mae = torch.zeros(num_outputs, device=eval_device)
    total_mape = torch.zeros(num_outputs, device=eval_device)
    total_smape = torch.zeros(num_outputs, device=eval_device)
    total_mse = torch.zeros(num_outputs, device=eval_device)
    # Per-output number of labels that are usable as a MAPE denominator.
    valid_label_count = torch.zeros(num_outputs, device=eval_device)
    total_count = 0

    # Keeps the SMAPE denominator away from zero when prediction and label are both 0.
    eps = torch.finfo(torch.float32).eps

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(eval_device)
            labels = labels.to(eval_device)
            outputs = model(images)

            absolute_error = torch.abs(outputs - labels)
            abs_labels = torch.abs(labels)
            total_mae += absolute_error.sum(dim=0)

            # Multiplying inf/nan by a boolean mask still gives nan, so the
            # zero labels are excluded with torch.where before dividing.
            non_zero_mask = abs_labels > 1e-8
            safe_denominator = torch.where(non_zero_mask, abs_labels, torch.ones_like(abs_labels))
            percentage_error = torch.where(
                non_zero_mask,
                (absolute_error / safe_denominator) * 100,
                torch.zeros_like(absolute_error),
            )
            total_mape += percentage_error.sum(dim=0)
            valid_label_count += non_zero_mask.sum(dim=0)

            smape = 200.0 * absolute_error / (torch.abs(outputs) + abs_labels + eps)
            total_smape += smape.sum(dim=0)

            total_mse += (absolute_error ** 2).sum(dim=0)

            total_count += labels.size(0)

    mae = total_mae / total_count
    # Average over the labels that actually contributed to the sum.
    mape = total_mape / valid_label_count
    smape = total_smape / total_count
    rmse = torch.sqrt(total_mse / total_count)

    return mae.cpu().numpy(), mape.cpu().numpy(), smape.cpu().numpy(), rmse.cpu().numpy()


In [67]:
import glob

# Path to your model directory
model_dir = './'

# File that receives the same report that is printed below
report_path = 'model_performance_model_test.txt'


def format_model_report(model_file, mae, mape, smape, rmse, aggregate_score):
    """Return the report lines for one model (shared by console and file output)."""
    lines = [f"Model: {model_file}, Aggregate Score: {aggregate_score:.4f}"]
    for i in range(len(mae)):
        lines.append(f"Metrics for prediction {i+1}:")
        lines.append(f"Mean Absolute Error: {mae[i]:.4f}")
        lines.append(f"Mean Absolute Percentage Error: {mape[i]:.2f}%")
        lines.append(f"Symmetric Mean Absolute Percentage Error: {smape[i]:.2f}%")
        lines.append(f"Root Mean Square Error: {rmse[i]:.4f}")
    return lines


# Collected as (model_file, mae, mape, smape, rmse, aggregate_score)
model_performance = []

# For each model file in the directory (sorted so ties keep a stable order)
for model_file in sorted(glob.glob(os.path.join(model_dir, '*.pth'))):
    # Load model state. map_location lets checkpoints saved on a GPU load on
    # a CPU-only machine. NOTE: torch.load unpickles the file, so only load
    # checkpoints from a trusted source.
    model = CNN()
    model.load_state_dict(torch.load(model_file, map_location=device))
    model.to(device)

    # Evaluate model on the held-out dataset
    mae, mape, smape, rmse = evaluate_model(model, test_dataset)

    # Calculate some form of aggregate score. Here, I use the mean of all metrics.
    # Change this if you want a different form of ranking.
    # NOTE(review): this averages values with different units (raw errors and
    # percentages), so the output with the largest scale dominates the ranking.
    aggregate_score = np.mean([np.mean(mae), np.mean(mape), np.mean(smape), np.mean(rmse)])

    # Save model's performance
    model_performance.append((model_file, mae, mape, smape, rmse, aggregate_score))

# Sort models by aggregate score, lowest (best) first. NaN never compares as
# smaller or larger, so NaN scores are pushed to the end explicitly.
model_performance.sort(key=lambda x: (bool(np.isnan(x[5])), x[5]))

# Build each model's report once, then emit it to the console and the file.
# Every line ends with a newline and each model is followed by a blank line.
report_blocks = [
    "".join(line + "\n" for line in format_model_report(*model_info)) + "\n"
    for model_info in model_performance
]

# Print model performance in order
for block in report_blocks:
    print(block, end="")

# Write the same report to the text document
with open(report_path, 'w', encoding='utf-8') as f:
    f.writelines(report_blocks)



Model: .\current_best_model_b4_lr0.001_e500.pth, Aggregate Score: 24.2052
Metrics for prediction 1:
Mean Absolute Error: 0.7534
Mean Absolute Percentage Error: 17.65%
Symmetric Mean Absolute Percentage Error: 19.17%
Root Mean Square Error: 0.9387
Metrics for prediction 2:
Mean Absolute Error: 45.6443
Mean Absolute Percentage Error: 18.75%
Symmetric Mean Absolute Percentage Error: 23.52%
Root Mean Square Error: 67.2114

Model: .\cnn_oldmodel_500images_b4_e500.pth, Aggregate Score: 25.2631
Metrics for prediction 1:
Mean Absolute Error: 0.9332
Mean Absolute Percentage Error: 21.87%
Symmetric Mean Absolute Percentage Error: 22.49%
Root Mean Square Error: 1.1386
Metrics for prediction 2:
Mean Absolute Error: 48.1815
Mean Absolute Percentage Error: 16.88%
Symmetric Mean Absolute Percentage Error: 19.09%
Root Mean Square Error: 71.5225

Model: .\cnn_oldmodel_500images_b8_e500.pth, Aggregate Score: 40.4328
Metrics for prediction 1:
Mean Absolute Error: 2.7092
Mean Absolute Percentage Error: 64