In [59]:
import os
import re
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column, gridplot
from bokeh.transform import dodge
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import methods
from collections import defaultdict
import importlib as il
# from torchsampler import ImbalancedDatasetSampler

In [None]:
# Re-execute the local `methods` module so edits made to it on disk are picked
# up without restarting the kernel.
il.reload(methods)

In [None]:
# Inputs handed to the dataset: the image folder and the score boundaries
# passed through as `classification`.
root_dir = './_results'
score_classification = [0, 150, 300, 470, 630]

# Preprocessing pipeline: resize every image to a fixed (100, 25) size
# (torchvision sizes are (height, width)), then convert it to a tensor.
resize_step = transforms.Resize((100, 25))
transform = transforms.Compose([resize_step, transforms.ToTensor()])

# Build the dataset and a shuffled loader over all of it.
dataset = methods.CustomImageDataset(
    root_dir=root_dir,
    transform=transform,
    classification=score_classification,
)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

# Quick sanity check of a few labels, followed by the overview plots.
dataset.print_labels(num_samples=5)

dc = methods.Container()
methods.plot_histogram_and_scatter(dataset, dc=dc)

In [68]:
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Split dataset into train (80%) and validation (remaining samples).
# The split is driven by an explicitly seeded generator so the same samples end
# up in each partition on every run; only the split is seeded here.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# The training loader draws batches through the custom imbalanced sampler.
# `shuffle` is not passed because DataLoader treats `sampler` and `shuffle=True`
# as mutually exclusive.
train_loader = DataLoader(
    train_dataset,
    sampler=methods.CustomImbalancedDatasetSampler(train_dataset),
    batch_size=32,
)
# Validation is iterated in a fixed order.
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


cuda


In [None]:
# Training configuration: epoch count, network (moved to the selected device),
# loss function, and optimizer.
num_epochs = 50
model = methods.ChocolateCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Run the training loop, then unpack what it returns (in the order given below).
training_outputs = methods.training_loop(
    model, num_epochs, criterion, optimizer, train_loader, val_loader, device
)
(
    avg_val_loss,
    val_losses,
    val_accuracy,
    train_losses,
    cm,
    misclassified_images,
) = training_outputs

print("Training complete!")

In [75]:

# Descriptive metadata stored next to the weights.
# `score_classification` is reused as-is from the dataset setup.
loss_function = 'CrossEntropyLoss'
additional_text = 'Anything'
model_class_name = 'ChocolateCNN'

# Get the validation indices: a Subset exposes the indices it was built from;
# for any other dataset every position counts as a validation sample.
val_indices = val_dataset.indices if isinstance(val_dataset, torch.utils.data.Subset) else list(range(len(val_dataset)))

save_path = 'models/model_with_metadata_sampled2.pth'

# Make sure the target directory exists so a missing folder cannot make the
# save fail after training has already finished.
save_dir = os.path.dirname(save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

methods.save_model_with_metadata(
    model, optimizer, num_epochs, loss_function, avg_val_loss, val_accuracy,
    model_class_name, score_classification, val_losses, train_losses,
    additional_text, val_indices, misclassified_images, cm, save_path
)

In [None]:
# Pick up any on-disk edits to the helper module before evaluating.
il.reload(methods)

# Saved model paths to evaluate (commented entries are currently disabled).
model_paths = [
    # 'models/chocolate_cnn1.pth',
    # 'models/chocolate_cnn_best.pth',
    # 'models/chocolate_cnn.pth',
    'models/model_with_metadata_sampled3.pth',
    'models/model_with_metadata_sampled4.pth'
]

# Per model path: filename -> {'image': ..., 'details': [{'true', 'pred', 'score'}, ...]}
misclassified_details = defaultdict(dict)

for model_path in model_paths:
    print(f"Evaluating model: {model_path}")

    # NOTE: this rebinds notebook-level names (model, optimizer, num_epochs,
    # criterion, score_classification, ...) to the values stored in the checkpoint.
    (model, optimizer, num_epochs, criterion, additional_text, valid_loss,
     valid_accuracy, score_classification, misclassified_images, cm, val_indices,
     train_losses, val_losses) = methods.load_model_with_metadata(model_path, device)
    print()

    # Reconstruct the validation DataLoader from the indices saved with the model.
    # The dataset root is the shared `root_dir` rather than a second copy of the path.
    loaded_dataset = methods.CustomImageDataset(root_dir=root_dir, transform=transform, classification=score_classification)
    val_dataset = torch.utils.data.Subset(loaded_dataset, val_indices)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Store misclassified details, grouping repeated filenames under one entry.
    details_by_filename = misclassified_details[model_path]
    for info in misclassified_images:
        entry = details_by_filename.setdefault(
            info['filename'], {'image': info['image'], 'details': []}
        )
        entry['details'].append({
            'true': info['label'],
            'pred': info['predicted'],
            'score': info['score']
        })

    # Plot training and validation loss
    methods.plot_losses(num_epochs, train_losses, val_losses, title=f'Training and Validation Loss for {model_path}')

    # Plot confusion matrix
    methods.plot_confusion_matrix(cm, class_names=[0, 1, 2, 3, 4])  # Adjust class names as needed

    # Show misclassified images
    methods.show_misclassified_images(misclassified_images)
    

In [None]:
# Collect the misclassified filenames of each model, then keep only those that
# appear for every model.
filename_sets = [set(misclassified_details[path].keys()) for path in model_paths]
common_misclassified_images = filename_sets[0].intersection(*filename_sets[1:])

print(f"Common misclassified images: {len(common_misclassified_images)}")

# Display the images that all evaluated models got wrong.
methods.show_common_misclassified_images(
    common_misclassified_images, misclassified_details, model_paths
)