In [1]:
import os
import re
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column
from bokeh.transform import dodge
from collections import Counter

# Function to extract score and assign label
def get_label_from_filename(filename):
    match = re.search(r's(\d+)_', filename)
    if match:
        score = int(match.group(1))
        if 0 <= score < 150:
            return 0
        elif 150 <= score < 300:
            return 1
        elif 300 <= score < 450:
            return 2
        elif 450 <= score < 600:
            return 3
        else:
            return 4
    else:
        raise ValueError(f"Filename does not match expected pattern: {filename}")

# Custom dataset class for our images
class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_files = []
        self.labels = []
        for subdir, _, files in os.walk(root_dir):
            for file in files:
                if file.endswith('.jpg'):
                    try:
                        self.image_files.append(os.path.join(subdir, file))
                        self.labels.append(get_label_from_filename(file))
                    except ValueError as e:
                        print(e)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        image = Image.open(img_path).convert("RGB")
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

    def print_labels(self, num_samples=5):
        for i in range(min(num_samples, len(self.image_files))):
            print(f"Filename: {self.image_files[i]}, Label: {self.labels[i]}")


In [9]:

root_dir = './_results'  

# Create a dataset and dataloader
transform = transforms.Compose([
    transforms.Resize((25, 105)),
    transforms.ToTensor(),
])
dataset = CustomImageDataset(root_dir=root_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Display some samples
for images, labels in dataloader:
    print(images.shape, labels)
    break

dataset.print_labels(num_samples=5)


torch.Size([32, 3, 25, 105]) tensor([0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
Filename: ./_results\gruppe0001\a11_s369_2023-06-27_09-52-42_C5_a11.jpg, Label: 2
Filename: ./_results\gruppe0001\a11_s722_2023-06-27_04-19-14_C0_a11.jpg, Label: 4
Filename: ./_results\gruppe0001\a12_s709_2023-06-27_09-31-12_C5_a12.jpg, Label: 4
Filename: ./_results\gruppe0001\a13_s704_2023-06-27_10-25-12_C50_a13.jpg, Label: 4
Filename: ./_results\gruppe0001\a13_s712_2023-06-27_09-51-53_C5_a13.jpg, Label: 4


In [20]:
print(torch.cuda.is_available())

# Define the CNN model
class ChocolateCNN(nn.Module):
    def __init__(self):
        super(ChocolateCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.fc1 = nn.Linear(64 * 6 * 26, 512)  # Adjusted to match the flattened size
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 5)

    def forward(self, x):
        x = nn.ReLU()(self.bn1(self.conv1(x)))
        x = nn.MaxPool2d(kernel_size=2, stride=2)(x)
        x = nn.ReLU()(self.bn2(self.conv2(x)))
        x = nn.MaxPool2d(kernel_size=2, stride=2)(x)
        x = x.view(-1, 64 * 6 * 26)  # Flatten the tensor
        x = nn.ReLU()(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

True


In [22]:


# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Split dataset into train and validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Initialize the model, loss function, and optimizer
model = ChocolateCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_train_loss += loss.item()

    avg_train_loss = running_train_loss / len(train_loader)

    model.eval()
    running_val_loss = 0.0
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    avg_val_loss = running_val_loss / len(val_loader)
    val_accuracy = 100 * correct / total

    print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

print("Training complete!")


cuda
Epoch 1, Train Loss: 0.4163, Validation Loss: 0.1527, Validation Accuracy: 94.35%
Epoch 2, Train Loss: 0.1732, Validation Loss: 0.1285, Validation Accuracy: 95.18%
Epoch 3, Train Loss: 0.1559, Validation Loss: 0.1184, Validation Accuracy: 95.73%
Epoch 4, Train Loss: 0.1441, Validation Loss: 0.1342, Validation Accuracy: 95.73%
Epoch 5, Train Loss: 0.1363, Validation Loss: 0.1300, Validation Accuracy: 95.29%
Epoch 6, Train Loss: 0.1298, Validation Loss: 0.1339, Validation Accuracy: 95.07%
Epoch 7, Train Loss: 0.1234, Validation Loss: 0.1215, Validation Accuracy: 95.73%
Epoch 8, Train Loss: 0.1125, Validation Loss: 0.1069, Validation Accuracy: 96.45%
Epoch 9, Train Loss: 0.1050, Validation Loss: 0.0982, Validation Accuracy: 96.90%
Epoch 10, Train Loss: 0.1014, Validation Loss: 0.0973, Validation Accuracy: 96.51%
Epoch 11, Train Loss: 0.1003, Validation Loss: 0.1187, Validation Accuracy: 96.51%
Epoch 12, Train Loss: 0.0961, Validation Loss: 0.0928, Validation Accuracy: 96.56%
Epoch 13

In [25]:
print(train_size)
print(val_size)
torch.save(model.state_dict(), 'chocolate_cnn.pth')

7216
1804


In [26]:
# Enable Bokeh in the notebook
output_notebook()

# Adding jitter to the points
jitter = 0.05
all_labels_jittered = np.array(all_labels) + np.random.uniform(-jitter, jitter, len(all_labels))
all_predictions_jittered = np.array(all_predictions) + np.random.uniform(-jitter, jitter, len(all_predictions))

# Create a ColumnDataSource for the scatter plot
source_scatter = ColumnDataSource(data={
    'original': all_labels_jittered,
    'predicted': all_predictions_jittered,
    'original_labels': all_labels,
    'predicted_labels': all_predictions
})

# Scatter plot with hover tool
scatter_plot = figure(title="Original Labels vs Predicted Labels (with Jitter)", 
                      x_axis_label='Original Labels', y_axis_label='Predicted Labels', 
                      tools="pan,wheel_zoom,box_zoom,reset")
scatter_plot.scatter('original', 'predicted', source=source_scatter, alpha=0.6, size=10)

hover_scatter = HoverTool()
hover_scatter.tooltips = [
    ("Original", "@original_labels"),
    ("Predicted", "@predicted_labels")
]
scatter_plot.add_tools(hover_scatter)

# Count plot
original_counts = Counter(all_labels)
predicted_counts = Counter(all_predictions)

labels = list(range(5))
original_counts_list = [original_counts.get(label, 0) for label in labels]
predicted_counts_list = [predicted_counts.get(label, 0) for label in labels]

source_count = ColumnDataSource(data={
    'labels': labels,
    'original_counts': original_counts_list,
    'predicted_counts': predicted_counts_list
})

count_plot = figure(title="Count of Original vs Predicted Labels", 
                    x_axis_label='Labels', y_axis_label='Count', 
                    x_range=(-0.5, 4.5), tools="pan,wheel_zoom,box_zoom,reset")

count_plot.vbar(x=dodge('labels', -0.2, range=count_plot.x_range), top='original_counts', width=0.4, source=source_count, color="blue", legend_label="Original", alpha=0.6)
count_plot.vbar(x=dodge('labels',  0.2, range=count_plot.x_range), top='predicted_counts', width=0.4, source=source_count, color="red", legend_label="Predicted", alpha=0.6)

hover_count = HoverTool()
hover_count.tooltips = [
    ("Label", "@labels"),
    ("Original Count", "@original_counts"),
    ("Predicted Count", "@predicted_counts")
]
count_plot.add_tools(hover_count)
count_plot.legend.location = "top_left"

# Show the plots
show(column(scatter_plot, count_plot))


In [30]:
# Save the trained model
torch.save(model.state_dict(), 'chocolate_cnn.pth')

# Function to load the model
def load_model(model_path):
    model = ChocolateCNN().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model

In [31]:
# Function to make predictions 
def predict_images(model, image_folder, transform, device):
    image_files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.jpg')]
    predictions = []

    for img_file in image_files:
        image = Image.open(img_file).convert("RGB")
        image = transform(image).unsqueeze(0).to(device)
        
        with torch.no_grad():
            output = model(image)
            _, predicted = torch.max(output, 1)
            predictions.append((img_file, predicted.item()))
    
    return predictions

# Load the trained model
model = load_model('chocolate_cnn.pth')

# Predict images in a new folder
new_image_folder = './gruppe0019'  
predictions = predict_images(model, new_image_folder, transform, device)

# Print some filenames and their predictions
for img_file, pred in predictions:  # Print first 10 predictions
    print(f"Filename: {img_file}, Predicted Label: {pred}")

Filename: ./gruppe0019\a10_s251_2023-06-27_09-53-06_C5_a10.jpg, Predicted Label: 1
Filename: ./gruppe0019\a10_s50_2023-06-27_00-13-51_C0_a10.jpg, Predicted Label: 0
Filename: ./gruppe0019\a10_s774_2023-06-27_04-56-10_C0_a10.jpg, Predicted Label: 4
Filename: ./gruppe0019\a11_s670_2023-06-27_09-51-53_C5_a11.jpg, Predicted Label: 4
Filename: ./gruppe0019\a11_s695_2023-06-27_10-25-12_C50_a11.jpg, Predicted Label: 4
Filename: ./gruppe0019\a11_s804_2023-06-26_22-22-39_C50_a11.jpg, Predicted Label: 4
Filename: ./gruppe0019\a13_s414_2023-06-27_09-41-28_C4_a13.jpg, Predicted Label: 2
Filename: ./gruppe0019\a13_s727_2023-06-27_12-21-23_C5_a13.jpg, Predicted Label: 4
Filename: ./gruppe0019\a14_s618_2023-06-27_09-51-53_C5_a14.jpg, Predicted Label: 4
Filename: ./gruppe0019\a15_s67_2023-06-27_09-41-28_C4_a15.jpg, Predicted Label: 0
Filename: ./gruppe0019\a18_s684_2023-06-27_10-25-12_C50_a18.jpg, Predicted Label: 4
Filename: ./gruppe0019\a18_s87_2023-06-27_09-51-50_C5_a18.jpg, Predicted Label: 0
File