In [None]:
# Install the libraries this notebook needs (pip skips packages that are already installed).
# Run this cell once if any of them are missing; it downloads them to your local machine.
# Doing so usually fixes a ModuleNotFoundError raised by the import cell below.
# If everything is already present, pip prints "Requirement already satisfied" and changes nothing.
# `sys.executable` is the interpreter running this kernel, so pip is invoked through that
# same interpreter rather than through whichever `pip` happens to be first on PATH.
import sys
!{sys.executable} -m pip install torch torchvision numpy matplotlib kagglehub

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
import kagglehub
import os
import math
from torch.utils.data import DataLoader, Dataset

In [None]:
# Fetch the dataset through kagglehub and keep the directory path it returns
dataset_handle = "kritikseth/fruit-and-vegetable-image-recognition"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

In [None]:
# Preprocessing pipeline applied to every image loaded from the dataset
data_transforms = transforms.Compose(
    [
        # ResNet18 is fed 224x224 inputs here, so every image is resized to that shape
        transforms.Resize((224, 224)),
        # Convert the loaded image into a PyTorch tensor
        transforms.ToTensor(),
        # Standardise each channel with the ImageNet mean / std
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Build the train / test datasets from their sub-folders, both using the shared transform
train_dir = os.path.join(path, "train")
test_dir = os.path.join(path, "test")
train_dataset = torchvision.datasets.ImageFolder(root=train_dir, transform=data_transforms)
test_dataset = torchvision.datasets.ImageFolder(root=test_dir, transform=data_transforms)

# Training batches are shuffled to ensure randomness and reduce ordering bias
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# The test set keeps a fixed order so evaluation runs are reproducible
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Preview grid: shows one sample image per class, `num_columns` images per row.

# Parameters
num_columns = 4  # Adjust this for the number of columns

# Class names and an (initially empty) list of sample image paths for each of them
classes = train_dataset.classes
class_to_idx = train_dataset.class_to_idx
class_images = {cls: [] for cls in classes}

# Collect one sample image per class
for img_path, label in train_dataset.samples:
    class_name = classes[label]
    if len(class_images[class_name]) < 1:  # Increase number (1) if you want to see more images from the class
        class_images[class_name].append(img_path)

# Calculate grid size
num_classes = len(classes)
num_rows = math.ceil(num_classes / num_columns)

# Create the grid.
# squeeze=False always returns a 2-D array of Axes, so flatten() also works when the
# grid collapses to a single row, a single column or a single cell.
fig, axs = plt.subplots(
    num_rows, num_columns, figsize=(num_columns * 4, num_rows * 4), squeeze=False
)
axs = axs.flatten()  # Flatten for easier indexing

for i, cls in enumerate(classes):
    if class_images[cls]:  # Check if the class has images
        img_path = class_images[cls][0]  # Use the first image for simplicity
        # Decode with the dataset's own loader (by default a PIL image converted to RGB),
        # i.e. the same decoder the training pipeline uses. This avoids files that
        # torchvision.io.read_image cannot decode and 1- or 4-channel results.
        img = np.asarray(train_dataset.loader(img_path)) / 255.0  # Scale to [0, 1]
        axs[i].imshow(img)
        axs[i].axis('off')
        axs[i].set_title(cls)

# Turn off remaining axes
for j in range(len(classes), len(axs)):
    axs[j].axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Start from a ResNet18 initialised with the IMAGENET1K_V1 pre-trained weights
pretrained_weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=pretrained_weights)

# Swap the final fully connected layer for one with an output per dataset class
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=len(train_dataset.classes))

In [None]:
# Diagnostic cell: reports whether training can use a GPU.
# Prints the CUDA availability flag, the CUDA version PyTorch was built with (or None),
# and then either the GPU name or a list of likely reasons why no GPU was found.

cuda_ok = torch.cuda.is_available()
print(cuda_ok)             # True or False
print(torch.version.cuda)  # CUDA version or None

if not cuda_ok:
    # Explain the likely causes and what happens next
    for message in (
        "PyTorch cannot locate a CUDA compatible device (NVIDIA GPU).",
        "The possibilities are:",
        "- No CUDA compatible NVIDIA GPU.",
        "- CUDA drivers are not properly installed.",
        "- You are using a non-CUDA version of PyTorch.",
        "Model will be trained using only the CPU.",
    ):
        print(message)
else:
    print(torch.cuda.get_device_name(0))  # GPU name

In [None]:
# Pick the training device first: a CUDA GPU when PyTorch can see one, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device BEFORE constructing the optimizer, as the torch.optim
# documentation advises. Assigning the result also keeps the notebook from echoing the
# full model repr as the cell output.
model = model.to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training the model
# Each epoch runs one full pass over train_loader and prints the mean training loss.
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0  # Sum of per-sample losses seen so far in this epoch
    samples_seen = 0    # Number of samples those losses belong to
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so a smaller
        # final batch does not count as much as a full one in the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/max(samples_seen, 1)}")

In [None]:
# Persist the trained model's state dict (its parameters and buffers) to disk
trained_state = model.state_dict()
torch.save(trained_state, "PhytovisionModel.pth")

In [None]:
# Measure overall accuracy on the test set
model.eval()
correct, total = 0, 0
with torch.no_grad():  # No gradients are needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the highest score along the class dimension is the predicted class
        predicted = torch.max(outputs, 1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

# Additional Testing 
## The cells below perform in-depth analysis to gain insight into the model's performance
### (Some of these functions might not work correctly or might produce errors at the moment; this section is experimental and will be in working condition very soon.)

In [None]:
# Extra packages used by the analysis and widget cells below
# (sklearn.metrics, ipywidgets and PIL are imported there).
# Relies on `sys` having been imported by the first install cell of this notebook.
!{sys.executable} -m pip install scikit-learn ipywidgets pillow

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Ground-truth and predicted class indices for the whole test set
all_labels, all_preds = [], []

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = torch.max(outputs, 1).indices
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Create confusion matrix with one row/column per class index, in class order
class_names = train_dataset.classes
cm = confusion_matrix(all_labels, all_preds, labels=range(len(class_names)))

# Draw on an explicitly created, enlarged axes (change figsize to resize the plot)
cm_fig, cm_ax = plt.subplots(figsize=(12, 12))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap="viridis", xticks_rotation=45, ax=cm_ax)
plt.tight_layout()  # Adjust layout to fit everything properly
plt.show()

#https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ConfusionMatrixDisplay.html (base code sourced from here)

In [None]:
# Per-class accuracy on the test set.
# class_correct[k] / class_total[k] count the correctly classified / all test samples
# whose true label is class index k.
class_correct = [0] * len(train_dataset.classes)
class_total = [0] * len(train_dataset.classes)

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        # No .squeeze() here: on a batch of one it would produce a 0-dim tensor that
        # cannot be indexed. Converting to plain Python lists once per batch also
        # avoids a device round-trip for every single sample.
        hits = (predicted == labels).cpu().tolist()
        targets = labels.cpu().tolist()
        for target, hit in zip(targets, hits):
            class_correct[target] += int(hit)
            class_total[target] += 1

for i in range(len(train_dataset.classes)):
    if class_total[i] == 0:
        # A class without test samples has no defined accuracy (avoids ZeroDivisionError)
        print(f"Accuracy of {train_dataset.classes[i]}: N/A (no test samples)")
    else:
        print(f"Accuracy of {train_dataset.classes[i]}: {100 * class_correct[i] / class_total[i]:.2f}%")

In [None]:
import random

# Collect every test image the model gets wrong as (image, true label, predicted label),
# all moved to the CPU so they can be plotted.
misclassified = []

model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        for i in range(len(labels)):
            if predicted[i] != labels[i]:
                misclassified.append((images[i].cpu(), labels[i].cpu(), predicted[i].cpu()))

# Display a few misclassified images
num_to_show = 5
random.shuffle(misclassified)
# Slicing never raises, so fewer than num_to_show mistakes simply yields a shorter list
to_show = misclassified[:num_to_show]

if not to_show:
    print("No misclassified test images to display.")
else:
    # The tensors are still ImageNet-normalised; undo that so imshow gets values in
    # [0, 1]. These constants must match the Normalize step used when loading the data.
    norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    # squeeze=False keeps a 2-D Axes array even when only one image is shown
    fig, axs = plt.subplots(1, len(to_show), figsize=(15, 5), squeeze=False)

    for ax, (img, true_label, pred_label) in zip(axs[0], to_show):
        restored = (img * norm_std + norm_mean).clamp(0, 1)
        ax.imshow(restored.permute(1, 2, 0).numpy())
        ax.axis('off')
        ax.set_title(
            f"True: {train_dataset.classes[int(true_label)]}\nPred: {train_dataset.classes[int(pred_label)]}"
        )

    plt.show()

In [None]:
from IPython.display import display
import ipywidgets as widgets
from PIL import Image
import io

# Load the trained model for CPU inference.
# weights=None: the ImageNet weights would be overwritten by the checkpoint anyway,
# so there is no reason to download them again.
model = models.resnet18(weights=None)
num_ftrs = model.fc.in_features
# The head must have the same number of outputs as the one that was trained
# (one per dataset class); otherwise load_state_dict fails with a size mismatch.
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))
model.load_state_dict(torch.load("PhytovisionModel.pth", map_location=torch.device('cpu')))
model.eval()

# Preprocessing for an uploaded image: resize, convert to a tensor, then normalise
preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
preprocess = transforms.Compose(preprocess_steps)

# Widgets: a single-file image picker, a button that triggers the prediction,
# and an output area that shows the image and any messages
upload_button = widgets.FileUpload(
    accept='.jpg,.jpeg,.png',
    multiple=False,
)
predict_button = widgets.Button(description="Predict")
output = widgets.Output()

# Most recently uploaded image; stays None until the user uploads one
uploaded_image = None

def on_upload_change(change):
    """Handle a change of the upload widget's value and store the uploaded image.

    Decodes the uploaded file into an RGB PIL image, stores it in the module-level
    ``uploaded_image`` and shows it in the ``output`` widget.

    Args:
        change: Change notification passed by ``observe``; not used, the current
            value is read from ``upload_button`` directly.
    """
    global uploaded_image

    uploads = upload_button.value
    # ipywidgets 7 exposes a dict {filename: file_info}; ipywidgets 8 exposes a tuple
    # of file_info mappings. Reduce both to a plain list of file_info entries.
    if isinstance(uploads, dict):
        file_infos = list(uploads.values())
    else:
        file_infos = list(uploads)

    for file_info in file_infos:
        # bytes(): the content may be a memoryview rather than a bytes object.
        # convert("RGB"): PNGs with alpha, palette or grayscale images would otherwise
        # not have the 3 channels the normalisation step expects.
        raw_bytes = bytes(file_info['content'])
        uploaded_image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")  # Store image

        # Display the uploaded image
        with output:
            output.clear_output()
            display(uploaded_image)
            print("Image uploaded! Click 'Predict' to analyze.")

def on_predict_click(b):
    """Classify the stored image when the 'Predict' button is clicked.

    Prints the predicted class and the model's softmax confidence into the ``output``
    widget, or a reminder when no image has been uploaded yet.

    Args:
        b: The button instance passed by ``on_click``; not used.
    """
    if uploaded_image is None:
        with output:
            print("Please upload an image first!")
        return

    # Preprocess the image. Forcing RGB guarantees the 3 channels that the
    # normalisation step expects, even for RGBA / grayscale / palette images.
    input_tensor = preprocess(uploaded_image.convert("RGB")).unsqueeze(0)
    # Run the input on whatever device the model's parameters live on
    input_tensor = input_tensor.to(next(model.parameters()).device)

    # Perform inference
    with torch.no_grad():
        outputs = model(input_tensor)
        probabilities = torch.softmax(outputs, dim=1)[0]
        predicted = int(torch.argmax(probabilities))
        confidence = probabilities[predicted].item() * 100

    # Translate the class index into its name; fall back to the bare index if the
    # model has more outputs than the dataset has class names.
    class_names = train_dataset.classes
    if predicted < len(class_names):
        prediction_label = class_names[predicted]
    else:
        prediction_label = f"Class {predicted}"

    # Display prediction
    with output:
        print(f"Prediction: {prediction_label}")
        print(f"Confidence: {confidence:.2f}%")

# Register the callbacks: react to changes of the upload widget's value and to
# clicks on the predict button
upload_button.observe(on_upload_change, names='value')
predict_button.on_click(on_predict_click)

# Stack the prompt, the two controls and the output area vertically, then render them
prompt_label = widgets.Label("Upload an image of a fruit or vegetable:")
ui_layout = widgets.VBox([prompt_label, upload_button, predict_button, output])
display(ui_layout)

Credits / Acknowledgements / Resources used in the creation of this project

- Dataset: https://www.kaggle.com/datasets/kritikseth/fruit-and-vegetable-image-recognition/
- PyTorch: [https://pytorch.org/](https://pytorch.org/) + Torchvision: [https://pytorch.org/vision/](https://pytorch.org/vision/)
- PyTorch Tutorials
- Various machine learning tutorials on YouTube
- Machine learning subreddit: [https://www.reddit.com/r/learnmachinelearning/](https://www.reddit.com/r/learnmachinelearning/)


I am grateful to my supervisor for his guidance and academic advice during the project development.
# Not yet complete; the remaining sources will be added very soon.