In [1]:
#!pip install wget
import os, warnings
import tensorflow as tf
from tensorflow.keras.layers import RandomFlip, RandomRotation
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Project 2 - Veggie Classification

For this assignment you'll need to classify some images of vegetables. 

## Parts

Please do two separate classifications:
<ol>
<li> First, create a model from scratch. 
<li> Use transfer learning to use a pretrained model of your choice, adapted to this data. 
</ol>

There won't be an explicit evaluation of accuracy, but you should take some steps to make each model as accurate as you reasonably can, any tuning option is fair game. Along with that, please structure it into a notebook that is well structured and clear that explains what you did and found. Think about:
<ul>
<li> Sections and headings. 
<li> A description of the approach taken (e.g. what did you do to determine size, tune, evaluate, etc...)
<li> Visualization of some important things such as a confusion matrix and maybe some images. 
<li> Results, mainly focused on the scoring of the test data. 
</ul>

The descriptions and explainations should highlight the choices you made and why you made them. Figure up to about a page or so worth of text total, explain what happened but don't write an essay. 

## Deliverables

Please sumbmit a link to your github, where everyhting is fully run with all the outputs showing on the page. As well, in the notebook please add some kind of switch controlled by a variable that will control if the notebook runs to train the model or to load the model in from weights - so I can download it and click run all, it will load the saved weights, and predict.

### Dataset

The code in the start of this notebook will download and unzip the dataset, and there is also a simple example of creating datasets. You can change the dataset bit to use a different approach if you'd like. The data is already split into train, validation, and test sets. Please treat the separate test set as the final test set, and don't use it for any training or validation. Each folder name is its own label.

### Evaluation

Marking will be based on the following:
<ul>
<li> Models are cretaed, tuned, and effective at classifying the data: 40%
<li> Descriptions and explanations of the approach taken: 20%
<li> Code is well structured and clear: 20%
</ul>

Overall the marking is pretty simple and direct, walk through the process of predicting the veggies, explain what you did, and show the results. If you do that, it'll get a good mark.

### Tips

Some hints that may be helpful to keep in mind:
<ul>
<li> The data is pretty large, so you'll want to use datasets rather than load everything into memory. The Keras docs have a few examples of different ways to load image data, our examples showed image generators and the image from directory datasets.  
<li> Be careful of batch size, you may hit the colab limits. 
<li> You'll want to use checkpoints so you can let it train and pick up where you left off.
<li> When developing, using a smaller dataset sample is a good idea. These weights could also be saved and loaded to jump start training on the full data. 
<li>

### Download and Unzip Data

In [2]:
def bar_custom(current, total, width=80):
    print("Downloading: %d%% [%d / %d] bytes" % (current / total * 100, current, total))
import wget
import zipfile
zip_name = "train.zip"

url = "https://jrssbcrsefilesnait.blob.core.windows.net/3950data1/vegetable_image_dataset.zip"

if not os.path.exists(zip_name):
    wget.download(url, zip_name, bar=bar_custom)

with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall()

In [3]:
# Generate Datasets - you can change this if desired
# ENSURE FILE PATHS MATCH CORRECTLY
IMAGE_SIZE=(224,224)
train_dir='Vegetable Images/train'
val_dir='Vegetable Images/validation'
batch_size = 16

# Load training data
train_ds = image_dataset_from_directory(
    train_dir,
    label_mode='categorical',
    image_size = IMAGE_SIZE,
    batch_size=batch_size
)

val_ds = image_dataset_from_directory(
    val_dir,
    label_mode='categorical',
    image_size = IMAGE_SIZE,
    batch_size = batch_size,
)

Found 15000 files belonging to 15 classes.
Found 3000 files belonging to 15 classes.


## Data Preparation

In [4]:
class_names = train_ds.class_names
num_classes = len(class_names)
print("Number of classes:", num_classes)

data_augmentation = tf.keras.Sequential([
    RandomFlip('horizontal'),
    RandomRotation(0.2)
])

train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))

Number of classes: 15


We start by extracting the class names from the training dataset, which was loaded using image_dataset_from_directory. We retrieve a list of these class names directly from the dataset. Then calculates the total number of unique classes by finding the length of this list. The count of classes is printed to verify the number correctly.

## Custom Model Training

The model function defines a neural network tailored for image classification. This setup prepares the model for efficient learning and classification to use feature extraction for handling complex image data.

In [5]:
def create_model(num_classes):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


The input function defines the process of either training a new model or loading an existing one based on user input.

In [6]:
def train_or_load_model(train_ds, val_ds, num_classes):
    train_or_not = input("Type 'train' or 'load' model:").lower()
    model = create_model(num_classes)

    if train_or_not == "train":
        history = model.fit(train_ds, validation_data=val_ds, epochs=10)
        model.save('path_to_save_model/model.h5')  
        return model, history
    elif train_or_not == "load":
        model.load_weights('path_to_saved_weights/model.h5')
        return model, None
    else:
        print('Typo')
        return None, None

In [7]:
num_classes = len(class_names)
model, history = train_or_load_model(train_ds, val_ds, num_classes)

Epoch 1/10


  super().__init__(


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 148ms/step - accuracy: 0.1251 - loss: 15.7970 - val_accuracy: 0.1473 - val_loss: 2.5315
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 145ms/step - accuracy: 0.1604 - loss: 2.5242 - val_accuracy: 0.1530 - val_loss: 2.5279
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 142ms/step - accuracy: 0.1712 - loss: 2.5034 - val_accuracy: 0.1893 - val_loss: 2.4259
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 139ms/step - accuracy: 0.2064 - loss: 2.4051 - val_accuracy: 0.3190 - val_loss: 2.0537
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 137ms/step - accuracy: 0.2945 - loss: 2.1298 - val_accuracy: 0.4527 - val_loss: 1.6576
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 140ms/step - accuracy: 0.4045 - loss: 1.7621 - val_accuracy: 0.5797 - val_loss: 1.2647
Epoch 7/10
[1



## Transfer Learning Model

We used a transfer learning model using MobileNetV2. The model is applied with GlobalAveragePooling2D to adapt it efficiently to the specific classification task.

In [8]:
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False 

transfer_model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(num_classes, activation='softmax') 
])

transfer_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

transfer_history = transfer_model.fit(train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 60ms/step - accuracy: 0.5137 - loss: 1.5538 - val_accuracy: 0.7557 - val_loss: 0.7912
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 58ms/step - accuracy: 0.7795 - loss: 0.7146 - val_accuracy: 0.7920 - val_loss: 0.6494
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 80ms/step - accuracy: 0.8216 - loss: 0.5758 - val_accuracy: 0.8127 - val_loss: 0.5884
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 86ms/step - accuracy: 0.8417 - loss: 0.5106 - val_accuracy: 0.8280 - val_loss: 0.5268
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 84ms/step - accuracy: 0.8566 - loss: 0.4623 - val_accuracy: 0.8443 - val_loss: 0.4719
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 84ms/step - accuracy: 0.8696 - loss: 0.4206 - val_accuracy: 0.8743 - val_loss: 0.4197
Epoch 7/10
[1m9

## Test Best Models and Illustrate Results

In [9]:
test_dir='Vegetable Images/test'
test_ds = image_dataset_from_directory(
    test_dir,
    label_mode='categorical',
    image_size = IMAGE_SIZE,
    batch_size = batch_size,
)

Found 3000 files belonging to 15 classes.


The transfer learning and custom models are evaluated on the test dataset to measure their performance. The custom model's predictions on the test dataset are processed to determine the predicted classes, and these predictions are compared against the true labels to evaluate the model's classification accuracy.

In [12]:
transfer_model_results = transfer_model.evaluate(test_ds)
print('Transfer Model Test Loss, Test Accuracy:', transfer_model_results)

custom_model_results = model.evaluate(test_ds)
print('Custom Model Test Loss, Test Accuracy:', custom_model_results)

predictions = model.predict(test_ds)
predicted_classes = np.argmax(predictions, axis=1)
test_labels = np.concatenate([y.numpy() for x, y in test_ds], axis=0)
true_classes = np.argmax(test_labels, axis=1)

[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 45ms/step - accuracy: 0.8625 - loss: 0.3946
Transfer Model Test Loss, Test Accuracy: [0.4273783266544342, 0.8543333411216736]
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - accuracy: 0.8224 - loss: 0.5324
Custom Model Test Loss, Test Accuracy: [0.5885650515556335, 0.8143333196640015]
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 36ms/step
