In [1]:
# Imports
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec

import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow import keras
from keras import layers
from keras.layers import Dense, Flatten, MaxPooling2D, PreprocessingLayer, Dropout


# Project 2 - Veggie Classification

For this assignment you'll need to classify some images of vegetables. 

## Deliverables

Please submit your files into Moodle as follows:
<ul>
<li> A zipped .h5 model that has been trained. See the notes towards the end of the file. 
<li> Your .ipynb file. 
<li> A note (~1 to 2 paragraphs) in the comments of Moodle noting what you did to improve accuracy beyond just making a model. 
</ul>

### Grades

<ul>
<li> Accuracy - 60%
<li> Code - readable and logical - 20%
<li> Explanatory note - 20%
</ul>

## Load Training and Validation Data

Please download the zip file from Moodle and place it into your folder. If running on Colab you'll need to load it into the workspace. 

#### Colab and Files

You can load files into your Colab workspace via drag and drop; however, this file storage is temporary and will go away when you end your runtime. You can also use your Google Drive to store it without having to load it multiple times - there are lots of guides online for setting this up. 

#### Temporary Files

If there are temporary files generated anywhere in the image folders you will need to remove them or you will probably get weird errors. For example, on a Mac (which I'm using to make this), there are temporary files that start with "._". If these appear for you, you can clear them via:
<ul>
<li> Mac: open a terminal at the top level of the image folder and run "dot_clean -n ."
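<li> Windows: the Command Prompt does not have a Unix-style <code>find</code>, so either open a Unix-like shell (Git Bash or WSL) at the top level of the image folder and run <code>find . -name "._*" -exec rm '{}' \; -print</code>, or open a Command Prompt there and run <code>del /s /q "._*"</code>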
</ul>

If this doesn't work, or if there are any other temporary files created in your file system you can remove them any other way - e.g. use search to find the files and remove them, sort by filetype and delete, etc... The commands above are just shortcuts. 

#### File Naming

Once things are unzipped ensure that the paths are correct and match your file paths. 

#### Loading from Google Drive

You can also use an adaptation of the code below to load the file from your Google Drive if you're in Colab. You'll get some permission prompts if you haven't done this before. 

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

#!cp "/content/drive/My Drive/Vegetables.zip" "Vegetables.zip"

In [2]:
# UNZIP - Ensure that the filename is correct
import zipfile

zip_name = "Vegetables.zip"

# Unpack every member of the archive into the current working directory.
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile(file=zip_name, mode='r') as archive:
    archive.extractall(path=None)

In [6]:
# Generate Datasets - you can change this if desired
# ENSURE FILE PATHS MATCH CORRECTLY
IMAGE_SIZE = (224, 224)
train_dir = 'Vegetables/train'
val_dir = 'Vegetables/validation'

# Options shared by both splits: one-hot ('categorical') labels and a common
# target image size.
shared_loader_options = dict(label_mode='categorical', image_size=IMAGE_SIZE)

# Training split - shuffling is requested explicitly.
train_ds = image_dataset_from_directory(train_dir, shuffle=True, **shared_loader_options)

# Validation split - every option not listed here is left at the loader's default.
val_ds = image_dataset_from_directory(val_dir, **shared_loader_options)

Found 20000 files belonging to 10 classes.
Found 4000 files belonging to 10 classes.


In [None]:
# Declare Size Variables - Needed for Compressing
# These values are used to compute the pruning schedule's end_step
# (ceil(train_samples / batch_size) * epochs), so they must describe the data
# that is actually fed to fit(). Update them if you change the datasets.
batch_size = 32        # train_ds is created without batch_size, so the loader's default (32) applies
train_samples = 20000  # the training split reports "Found 20000 files belonging to 10 classes."
epochs = 5             # number of passes over train_ds during the pruning fine-tune

#### Training and Validation Data

After the file has been unzipped and loaded into datasets, you should see:

Found 20000 files belonging to 10 classes.
Found 4000 files belonging to 10 classes.

The first is the training dataset, the second is the validation dataset. 

In [None]:
# Do Modelling Stuff
#######################

# NOTE - HOLD ON FINAL SUBMISSION STUFF
## I NEED TO ENSURE THAT THERE'S NO CHANCE OF ANYONE EXCEEDING SIZE LIMITS OF MOODLE

### Compress and Save Model

For this you have a hard upper limit for model size of 400mb. Why? This is the largest that Moodle can accept, and I do not have any way to work around that. 

The model will probably need to be pruned to be smaller, then compressed as a zip. In testing, the size can vary by a lot; pruning and compressing can get it down to less than 100 MB. The size of yours does not matter, as long as it is small enough. 

In [None]:
# Compression Setup

#model = 


In [None]:
!pip install tensorflow_model_optimization
import tensorflow_model_optimization as tfmot

end_step = np.ceil(1.0 * train_samples / batch_size).astype(np.int32) * epochs
print(end_step)

new_pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                   final_sparsity=0.90,
                                                   begin_step=0,
                                                   end_step=end_step,
                                                   frequency=100)
}

new_pruned_model = tfmot.sparsity.keras.prune_low_magnitude(model, **new_pruning_params)
new_pruned_model.summary()

new_pruned_model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy'])

In [None]:
# Fine-tune the pruning-wrapped model so sparsity is applied during training.
logdir = "logs"
callbacks = [
    # Keeps the pruning wrappers in sync with the optimizer step each batch.
    tfmot.sparsity.keras.UpdatePruningStep(),
    # Writes pruning summaries under `logdir`.
    tfmot.sparsity.keras.PruningSummaries(log_dir=logdir, profile_batch=0)
]

# train_ds is a tf.data.Dataset that already yields batches, so batch_size must
# not be passed to fit(): Keras documents it as unsupported for dataset inputs,
# and passing a value that differs from the real batch size is misleading.
# The History object is kept so the training curves can be inspected later.
pruning_history = new_pruned_model.fit(
    train_ds,
    epochs=epochs,
    verbose=1,
    callbacks=callbacks,
    validation_data=val_ds,
)

In [None]:
# Remove the pruning wrappers and save the resulting model as an .h5 file.
final_model = tfmot.sparsity.keras.strip_pruning(new_pruned_model)
final_model.save("final.h5")
final_model.summary()

# The deliverable is a *zipped* .h5, and pruned (zeroed) weights only take up
# less space once the file is compressed, so create the archive here.
# (`zipfile` and `os` are imported in earlier cells of this notebook.)
with zipfile.ZipFile("final.zip", "w", compression=zipfile.ZIP_DEFLATED) as model_zip:
    model_zip.write("final.h5")

# Report both sizes and check the archive against the 400 MB Moodle upload limit.
MOODLE_LIMIT_MB = 400
h5_mb = os.path.getsize("final.h5") / (1024 * 1024)
zip_mb = os.path.getsize("final.zip") / (1024 * 1024)
print(f"final.h5: {h5_mb:.1f} MB -> final.zip: {zip_mb:.1f} MB")
if zip_mb > MOODLE_LIMIT_MB:
    print(f"WARNING: final.zip exceeds the {MOODLE_LIMIT_MB} MB limit - use a smaller model or prune more aggressively.")