# Chicken Disease Image Classification

The following code chunks contain the code required either to retrain the final model from scratch (section 1) or to download and load the already-trained model (section 2).

## 1. Creating a new model

In [None]:
## Setting seeds for reproducibility
# Seeds both NumPy's and TensorFlow's global random generators.

from numpy.random import seed
seed(1)
from tensorflow.random import set_seed
set_seed(2)

## Setting up required packages
!pip install split-folders

import os
import os.path
import shutil
import splitfolders
import warnings
import matplotlib.pyplot as plt
import matplotlib.text as txt
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
import pandas as pd
from pandas import DataFrame
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from tensorflow.keras.layers.experimental import preprocessing
from google.colab import files
from keras.layers.rnn import time_distributed
import time
import seaborn as sn
from sklearn.metrics import confusion_matrix
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import EfficientNetB3

In [None]:
def mergefolders(root_src_dir, root_dst_dir):
    """
    Recursively copies the contents of one folder into another.

    Sub-folders are recreated under the destination when missing, and a file
    that already exists at the destination is replaced by the source copy.
    The source folder itself is left in place. If the source folder does not
    exist nothing is copied (os.walk simply yields no entries).

    root_src_dir: str - origin folder
    root_dst_dir: str - destination folder

    """
    for src_dir, _subdirs, filenames in os.walk(root_src_dir):
        # Map through the relative path rather than str.replace so that a
        # trailing separator on the source root cannot glue two path
        # components together (e.g. 'src/sub' -> 'dstsub').
        rel_dir = os.path.relpath(src_dir, root_src_dir)
        dst_dir = os.path.normpath(os.path.join(root_dst_dir, rel_dir))
        os.makedirs(dst_dir, exist_ok=True)

        for filename in filenames:
            dst_file = os.path.join(dst_dir, filename)
            # Remove first so the copy always creates a fresh file
            if os.path.exists(dst_file):
                os.remove(dst_file)
            shutil.copy(os.path.join(src_dir, filename), dst_dir)


In [None]:
def get_data(test = 0.1, train = 0.8, validate = 0.1):
  
  """
  Removes any previously downloaded data then downloads 'chicken-disease-1' data from kaggle.
  Splits data into seperate folders based on disease, then splits data into 
  test, train and validate using a default of 80/10/10

    test: float - proportion of data to be partitioned into test data
    train: float - proportion of data to be partitioined into training data
    validate: float - proportion of data to be partitioned into validation data
    
    """

 # remove any previously downloaded data
  !rm -rf '/content/Train'
  !rm -rf '/content/train_data.csv'
  !rm -rf '/content/output'
  !rm -rf '/content/chicken-disease-1 .zip'

  !kaggle datasets download -d 'allandclive/chicken-disease-1 ' -p /content #download data
  !unzip -qq 'chicken-disease-1 .zip' # unzip data
  

  folder_path = '/content/Train' # Set path where images are

# create list of images to go through below
  images = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

  for image in images:
    folder_name = image.split('.')[0] # Creates folder based on first part of name

    new_path = os.path.join(folder_path, folder_name) # creates new path name
    if not os.path.exists(new_path): # checks if path exixts
      os.makedirs(new_path)  # if doesn't exists creates it

    old_image_path = os.path.join(folder_path, image)
    new_image_path = os.path.join(new_path, image)
    shutil.move(old_image_path, new_image_path) # moves picture


  # merge all folders with similar contents

  mergefolders('/content/Train/pcrcocci', '/content/Train/cocci')
  mergefolders('/content/Train/pcrsalmo', '/content/Train/salmo')
  mergefolders('/content/Train/pcrncd', '/content/Train/ncd')
  mergefolders('/content/Train/pcrhealthy', '/content/Train/healthy')

  # remove folders not needed

  !rm -rf '/content/Train/pcrsalmo'
  !rm -rf '/content/Train/pcrncd'
  !rm -rf '/content/Train/pcrhealthy'                    
  !rm -rf '/content/Train/pcrcocci'

  # Check folder counts
  print(f"cocci {len(os.listdir('/content/Train/cocci'))}")
  print(f"healthy {len(os.listdir('/content/Train/healthy'))}")
  print(f"ncd {len(os.listdir('/content/Train/ncd'))}")
  print(f"salmonella {len(os.listdir('/content/Train/salmo'))}")

  # split data into test, train and validate partitions 

  splitfolders.ratio('Train', output="output", seed=1337, ratio=(train,test,validate))


In [None]:
def data_size(pixel_size = 224, batch_size = 128):
  """
    Loads the already split data from '/content/output' as three datasets.
    Images are resized to (pixel_size, pixel_size) and grouped into batches
    of batch_size; labels are inferred from the folder names and one-hot
    encoded. Only the training data is shuffled.
    Each dataset is cached and prefetched with an autotuned buffer size.
    Returns a list in the order train, test, validate (test comes BEFORE
    validate).

    pixel_size: int - Single number (x) which is used to set the pixel size of the image to (x,x)
    batch_size: int - Size of batch of images for training the model
  """

  def load_partition(directory, shuffle):
    # identical loader settings for every partition apart from shuffling
    return image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='categorical',
        image_size=[pixel_size, pixel_size],
        interpolation='nearest',
        batch_size=batch_size,
        shuffle=shuffle,
    )

  training = load_partition('/content/output/train', True)
  validation = load_partition('/content/output/val', False)
  testing = load_partition('/content/output/test', False)

  # NOTE(review): cache() is applied after the loader's own shuffling, so the
  # training order may stay fixed after the first epoch - confirm if intended.
  return [
      partition.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
      for partition in (training, testing, validation)
  ]

In [None]:
## Need to add kaggle.json file to allow data download from kaggle
from google.colab import files
files.upload()

!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Augmentation pipeline to include in models; layers are applied in the
# order they are added below.
data_augmentation = keras.Sequential()
data_augmentation.add(layers.RandomFlip("horizontal"))  # mirror left/right
data_augmentation.add(layers.RandomFlip("vertical"))  # mirror top/bottom
data_augmentation.add(layers.RandomZoom(0.2))
data_augmentation.add(layers.RandomRotation(factor=0.2))
data_augmentation.add(layers.RandomTranslation(height_factor=0.1, width_factor=0.1))

In [None]:
# location the best model is saved to during training
checkpoint_filepath = '/content/Models'

# early stopping to reduce overfitting: stop once val_accuracy has gone
# 20 epochs without improving by at least 0.001, then restore the best weights
_early_stopping = callbacks.EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# checkpoint that keeps only the model with the highest val_accuracy so far
_best_checkpoint = callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# halve the learning rate when val_accuracy reaches a plateau
_reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    verbose=1,
)

# list of callbacks to include in model
callback = [_early_stopping, _best_checkpoint, _reduce_lr]

In [None]:
# InceptionV3 convolutional base without its fully-connected top,
# sized for 224x224 RGB images (weights argument left at the library default)
inception = InceptionV3(input_shape=(224, 224, 3), include_top=False)

In [None]:
# create a function to build and train the final model
def final_model(data = True, graph = True, epoch = 200, verbose = 1, fine_tune = 0 ):
  """
  Function to build, compile and train the final model, made up of a
  rescaling layer, the data augmentation layers, a pretrained InceptionV3
  convolutional base and a dense head. It is compiled using the adam optimiser.

  Uses the notebook-level variables ds_train_, ds_valid_, data_augmentation
  and callback, so the cells defining them must be run first.

  data: bool - If the history information should be printed, default is True
  graph: bool - If the loss and accuracy graphs should be plotted,
  default is True
  epoch: int - maximum number of epochs for the model to run for
  verbose: int - 1 for show progress, 0 for don't show progress
  fine_tune: int - number of layers at the end of the convolutional base to
  leave trainable; 0 freezes the whole base

  Returns the History object produced by model.fit

  """
  tf.get_logger().setLevel('ERROR') # only show TensorFlow log messages of level ERROR and above

  # Pretrained convolutional layers are loaded using the Imagenet weights.
  # Include_top is set to False, in order to exclude the model's fully-connected layers.
  # A fresh base is created on every call so repeated runs do not share weights.
  conv_base = InceptionV3(include_top = False, input_shape = (224,224,3) )

  # Defines how many layers to freeze during training.
  # Everything except the last `fine_tune` layers is switched to
  # non-trainable; with fine_tune = 0 the whole base is frozen.
  frozen_layers = conv_base.layers[:-fine_tune] if fine_tune > 0 else conv_base.layers
  for layer in frozen_layers:
    layer.trainable = False

  ## define the model including augmentation and the (partly) frozen base
  model = keras.Sequential([
    tf.keras.layers.Rescaling(1./255),
    data_augmentation,
    conv_base, # the base configured above, so that fine_tune takes effect
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(4, activation='softmax'),
  ])
  # compile the model
  model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
  )
  # train the model
  history = model.fit(
    ds_train_,
    validation_data=ds_valid_,
    callbacks=callback, # callback is already a list, so it is passed as is
    epochs=epoch,
    verbose=verbose)

  # show results if required
  history_frame = pd.DataFrame(history.history)
  if data:
    print(history_frame)
  if graph:
    # rows are selected from label 1, so the first epoch is left out of the plots
    history_frame.loc[1:, ['loss', 'val_loss']].plot()
    history_frame.loc[1:, ['accuracy', 'val_accuracy']].plot()

  return history



In [None]:
# Download and split the data, then load the partitions
# (data_size returns them in the order train, test, validate)
get_data()
ds_train_, ds_test_, ds_valid_ = data_size()

In [None]:
# Train the final model using the default settings
final_model()

## 2. Downloading the trained model

In [None]:
# download model
!wget -O model.zip https://www.dropbox.com/s/t49w8rr9jm5a36d/Models.zip?dl=0

# unzip model
!unzip /content/model.zip

# Load model
model = tf.keras.models.load_model('/content/Models')
