In [2]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-3133706a-6150-1fb5-838d-c2ef6dd832f3)


In [18]:
# Importing libraries needed for this project

import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns

from tensorflow import keras

from matplotlib import image as mpimg
from matplotlib import pyplot as plt

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support

import os
import random
import itertools
import datetime
import zipfile

In [17]:
# Creating functions that will be used in the project

def load_and_prep_image(file_path,
                        img_shape=224,
                        scale=True):

  """
  Loads a JPEG image from a given file path and turns it into a tensor
  of a predefined shape (img_shape, img_shape, 3).

  Args:

    file_path (str): string path to an image
    img_shape (int): height and width of the returned tensor (default = 224)
    scale (bool): whether to divide pixel values by 255 (default = True)

  Returns:

    Tensor representation of the given image, resized to
    (img_shape, img_shape, 3). Pixel values are divided by 255 when
    scale is True, otherwise they are returned as produced by the resize.
  """

  raw_bytes = tf.io.read_file(file_path)

  # channels=3 forces an RGB result. Without it the decoder keeps the
  # file's own channel count, so a grayscale JPEG would yield a
  # single-channel tensor and break the documented output shape.
  img = tf.image.decode_jpeg(raw_bytes, channels=3)
  img = tf.image.resize(img, [img_shape, img_shape])

  if scale:
    return img / 255.
  return img


def make_conf_matrix(y_true,
                     y_pred,
                     classes=None,
                     figsize=(10, 10),
                     text_size=15,
                     norm=False,
                     savefig=False):

  """
  Plots a confusion matrix that compares predictions to ground truth labels.

  Args:

    y_true: array with ground truth labels
    y_pred: array with predictions
    classes: list or array of class names used as tick labels; when None,
      integer indices are used instead (default = None).
    figsize: expected size of figure (default = (10, 10)).
    text_size: expected size of text on a figure (default = 15).
    norm: whether to append to each cell its share of the row total as a
      percentage (default = False).
    savefig: whether to save the figure to a file. True writes
      "confusion_matrix.png"; a string is used as the target file name
      (default = False).

  Returns:

    None. The confusion matrix is drawn on a newly created figure.
  """

  cm = confusion_matrix(y_true, y_pred)
  n_classes = cm.shape[0]

  # Row-normalise without dividing by zero: a row with no samples would
  # otherwise produce NaN (and a RuntimeWarning); such rows are shown as 0%.
  row_totals = cm.sum(axis=1, keepdims=True)
  cm_norm = np.divide(cm,
                      row_totals,
                      out=np.zeros(cm.shape, dtype=float),
                      where=row_totals != 0)

  fig, ax = plt.subplots(figsize=figsize)
  cax = ax.matshow(cm, cmap=plt.cm.Blues)
  fig.colorbar(cax)

  # "is not None" rather than truthiness: a NumPy array of class names has
  # no unambiguous truth value and would raise ValueError in a bare `if`.
  if classes is not None:
    labels = classes
  else:
    labels = np.arange(n_classes)

  ax.set(title="Confusion Matrix",
         xlabel="Predicted label",
         ylabel="True label",
         xticks=np.arange(n_classes),
         yticks=np.arange(n_classes),
         xticklabels=labels,
         yticklabels=labels)

  ax.xaxis.set_label_position("bottom")
  ax.xaxis.tick_bottom()

  # Cells darker than the midpoint of the value range get white text.
  threshold = (cm.max() + cm.min()) / 2.

  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    if norm:
      cell_text = f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)"
    else:
      cell_text = f"{cm[i, j]}"

    # Draw on the explicit axes instead of relying on pyplot's current axes.
    ax.text(j, i, cell_text,
            horizontalalignment="center",
            color="white" if cm[i, j] > threshold else "black",
            size=text_size)

  if savefig:
    target = savefig if isinstance(savefig, str) else "confusion_matrix.png"
    fig.savefig(target)


def pred_and_plot(model,
                  file_path,
                  class_names):
  """
  Loads an image from a given path, runs the provided model on it and
  plots the image with the predicted class name as the title.

  Args:

    model: trained model exposing a predict() method
    file_path: path to the image that we want to make a prediction on
    class_names: sequence of class names, indexed by predicted class

  Returns:

    None. Shows the image with its predicted label in the plot title.
  """

  image_tensor = load_and_prep_image(file_path)

  # The model is fed a batch, so add a leading batch axis of size 1.
  batch = tf.expand_dims(image_tensor, axis=0)
  probabilities = model.predict(batch)

  # More than one output value -> take the arg-max as the class index;
  # a single output value -> round it to 0 or 1.
  has_multiple_outputs = len(probabilities[0]) > 1
  if has_multiple_outputs:
    class_index = probabilities.argmax()
  else:
    class_index = int(tf.round(probabilities)[0][0])
  predicted_label = class_names[class_index]

  plt.imshow(image_tensor)
  plt.title(f"Prediction: {predicted_label}")


def create_tensorboard_callback(dir_name,
                                experiment_name):

  """
  Builds a TensorBoard callback that writes training logs to
  "<dir_name>/<experiment_name>/<timestamp>".

  Args:

    dir_name: target location to keep TensorBoard log files
    experiment_name: name of the experiment, used to tell log files apart

  Returns:

    TensorBoard callback object to be passed as one of the callbacks
    during the model training process.
  """

  # The timestamp (YYYYmmdd-HHMMSS) gives every call its own sub-directory.
  timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  run_dir = "/".join([dir_name, experiment_name, timestamp])

  return tf.keras.callbacks.TensorBoard(log_dir=run_dir)



def plot_training_curves(history):

  """
  Draws loss and accuracy curves for the training and validation sets,
  side by side on one figure.

  Args:

    history: object whose .history mapping holds the 'loss', 'val_loss',
      'accuracy' and 'val_accuracy' series (e.g. a Keras History object)

  Returns:

    None. Plots the loss panel on the left and the accuracy panel on
    the right.
  """

  metrics = history.history

  # One row per panel: (title, training label, validation label,
  # training series, validation series).
  panels = [
      ('Loss', 'training_loss', 'val_loss',
       metrics['loss'], metrics['val_loss']),
      ('Accuracy', 'training_accuracy', 'val_accuracy',
       metrics['accuracy'], metrics['val_accuracy']),
  ]

  epochs = range(len(metrics['loss']))

  plt.figure(figsize = (24,7))

  for position, panel in enumerate(panels, start=1):
    heading, train_label, val_label, train_series, val_series = panel

    plt.subplot(1, 2, position)
    plt.plot(epochs, train_series, label=train_label)
    plt.plot(epochs, val_series, label=val_label)
    plt.title(heading)
    plt.xlabel('Epochs')
    plt.legend()




def compare_histories(org_history,
                     new_history,
                     init_epochs=5):

    """
    Plots the training curves of two consecutive training runs as one
    continuous history.

    Args:

      org_history: History object from the original model
      new_history: History object from the fine-tuned model
      init_epochs: number of epochs in the initial training stage; a
        vertical marker is drawn at x = init_epochs - 1 (default = 5)

    Returns:

      None. Shows a figure with training/validation accuracy on top and
      training/validation loss below, each with a "Start Fine Tuning"
      marker line.
    """

    before = org_history.history
    train_acc, train_loss = before["accuracy"], before["loss"]
    valid_acc, valid_loss = before["val_accuracy"], before["val_loss"]

    # Append the fine-tuning series to the original ones.
    after = new_history.history
    total_acc = train_acc + after["accuracy"]
    total_loss = train_loss + after["loss"]
    total_val_acc = valid_acc + after["val_accuracy"]
    total_val_loss = valid_loss + after["val_loss"]

    boundary = init_epochs - 1

    def _draw_panel(row, train_curve, valid_curve,
                    train_label, valid_label, legend_loc, heading):
        # One subplot: both curves plus a vertical line spanning the
        # current y-limits at the fine-tuning boundary.
        plt.subplot(2, 1, row)
        plt.plot(train_curve, label=train_label)
        plt.plot(valid_curve, label=valid_label)
        plt.plot([boundary, boundary], plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(heading)

    plt.figure(figsize=(8, 8))

    _draw_panel(1, total_acc, total_val_acc,
                'Training Accuracy', 'Validation Accuracy',
                'lower right', 'Training and Validation Accuracy')

    _draw_panel(2, total_loss, total_val_loss,
                'Training Loss', 'Validation Loss',
                'upper right', 'Training and Validation Loss')

    plt.xlabel('epoch')
    plt.show()


def unzip_data(file_path):

  """
  Extracts all members of a zip archive into the current working directory.

  Args:

    file_path (str): path to the zip file that needs to be extracted.

  Returns:

    None. The archive contents are written to the current working directory.
  """

  # The context manager closes the archive even if extraction fails,
  # so the file handle is never leaked.
  with zipfile.ZipFile(file_path, "r") as zip_ref:
    zip_ref.extractall()



def walk_through_dir(dir_path):

  """
  Walks through dir_path and reports what each directory contains.

  Args:

    dir_path (str): target directory

  Returns:

    None. For dir_path and every directory below it, prints one line
    with the number of sub-directories and the number of files found
    directly inside it.
  """

  for current_dir, subdirs, files in os.walk(dir_path):
    dir_count = len(subdirs)
    file_count = len(files)
    print(f"There are {dir_count} directories and {file_count} images in '{current_dir}'.")



from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def calculate_results(y_true, y_pred):
  """
  Calculates accuracy, precision, recall and f1 score of a
  classification model.

  Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array

  Returns a dictionary with the keys "accuracy", "precision", "recall"
  and "f1". Accuracy is multiplied by 100; the other three are the
  "weighted"-average values and are returned unscaled.
  """
  # Accuracy is reported as a percentage.
  accuracy_pct = accuracy_score(y_true, y_pred) * 100

  # The fourth element (support) is not needed for the summary.
  weighted_scores = precision_recall_fscore_support(y_true, y_pred, average="weighted")
  precision, recall, f1, _support = weighted_scores

  return {"accuracy": accuracy_pct,
          "precision": precision,
          "recall": recall,
          "f1": f1}
