# Experiment

## Define custom implementations

In [0]:
import os
import shutil

def read_label_from_txt_file(path):
  """Reads the label stored on the first line of a text file.

  Args:
    path: Path of the label file.

  Returns:
    The first line of the file without surrounding whitespace.
  """
  with open(path) as file:
    # readline() keeps the trailing line break; strip it so that the label can
    # safely be used as a folder name and compared with other labels.
    return file.readline().strip()

def read_labels_of_images(path):
  """Collects the PNG images of a folder together with their labels.

  The label of an image "<name>.png" is read from the file "<name>.txt" in
  the same folder.

  Args:
    path: Folder containing the images and their label files.

  Returns:
    A tuple (images, labels, label_set): the image file names, their labels
    in the same order, and the set of distinct labels.
  """
  images = [name for name in os.listdir(path) if name.endswith(".png")]
  labels = []
  for name in images:
    # swap the ".png" extension for ".txt" to get the label file
    label_file = os.path.join(path, name[:-4] + ".txt")
    labels.append(read_label_from_txt_file(label_file))
  return images, labels, set(labels)

def sort_images_after_labels(src, dst):
  """Copies the images found in `src` into per-label folders below `dst`.

  For every distinct label a folder `dst/<label>` is created (if it does not
  exist yet) and each image is copied into the folder of its label. The
  source images are left untouched.

  Args:
    src: Folder with the images and their label files.
    dst: Folder below which the label folders are created.
  """
  images, labels, label_set = read_labels_of_images(src)

  # one target folder per distinct label
  for label in label_set:
    label_dir = os.path.join(dst, label)
    os.makedirs(label_dir, exist_ok=True)

  # copy every image into the folder of its label
  for image, label in zip(images, labels):
    source_file = os.path.join(src, image)
    target_file = os.path.join(dst, label, image)
    shutil.copyfile(source_file, target_file)

In [0]:
class PathConfig:
  """Holds the path configuration for training.

  Attributes:
    model_path:   directory of the saved model
    model_file:   file name of the saved model
    session_path: directory of the session object collecting the training data
    session_file: file name of the session object
    train_path:   directory of the training data
    test_path:    directory of the testing data
  """

  def __init__(self, model_path, model_file, session_path, session_file,
               train_path, test_path):
    # location of the model checkpoint
    self.model_path, self.model_file = model_path, model_file
    # location of the pickled training session
    self.session_path, self.session_file = session_path, session_file
    # locations of the data sets
    self.train_path, self.test_path = train_path, test_path

In [0]:
class TrainingMode:
  """Enum-like holder of the constants that specify the training mode."""

  # Start training without an existing training session.
  # If a training session already exists, it is ignored and overwritten.
  NEW_TRAINING        = 0   
  
  # Try to resume training with an existing training session.
  # If no session exists, a new one is created.
  TRY_RESUME_TRAINING = 1   

  # Resume training with an existing training session.
  # If no training session exists, an exception is thrown.
  RESUME_TRAINING     = 2

In [0]:
import pickle

class TrainingSession:
  """Holds and collects the training data that is produced during training.

  One entry is recorded per super epoch (see `append`). The whole object is
  persisted with pickle (see `save` / `load`).
  """

  def __init__(self):
    self.super_epochs      = 0      # counter of super epochs
    self.total_epochs      = 0      # counter of all epochs
    self.training_history  = []     # list to aggregate the training history of each super epoch
    self.super_epoch_start = []     # list with timestamps when the super epoch started
    self.super_epoch_end   = []     # list with timestamps when the super epoch ended

  @staticmethod
  def load(path):
    """Loads a pickled session from `path`.

    Declared as a static method so that it can be called on the class as well
    as on an instance.

    Args:
      path: Path of the pickled session file.

    Returns:
      The unpickled object.

    Raises:
      FileNotFoundError: If `path` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load session files
    # that were written by this notebook.
    with open(path, "rb") as file:
      return pickle.load(file)

  def save(self, path):
    """Pickles this session to `path`, overwriting an existing file."""
    with open(path, "wb") as file:
      pickle.dump(self, file)

  def append(self, num_epochs, history, start, end):
    """Records the result of one finished super epoch.

    Args:
      num_epochs: Number of epochs that were trained in the super epoch.
      history:    Training history of the super epoch.
      start:      Timestamp when the super epoch started.
      end:        Timestamp when the super epoch ended.
    """
    self.super_epochs += 1
    self.total_epochs += num_epochs
    self.training_history.append(history)
    self.super_epoch_start.append(start)
    self.super_epoch_end.append(end)

In [0]:
import os

class TrainingSessionProvider:
  """Provides a TrainingSession object by loading an existing one or
     creating a new one."""

  def provide(self, path_config, training_mode):
    """Returns the training session that matches the given training mode.

    Args:
      path_config:   Object with the attributes `session_path` and
                     `session_file` that locate the session file.
      training_mode: One of the TrainingMode constants.

    Returns:
      A new session (NEW_TRAINING), the loaded session (RESUME_TRAINING), or
      the loaded session with a fallback to a new one (TRY_RESUME_TRAINING).

    Raises:
      FileNotFoundError: In RESUME_TRAINING mode if no session file exists.
      ValueError: If `training_mode` is none of the known constants.
    """
    # Compare by value: identity (`is`) of equal ints is an implementation
    # detail of the interpreter and must not be relied upon.
    if training_mode == TrainingMode.NEW_TRAINING:
      return self._create_session()

    elif training_mode == TrainingMode.RESUME_TRAINING:
      path = os.path.join(path_config.session_path, path_config.session_file)
      return self._load_session(path)

    elif training_mode == TrainingMode.TRY_RESUME_TRAINING:
      path = os.path.join(path_config.session_path, path_config.session_file)
      return self._try_load_session_otherwise_create(path)

    else:
      # %r instead of %d: formatting must not fail for non-integer values
      raise ValueError("Unexpected enum value: %r" % (training_mode,))

  def _try_load_session_otherwise_create(self, path):
    """Loads the session at `path`; creates a new one if the file is missing."""
    print("Try loading session")
    try:
      return self._load_session(path)
    except FileNotFoundError:
      print("Session not found")
      return self._create_session()

  def _load_session(self, path):
    """Loads the pickled session stored at `path`."""
    print("Loading session from:", path)
    return TrainingSession.load(path)

  def _create_session(self):
    """Creates an empty session."""
    print("Creating new session")
    return TrainingSession()

In [0]:
from abc import ABC, abstractmethod
import tensorflow as tf

class AbstractModelProvider(ABC):
  """Model provider that loads an existing model or creates a new one.

  Subclasses implement `_create_model` and may override
  `_supply_custom_objects`.
  """

  def provide(self, path_config, training_mode):
    """Returns the model that matches the given training mode.

    Args:
      path_config:   Object with the attributes `model_path` and `model_file`
                     that locate the model file.
      training_mode: One of the TrainingMode constants.

    Returns:
      A new model (NEW_TRAINING), the loaded model (RESUME_TRAINING), or the
      loaded model with a fallback to a new one (TRY_RESUME_TRAINING).

    Raises:
      ValueError: If `training_mode` is none of the known constants.
    """
    # Compare by value: identity (`is`) of equal ints is an implementation
    # detail of the interpreter and must not be relied upon.
    if training_mode == TrainingMode.NEW_TRAINING:
      return self._create_model()

    elif training_mode == TrainingMode.RESUME_TRAINING:
      path = os.path.join(path_config.model_path, path_config.model_file)
      # the custom objects have to be passed here as well, otherwise the call
      # fails because of the missing argument
      return self._load_model(path, self._supply_custom_objects())

    elif training_mode == TrainingMode.TRY_RESUME_TRAINING:
      path = os.path.join(path_config.model_path, path_config.model_file)
      return self._try_load_model_otherwise_create(path)

    else:
      # %r instead of %d: formatting must not fail for non-integer values
      raise ValueError("Unexpected enum value: %r" % (training_mode,))

  def _try_load_model_otherwise_create(self, path):
    """Loads the model at `path`; creates a new one if loading raises OSError."""
    print("Try loading model")
    try:
      return self._load_model(path, self._supply_custom_objects())
    except OSError:
      print("Model not found")
      return self._create_model()

  def _load_model(self, path, custom_objects=None):
    """Loads the keras model stored at `path` without compiling it.

    Args:
      path:           Path of the saved model.
      custom_objects: Custom objects handed to keras for loading, or None.
    """
    print("Loading model from:", path)
    return tf.keras.models.load_model(
        path,
        custom_objects=custom_objects,
        compile=False)                    # model is recompiled before training

  def _supply_custom_objects(self):
    """Override this method to define custom objects for keras when loading an
       existing model, since custom objects are not saved to the model file."""
    return None

  @abstractmethod
  def _create_model(self):
    """Abstract method to create the model."""
    pass

In [0]:
class TrainingConfig:
  """Holds the training configuration for the Trainer object.

  Attributes:
    optimizer:        optimizer for training the model
    loss_function:    loss function the model is compiled with
    metrics:          metrics to calculate
    train_batch_size: batch size for training data
    test_batch_size:  batch size for testing data
    epochs:           number of epochs per super epoch
    super_epochs:     number of super epochs,
                      total epochs = super_epochs * epochs
  """

  def __init__(self, optimizer, loss_function, metrics, train_batch_size,
               test_batch_size, epochs, super_epochs):
    # settings used to compile the model
    self.optimizer, self.loss_function, self.metrics = optimizer, loss_function, metrics
    # batch sizes of the data generators
    self.train_batch_size, self.test_batch_size = train_batch_size, test_batch_size
    # length of the training
    self.epochs, self.super_epochs = epochs, super_epochs

In [0]:
import os
import datetime

class Trainer:
  """Executes the training of a model and also saves checkpoints of the trained
     model and training session.

  Training is organized in super epochs. After every super epoch the collected
  history is appended to the training session and the session is saved, so
  an interrupted training can be resumed with the stored session.
  """

  def __init__(self, training_session, model, path_config, training_config):
    """
    Args:
      training_session: TrainingSession that collects the training data.
      model:            Keras model to train.
      path_config:      PathConfig with the data, model and session paths.
      training_config:  TrainingConfig with the training parameters.
    """
    self.training_session      = training_session
    self.model                 = model
    self.path_config           = path_config
    self.training_config       = training_config
    self._num_train_samples    = None
    self._num_test_samples     = None
    self._train_data_generator = None
    self._test_data_generator  = None
    self._checkpoint_callback  = None

  def _ensure_output_dirs(self):
    """Creates the folders the model checkpoints and the session are saved to."""
    os.makedirs(self.path_config.model_path, exist_ok=True)
    # the session folder may differ from the model folder; without it, saving
    # the session after the first super epoch would fail
    os.makedirs(self.path_config.session_path, exist_ok=True)

  def _create_data_generator(self, path, input_size, batch_size):
    """Creates a generator yielding batches of the images below `path`."""
    return tf.keras.preprocessing.image.ImageDataGenerator().flow_from_directory(
        path,
        target_size   = input_size,
        batch_size    = batch_size,
        interpolation = "bilinear")

  def _prepare_training(self):
    """Sets up sample counts, data generators, checkpointing and compiles the model."""
    # get width and height of input layer
    input_size = self.model.input_shape[1:3]

    # get number of training and test samples
    self._num_train_samples = self._count_samples(self.path_config.train_path)
    self._num_test_samples  = self._count_samples(self.path_config.test_path)

    # generators for training and test data
    self._train_data_generator = self._create_data_generator(
        self.path_config.train_path, input_size, self.training_config.train_batch_size)
    self._test_data_generator = self._create_data_generator(
        self.path_config.test_path, input_size, self.training_config.test_batch_size)

    # create folders for model checkpoints and the session file
    self._ensure_output_dirs()

    # define callback to save the model
    # (save_best_only is off, so the model is saved after every epoch)
    self._checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath          = os.path.join(self.path_config.model_path, self.path_config.model_file),
        monitor           = 'val_accuracy',
        verbose           = 1,
        save_best_only    = False,
        save_weights_only = False,
        mode              = 'max')

    # (re)compile model with specified optimizer, loss function and metrics
    self.model.compile(
        optimizer = self.training_config.optimizer,
        loss      = self.training_config.loss_function,
        metrics   = self.training_config.metrics)

  def _count_samples(self, path):
    """Counts the entries of all category folders directly below `path`.

    Entries of `path` that are not directories (e.g. stray files) are skipped
    instead of raising an error.
    """
    count = 0
    for category in os.listdir(path):
      category_path = os.path.join(path, category)
      if os.path.isdir(category_path):
        count += len(os.listdir(category_path))
    return count

  def _get_mins_and_secs_from_secs(self, seconds):
    """Splits a duration in seconds into a tuple (minutes, remaining seconds)."""
    return divmod(seconds, 60)

  def _execute_training(self):
    """Trains until the session has reached the configured number of super epochs.

    A KeyboardInterrupt stops the training without raising; the session keeps
    the state of the last completed super epoch.
    """
    try:
      while self.training_session.super_epochs < self.training_config.super_epochs:
        print("Training super epoch %d of %d"
              % (self.training_session.super_epochs + 1, self.training_config.super_epochs))

        start = datetime.datetime.now()
        # NOTE(review): fit_generator is deprecated in newer TensorFlow
        # releases in favor of fit - confirm against the installed version.
        history = self.model.fit_generator(
                generator        = self._train_data_generator,
                steps_per_epoch  = self._num_train_samples // self.training_config.train_batch_size,
                epochs           = self.training_config.epochs,
                callbacks        = [self._checkpoint_callback],
                validation_data  = self._test_data_generator,
                validation_steps = self._num_test_samples // self.training_config.test_batch_size)
        end = datetime.datetime.now()

        # record the finished super epoch and persist the session right away
        self.training_session.append(
            num_epochs = self.training_config.epochs,
            history    = history.history,
            start      = start,
            end        = end)
        self.training_session.save(
            os.path.join(self.path_config.session_path, self.path_config.session_file))

        print("Finished super epoch within %02d:%02d (mins:secs)"
              % self._get_mins_and_secs_from_secs((end - start).total_seconds()))

      print("Done with training %d super epochs" % self.training_session.super_epochs)
    except KeyboardInterrupt:
      print("\nTraining aborted by user")

  def train(self):
    """Prepares and then executes the training."""
    self._prepare_training()
    self._execute_training()

## Configuration

### Path variables

In [0]:
#@title Set colab path in Google Drive

#@markdown Path of the folder (relative to "My Drive") that colab reads data from and writes output to
GDRIVE_COLAB_PATH       = "colab"           # @param {type: "string"}

In [0]:
#@title Set experiment group and experiment name
#@markdown The output path for this experiment is determined by the following two variables: output/EXPERIMENT_GROUP/EXPERIMENT_NAME.

#@markdown Name of the experiment group this experiment belongs to
EXPERIMENT_GROUP        = "Default"   # @param {type: "string"}

#@markdown Name of this experiment
EXPERIMENT_NAME         = "ResNet-50"      # @param {type: "string"}

In [0]:
import os

# basic paths
# path where the google drive will be mounted
GDRIVE_MOUNTING_PATH    = "/gdrive"
# remote path of colab data (inside the mounted drive)
REMOTE_DATA_PATH        = os.path.join(GDRIVE_MOUNTING_PATH, "My Drive", GDRIVE_COLAB_PATH)
# local path of colab data
LOCAL_DATA_PATH         = "/home/data"
# remote and local path of robo cup data
REMOTE_ROBOCUP          = os.path.join(REMOTE_DATA_PATH, "Robo Cup")
LOCAL_ROBOCUP           = os.path.join(LOCAL_DATA_PATH,  "Robo Cup")

# required paths for data reorganization
# paths of train and test data before they are sorted into label folders
TRAIN_PATH_UNSORTED     = os.path.join(LOCAL_ROBOCUP, "train")
TEST_PATH_UNSORTED      = os.path.join(LOCAL_ROBOCUP, "test")

# required paths for training
# paths of train and test data after they were sorted into label folders
TRAIN_PATH_SORTED       = os.path.join(LOCAL_ROBOCUP, "train_sorted")
TEST_PATH_SORTED        = os.path.join(LOCAL_ROBOCUP, "test_sorted")

# path where output of colab is written to or read from,
# used for saving the training session, model and analysis
REMOTE_OUTPUT_PATH      = os.path.join(REMOTE_DATA_PATH, "output")

# path where the output of this experiment is written to
REMOTE_OUTPUT_EXPERIMENT_PATH = os.path.join(REMOTE_OUTPUT_PATH, EXPERIMENT_GROUP, EXPERIMENT_NAME)

# path where the analysis results are written to
REMOTE_OUTPUT_ANALYSIS_PATH   = os.path.join(REMOTE_OUTPUT_EXPERIMENT_PATH, "analysis")
                        


### Path configuration for training

In [0]:
# Model checkpoint and session file are both stored in the experiment folder
# on Google Drive; the training data is read from the sorted local folders.
PATH_CONFIG = PathConfig(
    model_path   = REMOTE_OUTPUT_EXPERIMENT_PATH,
    model_file   = "RoboCup-model.hdf5", 
    session_path = REMOTE_OUTPUT_EXPERIMENT_PATH, 
    session_file = "session.pickle",
    train_path   = TRAIN_PATH_SORTED, 
    test_path    = TEST_PATH_SORTED)

### Training mode

In [0]:
# Resume from a stored session/model if there is one, otherwise start from scratch.
TRAINING_MODE = TrainingMode.TRY_RESUME_TRAINING

### Training config

In [0]:
import tensorflow as tf

# Training parameters handed to the Trainer: 50 super epochs of 1 epoch each,
# i.e. the session and its history are saved after every single epoch.
# NOTE(review): tf.keras.metrics.Accuracy() counts how often predictions are
# exactly equal to the labels; for one-hot labels and softmax outputs
# CategoricalAccuracy is presumably the meaningful value - confirm before
# interpreting the "accuracy" curves.
TRAINING_CONFIG = TrainingConfig(
    optimizer        = tf.keras.optimizers.Adam(), 
    loss_function    = tf.keras.losses.categorical_crossentropy, 
    metrics          = [tf.keras.metrics.Accuracy(), tf.keras.metrics.CategoricalAccuracy()],
    train_batch_size = 10, 
    test_batch_size  = 10, 
    epochs           = 1, 
    super_epochs     = 50 )

### Model provider

In [0]:
class ModelProvider(AbstractModelProvider):
  """Provides a ResNet-50 whose last layer is replaced by an 8-way softmax."""

  def _create_model(self):
    """Builds the model from a ResNet-50 with pre-trained ImageNet weights."""
    print("Creating new model")
    # pre-trained network that serves as the starting point
    base_model = tf.keras.applications.ResNet50(weights="imagenet")
    # output of the layer right before the ImageNet softmax layer
    features = base_model.layers[-2].output
    # new softmax layer for the RoboCup domain instead of the ImageNet one
    head = tf.keras.layers.Dense(8, activation='softmax', name='predictions')
    # new model from the original input to the new softmax output
    return tf.keras.models.Model(inputs=base_model.input, outputs=head(features))

MODEL_PROVIDER = ModelProvider()

## Execute experiment

### Download train and test data from Google Drive

Authenticate and mount Google Drive

In [0]:
from google.colab import drive

# mount Google Drive at GDRIVE_MOUNTING_PATH
drive.mount(GDRIVE_MOUNTING_PATH)

### Copy RoboCup data from Google Drive to machine

In [0]:
# Wrap the paths in single quotes for the shell, since "Robo Cup" contains a space.
SRC = "'" + REMOTE_ROBOCUP + "'"
DST = "'" + LOCAL_ROBOCUP  + "'"
# create the local folder and copy the content of the remote folder into it
!mkdir -p $DST
!cp -r $SRC/. $DST

### Unzip train and test data

In [0]:
!unzip $DST/train.zip -d $DST
!unzip $DST/test.zip -d $DST 

### Reorganize train and test data

Group the test and train images by their corresponding labels


In [0]:
# copy the images into one folder per label
sort_images_after_labels(TRAIN_PATH_UNSORTED, TRAIN_PATH_SORTED)
sort_images_after_labels(TEST_PATH_UNSORTED,  TEST_PATH_SORTED)

### Load or create training session

In [0]:
# session that collects the training history; loaded or created depending on TRAINING_MODE
TRAINING_SESSION = TrainingSessionProvider().provide(PATH_CONFIG, TRAINING_MODE)

### Load or create model

In [0]:
# model to train; loaded or created depending on TRAINING_MODE
MODEL = MODEL_PROVIDER.provide(PATH_CONFIG, TRAINING_MODE)

### Train the model

In [0]:
# wire session, model and configuration together
TRAINER = Trainer(
    training_session = TRAINING_SESSION,
    model            = MODEL,
    path_config      = PATH_CONFIG, 
    training_config  = TRAINING_CONFIG)

# trains until the session holds the configured number of super epochs;
# interrupting the cell stops the training
TRAINER.train()

# Experiment analysis

## Define custom implementations

In [0]:
import os

def files_in_dir(path):
  """Returns the entries of the directory `path`, each joined with `path`."""
  full_paths = []
  for entry in os.listdir(path):
    full_paths.append(os.path.join(path, entry))
  return full_paths

In [0]:
def merge_histories(old, new):
  """Returns a history in which the values of `new` follow the values of `old`.

  Both arguments map a key to a sequence of values. Neither argument is
  modified; every key of `new` gets a freshly built list in the result.
  """
  merged = old.copy()
  for key, values in new.items():
    # keys that are unknown so far start with an empty list
    merged[key] = [*merged.get(key, []), *values]
  return merged

def merge_all_histories(histories):
  """Concatenates the values of all given histories key by key.

  Args:
    histories: Iterable of dicts that map a key to a sequence of values.

  Returns:
    A new dict that maps every key to the list of its values in the order of
    the given histories.
  """
  entire_history = {}
  for history in histories:
    for key, values in history.items():
      entire_history[key] = [*entire_history.get(key, []), *values]
  return entire_history

In [0]:
def training_seconds(session_data):
  """Returns the summed duration of all recorded super epochs in seconds.

  Args:
    session_data: Object whose `super_epoch_start` and `super_epoch_end`
                  hold the matching start and end timestamps.
  """
  spans = zip(session_data.super_epoch_start, session_data.super_epoch_end)
  return sum((end - start).total_seconds() for start, end in spans)

In [0]:
import matplotlib.pyplot as plt
import numpy as np
import os

def load_image(path):
  """Loads the image at `path`, resized to the input size of the global MODEL."""
  height_and_width = MODEL.input_shape[1:3]
  image = tf.keras.preprocessing.image.load_img(path, target_size=height_and_width)
  return image

def test_image(model, path):
  """Predicts a single image, prints the result and shows the image.

  Args:
    model: Model used for the prediction.
    path:  Image path; it is joined with TEST_PATH_SORTED.
  """
  image_path = os.path.join(TEST_PATH_SORTED, path)
  # resize the image to the width and height of the input layer
  image = tf.keras.preprocessing.image.load_img(
      image_path, target_size=model.input_shape[1:3])
  # the model expects a batch, so wrap the single image into one
  batch = np.array([tf.keras.preprocessing.image.img_to_array(image)])
  probabilities = model.predict(batch)
  print("probabilities:", probabilities)
  print("argmax:", np.argmax(probabilities))
  plt.imshow(image)

def predict_image_probs(model, path):
  """Returns the prediction of `model` for a single image.

  Args:
    model: Model used for the prediction.
    path:  Image path; it is joined with TEST_PATH_SORTED.
  """
  image_path = os.path.join(TEST_PATH_SORTED, path)
  # resize the image to the width and height of the input layer
  image = tf.keras.preprocessing.image.load_img(
      image_path, target_size=model.input_shape[1:3])
  # the model expects a batch, so wrap the single image into one
  batch = np.array([tf.keras.preprocessing.image.img_to_array(image)])
  return model.predict(batch)

def predict_image_argmax(model, path):
  """Returns the index of the highest value in the prediction for one image."""
  probabilities = predict_image_probs(model, path)
  return np.argmax(probabilities)

def predict_all_test_images(model, test_path_sorted):
  """Predicts every image found in the label folders below `test_path_sorted`.

  The folder names are interpreted as integer labels.

  Returns:
    Four arrays of equal length: predicted classes, labels, image file names
    and image paths.
  """
  predictions = []
  for label in os.listdir(test_path_sorted):
    label_dir = os.path.join(test_path_sorted, label)
    for image in os.listdir(label_dir):
      image_path = os.path.join(label_dir, image)
      predictions.append((
          predict_image_argmax(model, image_path),
          int(label),
          image,
          image_path))

  # turn the list of tuples into one sequence per column
  pred_column, label_column, image_column, path_column = zip(*predictions)
  return (np.array(pred_column), np.array(label_column),
          np.array(image_column), np.array(path_column))

In [0]:
import matplotlib.pyplot as plt

def plot_history(history):
  """Plots accuracy, categorical accuracy and loss over the epochs.

  One figure is shown per quantity, each with the training curve and the
  validation curve.

  Args:
    history: Dict with the keys 'accuracy', 'categorical_accuracy' and 'loss'
             as well as their 'val_'-prefixed counterparts.
  """
  # (history key, title, y-axis label) of each figure in display order
  figures = [
      ('accuracy',             'Model accuracy',             'Accuracy'),
      ('categorical_accuracy', 'Model categorical accuracy', 'Categorical Accuracy'),
      ('loss',                 'Model loss',                 'Loss'),
  ]
  for key, title, y_label in figures:
    plt.plot(history[key])            # training values
    plt.plot(history['val_' + key])   # validation values
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

In [0]:
def find_falsely_classified_images(preds, labels, images, paths):
  """Selects the entries whose prediction differs from the label.

  Args:
    preds, labels, images, paths: Arrays of equal length that describe the
      same images position by position.

  Returns:
    A tuple (false_preds, false_labels, false_images, false_paths) with the
    entries of the given arrays at the positions where preds != labels.
  """
  is_wrong = preds != labels
  return tuple(column[is_wrong] for column in (preds, labels, images, paths))

In [0]:
def plot_falsely_classified_images(false_preds, false_labels, false_images, false_paths):
  """Shows a table with one row per falsely classified image.

  Every row holds the image, the name of the predicted class and the name of
  the labeled class. The first row contains the column headings.

  Args:
    false_preds:  Predicted class indices.
    false_labels: Labeled class indices.
    false_images: Image file names (only their number is used).
    false_paths:  Paths the images are loaded from.
  """
  # class index -> class name
  translation = {0: "Ball",
                 1: "Goal post",
                 2: "Obstacle",
                 3: "L-Line",
                 4: "X-Line",
                 5: "T-Line",
                 6: "Penalty spot",
                 7: "Robot foot"}

  def add_text_cell(position, text):
    # cell without axes that only shows the given text
    fig.add_subplot(rows, cols, position)
    plt.axis('off')
    plt.text(0, 0.5, text, fontsize=14)

  rows = len(false_images) + 1      # +1 for heading row
  cols = 3
  fig = plt.figure(figsize=(8, 3*rows))

  # heading row
  for column, heading in enumerate(("Bild", "Vorhersage", "Label"), start=1):
    add_text_cell(column, heading)

  # one row per image; subplot positions are 1-based and counted row by row
  for index in range(len(false_images)):
    first_cell = (index + 1) * cols + 1
    img = load_image(false_paths[index])
    # image
    fig.add_subplot(rows, cols, first_cell)
    plt.axis('off')
    plt.imshow(img)
    # prediction
    add_text_cell(first_cell + 1, translation[false_preds[index]])
    # label
    add_text_cell(first_cell + 2, translation[false_labels[index]])
  plt.show()

## Execute analysis

### Calculate training time

In [0]:
# total training time over all recorded super epochs
training_seconds(TRAINING_SESSION) / 60   # minutes

### Plot training history

In [0]:
# join the histories of all super epochs into one and plot it
plot_history(merge_all_histories(TRAINING_SESSION.training_history))

### Predict all images

In [0]:
# predictions, labels, file names and paths of all test images (index-aligned arrays)
preds, labels, images, paths = predict_all_test_images(MODEL, TEST_PATH_SORTED)

### Plot confusion matrix

In [0]:
from sklearn.metrics import confusion_matrix
import os
import seaborn as sn

# labels are passed as the true values, preds as the predicted values
confusion = confusion_matrix(labels, preds)

# show the matrix as heatmap ...
sn.heatmap(confusion)
plt.show()

# ... and as numbers (cell output)
confusion

### Find all falsely classified images

In [0]:
# keep only the images whose prediction differs from the label
false_preds, false_labels, false_images, false_paths = find_falsely_classified_images(preds, labels, images, paths)

# list label, prediction, file name and path of every falsely classified image
for i in range(0, len(false_preds)):
  print("label(%d) prediction(%d) %s %s" % (false_labels[i], false_preds[i], false_images[i], false_paths[i]))

### Plot wrongly classified images


In [0]:
# table of the falsely classified images with predicted and labeled class
plot_falsely_classified_images(false_preds, false_labels, false_images, false_paths)

### Test single images

In [0]:
# test a particular image
# test a particular image
# NOTE(review): test_image joins its path argument with TEST_PATH_SORTED again;
# this works here because TEST_PATH_SORTED is absolute, so the second join
# returns the given path unchanged.
test_image(MODEL, os.path.join(TEST_PATH_SORTED, "1/1761.png"))

### Visualize model

In [0]:
# visualize model
# draw the layer graph of the model
tf.keras.utils.plot_model(MODEL)

# Run TensorBoard (this notebook does not yet track anything with TensorBoard)

Tutorial on how to run TensorBoard in Colab:
[https://www.dlology.com/blog/quick-guide-to-run-tensorboard-in-google-colab/](https://www.dlology.com/blog/quick-guide-to-run-tensorboard-in-google-colab/)


In [0]:
# show the GPU of this runtime
!nvidia-smi

In [0]:
# download and unpack ngrok into the working directory
# NOTE(review): the downloaded archive is not verified before it is executed later
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

In [0]:
# directory TensorBoard reads its logs from
LOG_DIR = './log'
# start TensorBoard in the background on port 6006
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [0]:
# start ngrok in the background with an http tunnel to port 6006 (TensorBoard)
get_ipython().system_raw('./ngrok http 6006 &')

In [0]:
# query the local ngrok API and print the public URL of the first tunnel
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"