# Instruction
1. Check if you have the following folders:
   

*   data/models/cnn
*   data/augment


   
2. Check if you have the following files 
   
*   data/training.csv
*   data/test.csv
*   data/IdLookupTable.csv

3. Make sure you **do not have** the following files

*   data/augment/augdat_*.p
*   data/models/cnn/*.p 
*   data/models/cnn/*.h5 
*   data/submission_data.p
*   data/transformed_data.p
*   data/processed_data.p

4. In the model description section (a few cells below), choose your option

5. On Edit -> Notebook settings -> choose Hardware accelerator 'GPU' and Runtime-shape "High RAM"

6. Runtime -> Run all (follow instructions to authenticate)

If the Colab stops, simply restart the code with "Run all"; it will pick up where it left off.

In [None]:
import os
import sys
from pathlib import Path
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
%cd drive/MyDrive/Colab\ Notebooks/cct

In [None]:
# Absolute location of the project folder on the mounted Drive.
# A plain space is used here: backslash-escaping a space is shell syntax and
# would end up literally inside a Python path string. The path is absolute so
# it stays valid whatever the current working directory is.
base = Path('/content/drive/MyDrive/Colab Notebooks/cct')
# Put the project folder first on the import search path
sys.path.insert(0, str(base))

In [None]:
!apt-get install build-essential cmake
!apt-get install libopenblas-dev liblapack-dev 
!pip3 install face_recognition
!pip3 install mtcnn

In [None]:
import numpy as np
from modules.models import CNN
from modules.models import Means
from modules.MultiModel import MultiModel
from modules.source_data import SourceData

# Model description

In [None]:
# Debug switch: True triggers a quick run.
# After a debug run, remember to remove all the files it created.
debug = False

# Names of every response
response_names = [
    'left_eye_center',
    'right_eye_center',
    'left_eye_inner_corner',
    'left_eye_outer_corner',
    'right_eye_inner_corner',
    'right_eye_outer_corner',
    'left_eyebrow_inner_end',
    'left_eyebrow_outer_end',
    'right_eyebrow_inner_end',
    'right_eyebrow_outer_end',
    'nose_tip',
    'mouth_left_corner',
    'mouth_right_corner',
    'mouth_center_top_lip',
    'mouth_center_bottom_lip',
]

# Model class to use: Means for a debug run, CNN otherwise
my_model = Means if debug else CNN

# Augment the data or not
do_augment = True

# Prefix to give all saved files
prefix = 'center_compare'

# Centering configurations, keyed by option number:
#   1: do not do any centering, just run on the raw data
#   2: center the image but do not scale; with multiple faces, pick the one
#      in the middle
#   3: center the image and scale; with multiple faces, pick the one in the
#      middle
#   4: center the image and scale; with multiple faces, pick the biggest
centering_options = {
  1: {'skip_center': True},
  2: {'skip_center': False, 'do_scale': False, 'pick_center_image': True},
  3: {'skip_center': False, 'do_scale': True, 'pick_center_image': True},
  4: {'skip_center': False, 'do_scale': True, 'pick_center_image': False},
}

# Change this number to select one of the configurations above
option = 1
if option not in centering_options:
  # Report both the valid choices and the offending value
  raise ValueError('You must choose one of these options: %s (got %r)'
                   % (sorted(centering_options), option))
centering_params = centering_options[option]
print('Centering params are %s' % str(centering_params))

# Augmentation parameters for each response name.
# A normal run uses the per-group transform counts listed below; a debug run
# uses a single transform for every response. Key order is kept as listed.
augmentation_params = {
    name: {'num_transforms': 1 if debug else count}
    for count, names in (
        (12, ('left_eye_inner_corner',
              'left_eye_outer_corner',
              'right_eye_inner_corner',
              'right_eye_outer_corner',
              'left_eyebrow_inner_end',
              'left_eyebrow_outer_end',
              'right_eyebrow_inner_end',
              'right_eyebrow_outer_end',
              'mouth_left_corner',
              'mouth_right_corner',
              'mouth_center_top_lip')),
        (8, ('mouth_center_bottom_lip',
             'nose_tip',
             'left_eye_center',
             'right_eye_center')),
    )
    for name in names
}
                      

# Response names to model; None signifies all of them (example kept below)
# model_responses = ['left_eye_center', ]
model_responses = None

# Which data points to eliminate in which group (example kept below)
# eliminate_params = {0: ['nose_tip', 'mouth_center_bottom_lip'], 1: []}
eliminate_params = None

In [None]:
# Random seed for splitting train/cv/test data
seed = 27652

# Source the data here: build the data source, forwarding the debug flag and
# the centering configuration chosen in the model description cell
sd = SourceData(debug=debug, center_params=centering_params)

# As this is the final run we will include the test data as part of the training.
# The result is indexed by 'train' and 'cv' in the fitting cell further down.
data = sd.source_data(combine_train_test=True, seed=seed)

Create models using the optimal parameters and save the best model. The best model is the one with the minimum validation loss.

In [None]:
# Create a set of models using the selected model class (my_model)
mmo = MultiModel(
    my_model,
    prefix=prefix,
    eliminate=eliminate_params,
    augment_params=augmentation_params,
    response_names=model_responses,
)

# Pick the training and cross-validation splits out of the sourced data
train_data, cv_data = data['train'], data['cv']

# Fit the models
mmo.fit(
    train_data,
    cv_data,
    fit_params_dict={'epochs': 300},
    optimizer_params_dict={'name': 'sgd', 'learning_rate': 5e-5, 'momentum': 0.8},
)

# Create submission file

In [None]:
# Disabled by default: flip to True to rebuild the model set and load it
# instead of reusing the models fitted in the cell above
if False:
  # Create a set of models using my_model
  mmo = MultiModel(my_model, prefix=prefix)
  # Load the models for all the response names
  mmo.load(response_names)

# Source the test csv and create the submission file from it
submission_data = sd.source_test_csv()
mmo.create_submission(submission_data=submission_data)

# Test data predictions and analysis

In [None]:
  # Disabled analysis cell (guarded by `if 0`): predicts on the labeled test
  # data and prints the error metrics.
  # NOTE(review): `source_data` and `preprocess_multiple_data` are not imported
  # in the visible import cells - confirm where they come from before enabling.
  if 0:
    # If the preprocessed data is not in memory yet (NameError), rebuild it
    try:
      # Reuse the labeled test data from an earlier run
      test_data = preprocessed['test_labeled']
    except NameError:
      # Source the data with or without augmentation (default is with)
      data, _ = source_data()

      # Transform all the images in the DataFrame
      preprocessed = preprocess_multiple_data(
          data_dict=data, drop_orig=True, debug=False, save_test=True)

      # Labeled test data to make predictions on
      test_data = preprocessed['test_labeled']

    # Need to compare against the original y values: keep the current 'y'
    # under 'translated_y' and promote 'orig_y' to 'y'
    test_data = test_data.rename(columns={'y': 'translated_y', 'orig_y': 'y'})

    # If the models are not in memory (NameError), recreate and load them
    # before predicting.
    # NOTE(review): this also catches a NameError raised inside predict itself.
    try:
      pred, metrics = mmo.predict(test_data)
    except NameError:
      # Create a set of models using my_model
      mmo = MultiModel(my_model, prefix=prefix)
      mmo.load(response_names)

      pred, metrics = mmo.predict(test_data)

    # Print the metrics and the mean over all their values
    print(metrics)
    print('Average error %.2f' % np.mean([x for x in metrics.values()]))

In [None]:
# Disabled analysis cell: histogram of the worst prediction error per row
if 0:
  # Amongst the pair of values, which one is the worst?
  # Columns are read by label: integer keys on a label-indexed row would be
  # positional indexing, which recent pandas versions deprecate.
  pred['delta'] = pred[['y_invert', 'y_true']].apply(
      lambda row: np.max(np.abs(row['y_invert'] - row['y_true'])), axis=1)

  # Create a histogram of those values
  import matplotlib.pyplot as plt
  plt.hist(pred['delta'].values, bins=96)
  plt.xlabel('Error in prediction')
  plt.ylabel('Count of error')
  plt.title('Histogram of prediction error')
  plt.show()

In [None]:
# Disabled analysis cell: show the images with the largest prediction error
if 0:
  # Imported here so this cell does not depend on another disabled cell
  # having been run first
  import matplotlib.pyplot as plt

  # Create a DataFrame of values in the tail region: sort by error, worst
  # first, and keep the top 10 rows
  pred = pred.sort_values(by='delta', ascending=False, axis=0)
  tail_df = pred[:10]
  # The slice already limits this to 10 images, so no extra counter or early
  # break is needed
  for x in tail_df['X'].values:
    plt.figure()
    plt.imshow(x)

In [None]:
# Disabled analysis cell: show images from the core region
if 0:
  # Imported here so this cell does not depend on another disabled cell
  # having been run first
  import matplotlib.pyplot as plt

  # Create a DataFrame of values in the core region (rows 10 to 49).
  # Assumes pred has already been sorted by 'delta', worst first - confirm
  # the sorting cell was run before enabling this one.
  core_df = pred[10:50]
  for x in core_df['X'].values:
    plt.figure()
    plt.imshow(x)