## **Setup**

In [None]:
from google.colab import drive

# mount_path: where Google Drive is attached inside the Colab filesystem
# model_path: Drive-relative folder for saved models (joined onto mount_path when used)
mount_path = '/content/gdrive/'
model_path = 'My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/'

# attach Google Drive at mount_path
drive.mount(mount_path)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [None]:
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import pytz
from zipfile import ZipFile
from tempfile import TemporaryDirectory
import requests
import cv2
import shutil
import glob

import tensorflow as tf
from tensorflow import keras

from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

# from sklearn.metrics import roc_auc_score, roc_curve, auc
# from sklearn.model_selection import train_test_split

# Load the TensorBoard notebook extension
%load_ext tensorboard

from google.colab.patches import cv2_imshow 

# plot options
# plt.rcParams.update({'font.size': 11})
plt.style.use('fivethirtyeight')

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


# Data Loading

In [None]:
# custom copytree because Colab doesn't have the latest version of shutil.copytree which now contains the dirs_exist_ok flag solving this issue
# copied verbatim from https://stackoverflow.com/a/12514470/5991868
def copytree(src, dst, symlinks=False, ignore=None):
    '''
    Merge the contents of directory `src` into directory `dst`.

    Stand-in for shutil.copytree(..., dirs_exist_ok=True) on Python versions
    that lack the flag. Existing files in `dst` are overwritten; directories
    that already exist in `dst` (at any depth) are merged into rather than
    raising FileExistsError.

    src      : path of the directory whose contents are copied
    dst      : path of the destination directory (created if missing)
    symlinks : forwarded to shutil.copytree for sub-directories
    ignore   : optional callable(dir, names) -> names to skip, same contract
               as the `ignore` argument of shutil.copytree
    '''
    os.makedirs(dst, exist_ok=True)

    names = os.listdir(src)
    # honour `ignore` at this level too, so merged and freshly copied
    # directories are filtered the same way
    skipped = set(ignore(src, names)) if ignore is not None else set()

    for item in names:
        if item in skipped:
            continue
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            if os.path.isdir(d):
                # destination directory already exists: merge recursively
                copytree(s, d, symlinks, ignore)
            else:
                shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)

In [None]:
# home_dir is the home directory for the images, from which training and validation splits will be taken
home_dir='/content/All_Data/'

# this can take a bit (maybe 30s)
# The download only runs when home_dir does not exist yet, so if anything fails
# part-way the directory is removed again; otherwise a re-run would skip the
# download and silently continue with an incomplete dataset.
if not os.path.exists(home_dir):
  try:
    os.mkdir(home_dir)
    for cview in ['Front','Left','Rear','Right']:
      os.mkdir(os.path.join(home_dir,'camera'+cview))

    urlbase = 'https://datasets.aicrowd.com/default/aicrowd-practice-challenges/public/autodri/v0.1/'

    for filename in ['train','val']:
      # create a temporary directory using TemporaryDirectory and context manager and unzip to there
      with TemporaryDirectory() as tmpdirname:
        zip_path = f'{tmpdirname}/{filename}.zip'
        # download the file, streaming it to disk instead of holding it all in memory
        with requests.get(f'{urlbase}{filename}.zip', stream=True, timeout=60) as response:
          # fail loudly on HTTP errors rather than saving an error page as a .zip
          response.raise_for_status()
          # (handle is not called `zip`, which would shadow the builtin for later cells)
          with open(zip_path, "wb") as zip_file:
            for chunk in response.iter_content(chunk_size=1024*1024):
              zip_file.write(chunk)
        # unzip
        with ZipFile(zip_path,'r') as zip_ref:
          zip_ref.extractall(tmpdirname)
        # copy the data to the All_Data dir
        shutil.copy2(f'{tmpdirname}/{filename}/{filename}.csv',f'{home_dir}/{filename}.csv')
        for cview in ['Front','Left','Rear','Right']:
          copytree(f'{tmpdirname}/{filename}/camera{cview}/',f'{home_dir}/camera{cview}/')
  except BaseException:
    # BaseException so that a manual kernel interrupt also cleans up
    shutil.rmtree(home_dir, ignore_errors=True)
    raise


In [None]:
# pool the provided train and val tables into one frame with a fresh 0..n-1 index;
# the notebook draws its own train/validation splits from this combined frame.
# pd.concat is used because DataFrame.append is deprecated (removed in pandas 2.0)
combdf=pd.concat(
    [pd.read_csv(f'{home_dir}/train.csv'), pd.read_csv(f'{home_dir}/val.csv')],
    ignore_index=True)

# create the img_list column: for every row, the paths of its four camera images
# in the order Left, Front, Right, Rear
combdf['img_list']=combdf['filename'].apply(
    lambda fname: [os.path.join(home_dir,f'camera{cview}/{fname}') for cview in ['Left','Front','Right','Rear']])

In [None]:
# preview the first rows of the combined frame (shown as the cell's output)
combdf.head()

Unnamed: 0,filename,canSteering,img_list
0,33856.jpg,180.0,"[/content/All_Data/cameraLeft/33856.jpg, /cont..."
1,61909.jpg,-194.370014,"[/content/All_Data/cameraLeft/61909.jpg, /cont..."
2,36269.jpg,-39.000471,"[/content/All_Data/cameraLeft/36269.jpg, /cont..."
3,60259.jpg,-185.300714,"[/content/All_Data/cameraLeft/60259.jpg, /cont..."
4,50681.jpg,44.939983,"[/content/All_Data/cameraLeft/50681.jpg, /cont..."


In [None]:
def trainvaldfs(combdf, num_folds, val_pct, truly_random):
  '''
  Build a train/validation split of `combdf` for each cross-validation fold.

  combdf       : dataframe to split; rows are selected by index label
  num_folds    : number of folds to produce
  val_pct      : fraction of all rows held out as validation in each fold;
                 must not exceed 1/num_folds
  truly_random : True seeds the shuffle unpredictably, False uses the fixed
                 seed 42 so the split is repeatable

  returns a dictionary[fold_num][train or val] : dataframe
  eg cvdict[0]['train']
  '''
  assert val_pct <= 1/num_folds

  # seed=None asks RandomState for unpredictable seeding
  seed = None if truly_random else 42
  rng = np.random.RandomState(seed)

  # NOTE the shuffle below is the only source of randomness in the split
  shuffled = combdf.index.to_list()
  rng.shuffle(shuffled)

  n_rows = len(shuffled)
  fold_len = int(np.floor(n_rows/num_folds))   # rows per fold
  val_len = int(np.floor(val_pct*n_rows))      # validation rows per fold

  cvdict = {}
  for fold in range(num_folds):
    # validation rows are the first val_len entries of this fold's slice
    start = fold*fold_len
    held_out = shuffled[start:start+val_len]
    # everything else trains; setdiff1d returns these labels sorted
    kept = np.setdiff1d(shuffled, held_out)
    cvdict[fold] = {'train': combdf.loc[kept], 'val': combdf.loc[held_out]}

  return cvdict

In [None]:
# 3 folds, 15% of all rows held out per fold, fixed seed so the split is repeatable
cvdict = trainvaldfs(combdf, num_folds=3, val_pct=0.15, truly_random=False)

In [None]:
# ind=np.random.choice(cvdict[0]['train'].index)
# for img in cvdict[0]['train'].loc[ind,'img_list']:
#   cv2_imshow(cv2.imread(img))

In [None]:
def plot_training_history(history):
  '''
  Plot per-epoch training curves from a Keras History object.

  history : object whose `.history` attribute maps metric names to per-epoch
            lists of values (as returned by model.fit)

  A loss panel is always drawn. An MSE panel is drawn to its left only when
  the history contains a key with 'mse' in it, i.e. when the model was
  compiled with an MSE metric; without one the function previously failed
  with an IndexError. Validation curves are drawn only when the matching
  validation key is present.
  '''
  logs = history.history

  # keras appends a number after some of the keys, so match on the substring
  mse_keys = [key for key in logs if 'mse' in key]
  train_mse = [key for key in mse_keys if 'val' not in key]
  val_mse = [key for key in mse_keys if 'val' in key]

  # each panel: (title, y-axis label, training key, validation key or None)
  panels = []
  if train_mse:
    panels.append(('Training MSE', 'MSE', train_mse[0], val_mse[0] if val_mse else None))
  panels.append(('Training Loss', 'Loss', 'loss', 'val_loss'))

  plt.figure(figsize=(10, 5))
  for position, (title, ylabel, train_key, val_key) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    labels = []
    for key, label in ((train_key, 'train'), (val_key, 'test')):
      # skip curves that were not recorded (e.g. no validation data)
      if key is not None and key in logs:
        plt.plot(logs[key])
        labels.append(label)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(labels, loc='upper left')
    plt.grid(True)
    plt.tight_layout()

  plt.show()

# Model Setup

In [None]:
# DenseNet201 with ImageNet weights and no classification head; it is applied
# to each 224x224 RGB camera image by the model assembled from it
base_model = keras.applications.DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

In [None]:
# assemble the whole network as one Sequential stack
model = keras.models.Sequential([
    # apply base_model to each of the 4 input images (keeping the right shape)
    keras.layers.TimeDistributed(base_model, input_shape=(4, 224, 224, 3)),

    # flatten each image's output so the LSTM receives 4 one-dimensional vectors
    keras.layers.TimeDistributed(keras.layers.Flatten()),
    keras.layers.LSTM(256, activation='relu', return_sequences=False),

    # standard MLP head, stage 1: Dense -> BatchNorm -> ReLU -> Dropout
    keras.layers.Dense(128, activation=None),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dropout(0.25),

    # stage 2: same pattern at half the width
    keras.layers.Dense(64, activation=None),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),
    keras.layers.Dropout(0.25),

    # single linear unit for the regression output
    keras.layers.Dense(1, activation='linear'),
])



In [None]:
# freeze the base model which is inside the first timedistributed layer
# (layers[0] is the TimeDistributed wrapper around base_model), so training
# only updates the layers stacked after it
model.layers[0].trainable=False

### Compile model

In [None]:
# initial learning rate for the optimizer
init_lr=1e-2
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
optimizer = keras.optimizers.Nadam(learning_rate=init_lr)
# mean squared error between predicted and recorded steering values
loss_function = keras.losses.MeanSquaredError()

model.compile(loss=loss_function, optimizer=optimizer)

In [None]:
def get_imgs(imlist):
  '''
  Load every image path in `imlist` resized to 224x224 and stack the
  resulting arrays along a new leading axis.

  imlist : iterable of image file paths
  returns a numpy array whose first axis indexes the images in `imlist` order
  '''
  def _as_array(impath):
    picture = load_img(impath,target_size=(224, 224))
    pixels = img_to_array(picture)
    # Pillow images should be closed after `load_img`,
    # but not PIL images.
    if hasattr(picture, 'close'):
      picture.close()
    return pixels

  return np.stack([_as_array(impath) for impath in imlist],axis=0)

def imageseq_generator(df, batch_size = 64, shuffle = True):
  '''
  Endless generator of (inputs, targets) batches for model.fit.

  df         : dataframe with an 'img_list' column (image paths for one
               sample) and a 'canSteering' column (the regression target)
  batch_size : maximum number of samples per batch; the last batch of each
               pass over `df` is smaller when len(df) is not a multiple of it
  shuffle    : reshuffle the sample order before every pass (default, the
               original behaviour); pass False to keep a fixed order, e.g.
               so validation batches are the same every epoch

  yields tuples (np.array of stacked image arrays, np.array of targets)
  raises ValueError if `df` has no rows or batch_size < 1; without this
  check the loop below would spin forever without yielding anything
  '''
  if batch_size < 1:
    raise ValueError('batch_size must be at least 1')

  inds=df.index.to_list()
  if not inds:
    raise ValueError('cannot generate batches from an empty dataframe')

  while True:
    # shuffle the indices for the epoch
    if shuffle:
      np.random.shuffle(inds)

    # Get index to start each batch: [0, batch_size, 2*batch_size, ..., max multiple of batch_size <= num_samples]
    for offset in range(0, len(inds), batch_size):
      # Get the samples you'll use in this batch
      batch_inds = inds[offset:(offset+batch_size)]

      # Read in each input and get labels
      batch_input = [get_imgs(df.loc[ind,'img_list']) for ind in batch_inds]
      batch_output = [df.loc[ind,'canSteering'] for ind in batch_inds]

      # Return a tuple of (input, output) to feed the network
      yield (np.array(batch_input), np.array(batch_output))

In [None]:
# # set up Tensorboard
# logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
# %tensorboard --logdir logs

In [None]:
# early stopping callback
# patience: number of epochs without improvement before training stops
# min_delta: smallest change in the monitored quantity that counts as an improvement
early_stopping_cb = keras.callbacks.EarlyStopping(
    min_delta=100,
    patience=4,
    restore_best_weights=True,
)

In [None]:
# # tensorboard 
# logdir = os.path.join("logs", datetime.datetime.now(pytz.timezone('US/Eastern')).strftime("%y%m%d_%H%M"))
# tb_cb = keras.callbacks.TensorBoard(logdir)

# which cross-validation fold from cvdict to train on
cv_fold=1

# timestamp (US/Eastern, yymmdd_HHMM) used to give each run's checkpoint a distinct name
datestr=datetime.datetime.now(pytz.timezone('US/Eastern')).strftime("%y%m%d_%H%M")
# checkpoint location: <mount_path><model_path><timestamp>_DenseNet_cv<fold>_AutoDrive_frozen
checkpoint_filepath=f'{os.path.join(mount_path,model_path)}{datestr}_DenseNet_cv{cv_fold}_AutoDrive_frozen'
# save the full model (not just weights) whenever val_loss reaches a new minimum
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_filepath,
    save_weights_only = False,
    monitor = 'val_loss',
    mode = 'min',
    save_best_only = True,
    verbose = 1)

# training / validation frames for the chosen fold
train_df=cvdict[cv_fold]['train']
val_df=cvdict[cv_fold]['val']

batch_size = 64
max_epochs=25

# the generators never terminate, so Keras needs explicit step counts;
# integer division means a trailing partial batch is not counted in an epoch
train_steps=train_df.shape[0]//batch_size
val_steps=val_df.shape[0]//batch_size

# train it!
history = model.fit(imageseq_generator(train_df,batch_size), epochs = max_epochs,
                    validation_data = imageseq_generator(val_df,batch_size),
                    steps_per_epoch = train_steps, validation_steps = val_steps,
                    callbacks = [early_stopping_cb, model_checkpoint_cb])

Epoch 1/25
Epoch 00001: val_loss improved from inf to 15708.64355, saving model to /content/gdrive/My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/200904_2345_DenseNet_cv1_AutoDrive_frozen
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/200904_2345_DenseNet_cv1_AutoDrive_frozen/assets
Epoch 2/25
Epoch 00002: val_loss improved from 15708.64355 to 9000.51660, saving model to /content/gdrive/My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/200904_2345_DenseNet_cv1_AutoDrive_frozen
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/200904_2345_DenseNet_cv1_AutoDrive_frozen/assets
Epoch 3/25
Epoch 00003: val_loss improved from 9000.51660 to 6234.36279, saving model to /content/gdrive/My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/200904_2345_DenseNet_cv1_AutoDrive_frozen
INFO:tensorflow:Assets written to: /content/gdrive/My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/200904_