# Data Science for the Automotive Industry: Second practical session - DL

In this session, we will dive into deep learning. We will explore the potential of neural networks through several applications of increasing complexity.

1. Application of NN on a linear function
2. Application of NN on non linear functions
3. Application of NN for image classification

Developed by Nicolas Gutierrez in January 2022.

## Importing required libraries
It is good practice to load the libraries the code requires at the very start. Doing it this way also gives you some hints about what the code below will do, just by checking which libraries are imported.

In [None]:
### Do not modify this cell, not an exercise

# File operations
import glob
import os
# Numeric operations
import numpy as np
# Neural networks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Libraries for plotting
import matplotlib.pyplot as plt
%matplotlib inline
import time
import pylab as pl
from IPython import display
# Libraries for pictures
import PIL

## Neural network for linear function
The simplest possible example of a neural network uses just one layer and tries to learn a linear function. We will do that step by step.

### Preparation of the data

In [None]:
### Exercise 1: Create a linear function with slope and bias (intercept)

def linear_function(x):
  """Exercise stub: return a linear function of x (slope times x plus bias).

  The following cell calls this on NumPy arrays and then calls
  `.reshape(-1, 1)` on the result, so the expression must return a NumPy
  array when it is given one.
  """
  # Complete the following line with a linear function of x
  y = 
  #
  return y

In [None]:
### Exercise 2: Select a training and test interval

# Complete the following lines with either values or lists
# (the two training points are the bounds passed to np.linspace below;
# values_for_testing is converted with np.array)
start_point_of_training = 
end_point_of_training = 

values_for_testing = 
#

# 256 evenly spaced training inputs, reshaped into a single column
x_train = np.linspace(start_point_of_training, end_point_of_training, 256).reshape(-1, 1)
y_train = linear_function(x_train).reshape(-1, 1)

# Test inputs and their true outputs, also as single columns
x_test = np.array(values_for_testing).reshape(-1,1)
y_test = linear_function(x_test).reshape(-1, 1)

### Instantiation of the model and definition of the NN
In this practical session, we will make use of tensorflow and keras python packages. We will need to go through several concepts first.

In [None]:
### Exercise 3: Instantiate a 1 layer 1 neuron Neural Network

# Instantiate a sequential model from keras
model = 
#

# Add to the model an input layer of shape (1,) and a Dense layer with 1 neuron


#

In [None]:
### Exercise 4: Compile the model

# Compile the model using MeanAbsoluteError as loss, Adam as optimizer and 
# 'mean_squared_error', 'mean_absolute_error' as metrics


#


### Fitting

In [None]:
### Exercise 5: Fit the model 

# Fit the model with x_train, y_train, a batch size of 8, 256 epochs, include 
# the validation data and verbose 1
history = model.fit()
#

# plotting_models takes lists of models and histories, so the single
# model/history pair is wrapped in lists here.
models = []
histories = []
models.append(model)
histories.append(history)

### Evaluation

In [None]:
### Do not modify this cell, not an exercise

def plotting_models(models, histories, x_train, y_train, x_test, y_test):
  """Plot model predictions and mean-absolute-error curves side by side.

  Left panel: the real test values plus one scatter of predictions per model,
  with vertical lines at the minimum and maximum of x_train.
  Right panel: training and validation mean absolute error of every history.

  Args:
    models: sequence of objects exposing `predict(x_test)`.
    histories: sequence of objects whose `.history` dict contains the keys
      'mean_absolute_error' and 'val_mean_absolute_error'.
    x_train: training inputs; only their min and max are used.
    y_train: not used by this function.
    x_test: test inputs.
    y_test: real values for x_test.
  """
  fig, (ax_pred, ax_err) = plt.subplots(ncols=2, figsize=np.array([2*6.4, 4.8]))

  # Left panel: real values below, predictions of each model on top
  ax_pred.scatter(x_test, y_test, label='Real values', zorder=1)
  for idx, net in enumerate(models):
    predicted = net.predict(x_test)
    ax_pred.scatter(x_test, predicted, label= f'Predictions_{idx}', zorder=2)
  train_low = np.min(x_train)
  ax_pred.axvline(train_low, label='Train interval_min', color='red', zorder=0)
  train_high = np.max(x_train)
  ax_pred.axvline(train_high, label='Train interval_max', color='green', zorder=0)
  ax_pred.set_xlabel('X values')
  ax_pred.set_ylabel('Y Values')
  ax_pred.legend()

  # Right panel: one training and one validation MAE curve per history
  for idx, run in enumerate(histories):
    recorded = run.history
    ax_err.plot(recorded['mean_absolute_error'], label=f'MAE_{idx}')
    ax_err.plot(recorded['val_mean_absolute_error'], label=f'MAE_val_{idx}')
  ax_err.set_ylabel('Mean Absolute Error')
  ax_err.set_xlabel('epoch')
  ax_err.legend()

In [None]:
### Do not modify this cell, not an exercise

# Left panel: predictions vs. real values; right panel: MAE per epoch.
plotting_models(models, histories, x_train, y_train, x_test, y_test)

In [None]:
### Do not modify this cell, not an exercise

# Draw the model architecture into 'model.png', with layer shapes shown.
keras.utils.plot_model(model, 'model.png', show_shapes=True)

## NN for non linear functions
We have seen that NNs can handle linear functions, but they are at their strongest when non-linearities come into play. In this section we will tackle several examples of that.

In [None]:
### Do not modify this cell, not an exercise

class CustomCallback(keras.callbacks.Callback):
    """Keras callback that redraws predictions against real values while training.

    Every `epoch_number` epochs (including epoch 0) the current predictions on
    `x_test` are scattered together with `y_test`, and vertical lines mark the
    minimum and maximum of `x_train`.

    Args:
        epoch_number: redraw when `epoch % epoch_number == 0`.
        model: model whose `predict` is called on `x_test`.
        x_train: training inputs; only their min and max are used.
        x_test: inputs to predict on.
        y_test: real values for `x_test`.
    """

    def __init__(self, epoch_number, model, x_train, x_test, y_test):
        # Let the Keras base class set up its own state before adding ours.
        super().__init__()
        self.epoch_number = epoch_number
        self.model = model
        # Keep the training inputs on the instance so the interval markers
        # follow the data passed in rather than a notebook-global x_train.
        self.x_train = x_train
        self.x_test = x_test
        self.y_test = y_test

    def on_epoch_end(self, epoch, logs=None):
        """Redraw the prediction plot if `epoch` is a multiple of `epoch_number`.

        `logs` is accepted for compatibility with the Keras hook and is not used.
        """
        if epoch % self.epoch_number != 0:
            return
        y_pred = self.model.predict(self.x_test)
        # Clear the current axes so each refresh starts from an empty plot.
        plt.cla()
        pl.scatter(self.x_test, self.y_test, label='Real values', color=u'#1f77b4', zorder=1)
        pl.scatter(self.x_test, y_pred, label= 'Predictions', color= u'#ff7f0e', zorder=2)
        pl.axvline(np.min(self.x_train), label='Train interval_min', color='red', zorder=0)
        pl.axvline(np.max(self.x_train), label='Train interval_max', color='green', zorder=0)
        pl.xlabel('X values')
        pl.ylabel('Y Values')
        pl.title(f"Epoch {epoch}")
        # pl.legend()
        # Show the figure, then schedule the output to be cleared when the
        # next one is displayed (wait=True).
        display.display(pl.gcf())
        display.clear_output(wait=True)
        time.sleep(0.05)

### PIECEWISE

#### Preparation of the data

In [None]:
### Exercise 6: Define a piecewise function using np.piecewise

def piecewise_function(x):
  """Exercise stub: return a continuous piecewise function of x (np.piecewise).

  Later cells call this on NumPy arrays spanning -10 to 10 and also -50 to 50,
  and then call `.reshape(-1, 1)` on the result, so it must return a NumPy
  array and be defined over that whole range.
  """
  # Define a piecewise function that is continuous in the interval -10 to 10
  y = 
  #
  return y

In [None]:
### Do not modify this cell, not an exercise

# Visual check of the piecewise function on 256 points between -10 and 10
x_tosee = np.linspace(-10, 10, 256)
y_tosee = piecewise_function(x_tosee)
plt.scatter(x_tosee, y_tosee)
plt.xlabel('X values')
plt.ylabel('Y values')
plt.title('Piecewise function')

In [None]:
### Do not modify this cell, not an exercise

# Train on [-10, 10] ...
x_train = np.linspace(-10, 10, 256).reshape(-1, 1)
y_train = piecewise_function(x_train).reshape(-1, 1)

# ... and test on the wider interval [-50, 50], which extends beyond the
# training interval on both sides.
x_test = np.linspace(-50, 50, 256).reshape(-1,1)
y_test = piecewise_function(x_test).reshape(-1, 1)

#### Fitting

In [None]:
### Exercise 7: Complete the following function

def compile_and_fit_nonlinear(neurons, epochs, lr, bs, x_train, y_train, x_test, y_test, minval, maxval):
  """Exercise stub: build, compile and fit a ReLU network, then print its weights.

  Args:
    neurons: sequence with one entry per hidden Dense layer; neurons[i] is the
      number of neurons of hidden layer i.
    epochs: number of training epochs.
    lr: learning rate for the Adam optimizer.
    bs: batch size.
    x_train, y_train: training data.
    x_test, y_test: passed to the plotting callback; presumably also the
      validation data (confirm when completing the fit call).
    minval, maxval: bounds of the RandomUniform kernel initializer, whose seed
      is fixed to 1.

  Returns:
    The tuple (model, history).
  """
  tf.keras.backend.clear_session()

  # Instantiate a sequential model and add an input layer of shape (1,)
  model = 

  #
  for i in range(len(neurons)):
    initializer = tf.keras.initializers.RandomUniform(minval=minval, 
                                                      maxval=maxval, 
                                                      seed=1)
    # Add a Dense layer to the model with "relu" activation, 
    # kernel_initializer = initializer and bias_initializer as Ones. The number
    # of neurons will be neurons[i]
    model.add()
    #
  
  # Add a Dense layer to the model with one neuron
  
  #

  # Compile the model using MeanSquaredError as loss, Adam with learning_rate = lr 
  # as optimizer and 'mean_squared_error', 'mean_absolute_error' as metrics
  
  #
  
  # Fit the model using the following options batch size = bs, epochs=epochs, True for shuffle,
  # use training data and validation data. use as a callback the class defined
  # previously with inputs (100, model, x_train, x_test, y_test)
  history = 
  #

  model.summary()
  # Print the first weight variable and the bias of every layer in model.layers
  for i in range(len(model.layers)):
    print(f"Layer: {i}")
    print(model.layers[i].weights[0].numpy())
    print(model.layers[i].bias.numpy())
  return model, history

In [None]:
### Exercise 8: Play with the values in this cell to see how they affect the results

models, histories = [], []
# Arguments are spelled out by keyword so it is clear which value is which
# hyperparameter while experimenting.
model, history = compile_and_fit_nonlinear(
    neurons=[2],
    epochs=4000,
    lr=0.005,
    bs=128,
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    minval=-0.1,
    maxval=0.1,
)
models.append(model)
histories.append(history)

# test_scores = model.evaluate(x_test, y_test, verbose=2)

In [None]:
# model, history = compile_and_fit_nonlinear([2], 4000, 0.005, 128, x_train, y_train, x_test, y_test, -0.1, 0.1)

#### Evaluation

In [None]:
### Do not modify this cell, not an exercise

# Compare predictions with the piecewise function and show the MAE curves.
plotting_models(models, histories, x_train, y_train, x_test, y_test)

### TRIGONOMETRIC

#### Preparation of the data

In [None]:
### Exercise 9: Define a trigonometric function of x using numpy

def trigonometric(x):
  """Exercise stub: return a trigonometric function of x built with numpy.

  Later cells call this on NumPy arrays and then call `.reshape(-1, 1)` on the
  result, so the expression must return a NumPy array when it is given one.
  """
  # Use numpy to define a trigonometric function of x
  y = 
  #
  return y

In [None]:
### Do not modify this cell, not an exercise

# Plot to doublecheck
# Visual check of the trigonometric function on 256 points between 0 and 2*pi
x_tosee = np.linspace(0, 2*np.pi, 256)
y_tosee = trigonometric(x_tosee)
plt.scatter(x_tosee, y_tosee)
plt.xlabel('X values')
plt.ylabel('Y values')
plt.title('Trigonometric function')

In [None]:
### Do not modify this cell, not an exercise

# Train on [0, 2*pi] ...
x_train = np.linspace(0, 2*np.pi, 256).reshape(-1, 1)
y_train = trigonometric(x_train).reshape(-1, 1)

# ... and test on the wider interval [-3*pi, 3*pi], which extends beyond the
# training interval on both sides.
x_test = np.linspace(-3*np.pi, 3*np.pi, 512).reshape(-1,1)
y_test = trigonometric(x_test).reshape(-1, 1)

#### Fitting

In [None]:
### Exercise 10: Use the function compile_and_fit_nonlinear

models = []
histories = []

# Use the suggested function with at least three layers with less than 10 neurons,
# thousands of epochs, lr 0.005 and bs multiple of 2 up to 256, minval -0.5 and maxval 0.5
model, history = 
#

models.append(model)
histories.append(history)

# test_scores = model.evaluate(x_test, y_test, verbose=2)

#### Evaluation

In [None]:
### Do not modify this cell, not an exercise

# Compare predictions with the trigonometric function and show the MAE curves.
plotting_models(models, histories, x_train, y_train, x_test, y_test)

## NN for classification
In this exercise we will develop a NN that classifies pictures as cars or non-cars. For that, we will use a subset of pictures from the following references:

- [Car or Not a Car](https://medium.com/@oviyum/lessons-from-fine-tuning-a-convolutional-binary-classifier-ccf9388e46d8)
- [Cars Dataset](https://ai.stanford.edu/~jkrause/cars/car_dataset.html)
- [Caltech256](http://www.vision.caltech.edu/Image_Datasets/Caltech256/)

### Preparation of the data

In [None]:
### Do not modify this cell, not an exercise

# Properties of the model
# img_height and img_width are the size the datasets resize pictures to and
# the input shape of the convolutional model; batch_size is the dataset batch size.
img_height = 128
img_width = 128
batch_size = 256

In [None]:
### Do not modify this cell, not an exercise

# You will receive a prompt asking for permission to access your Google Drive
# from Google Colab
from google.colab import drive
drive.mount('/content/drive')

In [None]:
### Exercise 11: Look for the cars and non cars folder

# Look for the following folders in your google drive
cars_training_folder = ''
cars_test_folder = ''
noncars_train_folder = ''
noncars_test_folder = ''
#

# Print the first two matches of every pattern under a label that names the
# folder being checked (all four labels used to read "Car training folder:").
for folder_label, folder_pattern in (
    ("Car training folder:", cars_training_folder),
    ("Car test folder:", cars_test_folder),
    ("Non-car training folder:", noncars_train_folder),
    ("Non-car test folder:", noncars_test_folder),
):
  list_of_files = glob.glob(folder_pattern)
  print(f"\n{folder_label}")
  print(list_of_files[:2])

In [None]:
### Do not modify this cell, not an exercise

def show_example(folder_path):
  """Pick a random jpg from a folder, print it and the jpg count, return its path.

  Args:
    folder_path: folder prefix that "*.jpg" is appended to, so it must end
      with a path separator (for example "/content/drive/.../cars/").

  Returns:
    The path of the randomly chosen picture, as a string.

  Raises:
    ValueError: if no jpg file matches the pattern.
  """
  list_of_pictures = glob.glob(folder_path + "*.jpg")
  number_of_pictures = len(list_of_pictures)
  if not list_of_pictures:
    raise ValueError(
        f"No jpg files found for pattern '{folder_path}*.jpg'. "
        "Check that the folder path is correct and ends with a path separator.")
  # np.random.randint excludes `high`, so pass the full count: every picture
  # (including the last one) can be drawn, and a single picture also works.
  random_picture = list_of_pictures[np.random.randint(0, high=number_of_pictures)]
  print(f"Example file: {random_picture}")
  print(f"Number of jpg files: {number_of_pictures}")
  return str(random_picture)

In [None]:
### Exercise 12: Use the function show_example to verify you have selected the folders correctly

# Include your line here
random_picture = 
#

# Open the picture at the selected path
PIL.Image.open(random_picture)

In [None]:
### Exercise 13: Look for the correct folders to use image_dataset_from_directory from keras

# Complete the path as a string below
data_dir_train = ''
#
# Train dataset
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir_train,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  label_mode='binary')

# Complete the path as a string below
data_dir_test = ''
#
# Test dataset
test_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir_test,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  label_mode='binary')

class_names = train_ds.class_names
print(f"Training classes: {class_names}")

# NOTE: after this line class_names holds the test classes; later cells
# (picture preview, test_other_pictures) read this variable.
class_names = test_ds.class_names
print(f"Test classes: {class_names}")

# Uncomment the loop below to also print the batch shapes
# for image_batch, labels_batch in train_ds:
#   print(f"Shape of the pictures in the batch: {image_batch.shape}")
#   print(f"Shape of the labels in the batch: {labels_batch.shape}")
#   break

# The output of this cell should be (the last two lines only appear when the
# commented loop above is enabled):
# Found 1000 files belonging to 2 classes.
# Found 300 files belonging to 2 classes.
# Training classes: ['cars', 'others']
# Test classes: ['cars', 'others']
# Shape of the pictures in the batch: (256, 128, 128, 3)
# Shape of the labels in the batch: (256, 1)

In [None]:
### Do not modify this cell, not an exercise

# Extra check: if you run this cell you will see a 3 by 3 set of pictures with the corresponding labels
plt.figure(figsize=(10, 10))
# Take one batch of the training dataset and show its first 9 pictures
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    # The label is used as an index into class_names
    plt.title(class_names[int(labels[i])])
    plt.axis("off")
print(class_names)

### Fitting with convolutions

In [None]:
### Exercise 14: Complete the following function to instantiate, compile and fit a convolutional neural network

def compile_and_fit_conv(convfilters, convsize, densesize, lr, epochs, train_ds, val_ds):
  """Exercise stub: build, compile and fit a convolutional binary classifier.

  The model starts with a Rescaling(1/255) layer whose input shape comes from
  the module-level img_height and img_width, and ends with a single sigmoid
  unit.

  Args:
    convfilters: sequence with one entry per convolutional block.
    convsize: sequence of the same length as convfilters.
    densesize: sequence with one entry per hidden Dense layer.
    lr: learning rate for the Adam optimizer.
    epochs: number of training epochs.
    train_ds: training dataset; must expose `class_names`.
    val_ds: validation dataset.

  Returns:
    The tuple (model, history).

  Raises:
    IndexError: if convfilters and convsize differ in length, or if
      convfilters, convsize or densesize is empty.
  """
  # Sanity checks
  if len(convfilters) != len(convsize):
    raise IndexError('Length of convfilters and convsize is required to be the same.')
  if len(convfilters) < 1 or len(convsize) < 1:
    raise IndexError('Length of convfilters or convsize is required to be higher than 0.')
  if len(densesize) < 1:
    raise IndexError('Length of densesize is required to be higher than 0.')

  # Getting some handy variables
  # (num_classes is not used by the code visible in this template)
  class_names = train_ds.class_names
  num_classes = len(class_names)

  # Cleaning keras backend to avoid piling up training phases
  tf.keras.backend.clear_session()

  # Definition of the model
  model = keras.Sequential()
  model.add(layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)))
  for i in range(len(convfilters)):
    initializer = tf.keras.initializers.HeNormal(seed=1)
    # Add to the model a suitable convolutional layer, use padding 'same' and activation 'relu'
    ,
    #
    # Add to the model a suitable max pooling layer
    ,
    #

  model.add(layers.Flatten())

  for j in range(len(densesize)):
    # Add to the model a Dense layer, use activation "relu" and initializer as a kernel initializer
 
    #
  model.add(layers.Dense(1, activation='sigmoid'))

  # Compile the model using a loss function as binary crossentropy (logits=False),
  # Adam with lr as learning rate and accuracy as metrics

  #

  model.summary()

  # Fit the model
  history = 
  #
  
  return model, history

In [None]:
### Exercise 15: Select suitable parameters for compile and fit function

# Do not modify the format of the variables
# convfilters and convsize must have the same, non-zero length and densesize
# must not be empty; compile_and_fit_conv raises IndexError otherwise.
convfilters = []
convsize = []
densesize = []
lr =  # Careful with this value, around 0.01 should be fine
epochs =  # Around 20 should be fine
#

# The test dataset is passed as the validation dataset (val_ds)
model, history = compile_and_fit_conv(convfilters, convsize, densesize, lr, epochs, train_ds, test_ds)

### Evaluation with convolutions

In [None]:
### Do not modify this cell, not an exercise

def plotting_prediction(history):
  """Plot training and validation accuracy and loss per epoch, side by side.

  Args:
    history: object whose `.history` dict contains the keys 'accuracy',
      'val_accuracy', 'loss' and 'val_loss'.
  """
  recorded = history.history
  # Read every curve up front, before any figure is created
  curves = {key: (recorded[key], recorded[f'val_{key}']) for key in ('accuracy', 'loss')}

  fig, panels = plt.subplots(ncols=2, figsize=np.array([2*6.4, 4.8]))

  for panel, (key, name) in zip(panels, (('accuracy', 'Accuracy'), ('loss', 'Loss'))):
    train_curve, val_curve = curves[key]
    # Epochs are numbered from 1 on the x axis
    epoch_numbers = np.arange(1, len(train_curve) + 1)
    panel.plot(epoch_numbers, train_curve, label=f'Training {name}')
    panel.plot(epoch_numbers, val_curve, label=f'Validation {name}')
    panel.legend()
    panel.set_title(f'Training and Validation {name}')
    panel.set_xlabel('Epochs')
    panel.set_ylabel(name)

In [None]:
### Do not modify this cell, not an exercise

# Accuracy curves on the left panel, loss curves on the right panel.
plotting_prediction(history)

In [None]:
### Exercise 16: Locate the test pictures

# Modify the following line
test_pictures_path = ''
#

# glob expands the pattern into the list of matching picture paths
sample_pictures = glob.glob(test_pictures_path)
print(sample_pictures)

# The output of this cell should be
# ['/content/drive/MyDrive/nebrija_data_science_student/02-neural_networks/datasets/test_01.jpg', 
# '/content/drive/MyDrive/nebrija_data_science_student/02-neural_networks/datasets/test_02.jpg', 
# '/content/drive/MyDrive/nebrija_data_science_student/02-neural_networks/datasets/test_03.jpg']

In [None]:
### Do not modify this cell, not an exercise

def test_other_pictures(list_of_pictures):
  """Run the classifier on each picture and print the predicted class.

  Reads the module-level `model`, `class_names`, `img_height` and `img_width`.

  Args:
    list_of_pictures: sequence of image file paths.
  """
  for i, picture_path in enumerate(list_of_pictures):
    print(f"Picture number {i}: {os.path.basename(picture_path)}")
    # The context manager closes the file once the pixels are copied into the
    # array. Converting to RGB guarantees three channels, which is what the
    # model input expects, even for grayscale or RGBA pictures.
    with PIL.Image.open(picture_path) as picture_file:
      test_picture = np.array(picture_file.convert("RGB"))
    print(f"Picture size {test_picture.shape}")
    # Resize to the model input size and add a leading batch dimension
    test_picture_resized = tf.expand_dims(tf.image.resize(test_picture, (img_height, img_width)),0)
    print(f"Piture size after resize {test_picture_resized.shape}")
    prediction = model.predict(test_picture_resized)
    # Outputs above 0.5 map to class index 1, anything else to class index 0
    prediction_class = 1 if prediction > 0.5 else 0
    print(f"Model prediction: {prediction} - {class_names[prediction_class]}\n")

In [None]:
### Do not modify this cell, not an exercise

# Classify the sample pictures with the trained model.
test_other_pictures(sample_pictures)

In [None]:
### Exercise 17: Store the model in your google drive for later use

# Modify the following line to include the corresponding method

#

In [None]:
### Exercise 18: Load the model and check the results are the same

# Delete the model first

#

# Then load it as 'model' from the file you have created previously
model = 
#

In [None]:
### Do not modify this cell, not an exercise

# Classify the same pictures again, now with the model loaded from file.
test_other_pictures(sample_pictures)