In [4]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

from keras import *
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import (
    Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D,
    InputLayer, Lambda,
)
from keras.optimizers import RMSprop
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU

from luigi.contrib.external_program import ExternalProgramTask
from luigi.parameter import IntParameter, Parameter
from luigi import LocalTarget, Task

<img src="img/title.png" width="90%">

# About this Workshop

- Learn what neural nets are and what the big deal is with deep learning
- Interactively build a model that differentiates between fruits on images
- Get a glimpse of production-readiness
- Learn about Luigi pipelines and their main components
- Write your own production-ready pipeline
- Get an overview of Luigi's modules
- Get an overview of DVC and how to implement pipelines
- Use TensorFlow Serving to deploy your model

# About us

<img src="img/about_us.png" width="90%">

<img src="img/a-2729781_1280.png">
https://pixabay.com/de/a-ich-ai-anatomie-2729781/

# What is Machine Learning (ML)

<img src="img/supervised.jpg" width="90%">

# A typical ML Workflow

<img src="img/workflow.jpg">

# Why use validation and test data?

<img src="img/overfitting_explained.jpg">

# Validation methods

<img src="img/validation.jpg">

# What are neural nets?

<img src="img/neural_net_brain.jpeg">

# Perceptrons

<img src="img/perceptron.jpg">

# Activation functions normalise input

- every problem can be described with a mathematical function

<img src="img/activation_functions.jpg">

# Activation functions make non-linearity possible

- non-linear activation functions allow us to approximate almost ANY mathematical function with neural nets

<img src="img/non_linearity.png" width="90%">

# Multi-Layer Perceptrons

<img src="img/mlp.jpg">

# How does a neural net learn?

<img src="img/nn_learn.jpg">

# How does a neural net learn?

## The Softmax function

<img src="img/softmax.jpg">

# How does a neural net learn?

## Cross-entropy

<img src="img/cross-entropy.jpg">

# How does a neural net learn?

## Backpropagation

<img src="img/backpropagation.jpg">

# How does a neural net learn?

## Gradient descent optimization

<img src="img/gradient_descent.jpg">

# Deep Learning

<img src="img/deep_learning.jpg">

# Deep Learning in the wild

<img src="img/dl_tasks.png">

# How does a computer learn to "see"?

<img src="img/cnn_intro.jpg" width="70%">

# How does a computer learn to "see"?

<img src="img/image_input.jpg">

# Jupyter Lab and Python

- i(nteractive) Python
- browser based notebook
- code + markdown + output

<img src="img/jupyter.png">

# Jupyter tips'n'tricks

- ?: search documentation

In [5]:
# IPython help syntax: a trailing `?` looks up the documentation of `len`.
len?

- ??: look at source code

In [6]:
# IPython help syntax: a trailing `??` asks for the source code of the object
# (where IPython can find it) in addition to its documentation.
len??

- tab: autocomplete
- shift + tab: look at function call

<img src="img/jupyter_tip.png">

# Visualization with Matplotlib

- Matplotlib is a Python 2D plotting library
- Can be used in Python scripts, Python/IPython shells, Jupyter notebooks

<img src="img/matplotlib.png">

# Matplotlib

<img src="img/do_yourself.jpg">

# Convolutional Neural Nets

<img src="img/mlp_cnn.jpg">

# ConvNets

<img src="img/cnn_architecture.jpg">

# ConvNets

<img src="img/cnn_layers.jpg">

# ConvNets

<img src="img/cnn.jpg">

# Evolution of neural nets for image recognition

<img src="img/history_nn.jpg">

# CNN architectures

<img src="img/cnn_architectures.jpg" width="80%">

# Introduction to TensorFlow

<img src="img/keras-on-tensorflow-in-r-python-14-638.png">

# What are tensors?

<img src="img/tensor_array.png" width="50%">

# Tensors = multidimensional arrays

<img src="img/tensors.jpg" width="60%">

# Tensor "Flow"

<img src="img/graph.jpeg">

# Graphs in TensorBoard

<img src="img/tensorboard_example.png">

# Keras High-Level API for TensorFlow

<img src="img/keras_1.png">

# Keras APIs

<img src="img/keras_apis.png">

# Keras layers

<img src="img/keras_layers.png">

# Endless possibilities

<img src="img/keras_possibilities.png">

# It works, now DEPLOY it!

<img src="img/deploy_to_prod.jpg">

# Production ready

<img src="img/production.png" width="90%">

# A bit about Luigi 

Luigi helps to stitch long-running tasks together into pipelines
    
It contains a wide toolbox of task templates (e.g. Hive, Pig, Spark, Python)

# How to compose workflows? 

A workflow consists of Targets, Tasks and Parameters

Targets correspond to a file or a database entry or some other kind of checkpoint

Tasks consume Targets of other tasks, run a computation, then output a target

Parameters take care of task parameterization

# Targets

- Files on disk or database entries
- Checkpoints that prevent tasks from multiple executions
- A lot of implementations already exist in the Luigi framework
- LocalTarget (File), RemoteTarget (SSH), HDFSTarget, MySqlTarget, ...

# Targets

<img src="img/luigi_targets.png" width="90%">

# Tasks

- Implement the actual processing
- Consume targets, process data and save results in new target 
- Respect dependencies on other tasks
- Implemented as Python classes

# Tasks

<img src="img/luigi_nice.png" width="90%">

# Parameters

- Like constructor parameters
- Luigi takes care of parameter validation
- Again, a lot of implementations already exist in the Luigi framework
- IntParameter, BoolParameter, DateParameter, etc...

# Parameters

<img src="img/luigi_parameters.png" width="70%">

<img src="img/do_yourself.jpg">

# What would a production-ready workflow look like?

- Download the dataset
    
- Extract the data

- Create a preprocessing configuration

- Run the baseline validation

- Train the model

- Evaluate the model

# Workflow

<img src="img/workflow_luigi.png" width="70%">

<img src="img/do_yourself.jpg">

# DVC - Data Version Control

# Building pipelines in DVC

# Utilize the power of Git

# Reproduce experiments

<img src="img/do_yourself.jpg">

# Data Engineering

# Development vs Experiments

<img src="img/dev_workflow.png" width="90%">

# CI/CD for Experiments

<img src="img/ci_ds.png" width="90%">

# Automate and deploy

# Jenkins

<img src="img/do_yourself.jpg">

# TensorFlow Serving

<img src="img/tf_serving.png" width="80%">

# TensorFlow Serving

- ModelServer as the server runtime for ML models
- Can natively handle TensorFlow graphs
- Highly flexible
- Designed for production use

# TensorFlow Serving in Detail

<img src="img/tf_serving_detail.png">

# TensorFlow Serving Components

- Protobuf file for graph, checkpoint files for weights (if not frozen)
- Version strategy defines how many models are loaded into memory
- Clients request inferences via gRPC or REST (yes REST!)
- TF-Serving consists of pluggable components, namely sources, loaders and version strategies  

<img src="img/do_yourself.jpg">

# Integrate pre-processing into model

- The pre-processing and the model are tightly coupled 
- The data generator can be pickled to make training reproducible
- But how can the client be sure to do the "right" thing?

## Possible ways:

- The client just "knows"
- Wrap service around model and expose an API 
- Build the pre-processing right into the model

In [None]:
def pre_process(x):
    """Resize images to 100x100 and divide their pixel values by 255.

    Meant to be wrapped in a Keras ``Lambda`` layer so that the preprocessing
    becomes part of the model and clients can send unprocessed images.

    Args:
        x: Image tensor accepted by ``tf.image.resize_images`` -- presumably a
            ``(batch, height, width, 3)`` batch with values in 0..255
            (TODO confirm against the serving client).

    Returns:
        Tensor holding the resized images divided by 255.
    """
    # Imported inside the function on purpose: the notebook's import cell does
    # not import TensorFlow, and a function-scope import keeps this function
    # self-contained when Keras re-creates the Lambda layer outside of this
    # notebook's namespace (e.g. when loading a saved model).
    import tensorflow as tf

    resized = tf.image.resize_images(x, size=[100, 100])
    max_color = tf.constant(255, dtype=tf.float32)
    rescaled = tf.divide(resized, max_color)
    return rescaled

# Wrap the preprocessing function in a layer so it can be added to a model;
# output_shape matches the 100x100 resize done in pre_process (3 channels).
additional = Lambda(
    pre_process,
    output_shape=(100, 100, 3),
    name="preprocessing",
)

In [None]:
# Assemble the model to deploy: an input with unspecified height/width and
# 3 channels, then the in-graph preprocessing layer, then every layer of
# `model` in its original order.
# NOTE(review): `model` is not defined in this notebook's visible cells; it is
# presumably the network trained in the hands-on part -- confirm it exists
# before running this cell.
prod_model = Sequential(
    [
        InputLayer(input_shape=(None, None, 3)),
        additional,
    ]
)
for layer in model.layers:
    prod_model.add(layer)