In [None]:
#hide
# Uncomment the next line to install/upgrade fastbook if it is not already available.
# !pip install -Uqq fastbook
import fastbook
# NOTE(review): setup_book() presumably prepares the runtime for the book's notebooks — confirm in the fastbook docs.
fastbook.setup_book()

In [None]:
#hide
from fastbook import *
from fastai.vision.widgets import *

# From Model to Production

## The Practice of Deep Learning

### Starting Your Project

### The State of Deep Learning

#### Computer vision

#### Text (natural language processing)

#### Combining text and images

#### Tabular data

#### Recommendation systems

#### Other data types

### The Drivetrain Approach

## Gathering Data

# clean
To download images with Bing Image Search, sign up at [Microsoft Azure](https://azure.microsoft.com/en-us/services/cognitive-services/bing-web-search-api/) for a free account. You will be given a key, which you can either export as the `AZURE_SEARCH_KEY` environment variable before starting Jupyter, or copy and enter in a cell as follows (replacing 'XXX' with your key and executing it):

In [None]:
import os
# Read the search API key from the environment; 'XXX' is the placeholder
# value used when AZURE_SEARCH_KEY is not set.
key = os.getenv('AZURE_SEARCH_KEY', 'XXX')

In [None]:
# A bare name as the last expression of a cell displays the object itself,
# here the `search_images_bing` function called in the following cells.
search_images_bing

In [None]:
# Search Bing for 'grizzly bear' with the API key, then collect the
# 'contentUrl' entry of every result.
results = search_images_bing(key, 'grizzly bear')
ims = results.attrgot('contentUrl')
len(ims)  # how many image URLs came back

In [None]:
#hide
# Overrides `ims` with one fixed URL — presumably so the hidden book build
# does not depend on live search results.
ims = ['http://3.bp.blogspot.com/-S1scRCkI3vY/UHzV2kucsPI/AAAAAAAAA-k/YQ5UzHEm9Ss/s1600/Grizzly%2BBear%2BWildlife.jpg']

In [None]:
dest = 'images/grizzly.jpg'
# Make sure the target folder exists before downloading: on a fresh
# environment there may be no images/ directory, in which case writing the
# file can fail with FileNotFoundError.
Path(dest).parent.mkdir(parents=True, exist_ok=True)
# Fetch the first search result and store it at `dest`.
download_url(ims[0], dest)

In [None]:
# Open the file saved at `dest` and display a 128x128 thumbnail of it.
im = Image.open(dest)
im.to_thumb(128,128)

In [None]:
# The classes to collect images for, and the root folder of the dataset
# (one sub-folder per class is created under it).
bear_types = ('grizzly', 'black', 'teddy')
path = Path('bears')

In [None]:
# Download search results for every bear type into bears/<type>/.
#
# A class is downloaded only when its folder is missing or empty. Guarding on
# the existence of `bears/` alone meant that a failed or interrupted first run
# (e.g. an invalid API key) left an empty dataset that every re-run skipped.
path.mkdir(exist_ok=True)
for o in bear_types:
    # Deliberately not named `dest`: that name holds the sample image path
    # read by the thumbnail cell and must survive re-running this cell.
    class_dir = path/o
    class_dir.mkdir(exist_ok=True)
    if any(class_dir.iterdir()): continue  # already populated, nothing to do
    results = search_images_bing(key, f'{o} bear')
    download_images(class_dir, urls=results.attrgot('contentUrl'))

In [None]:
# Collect the image files found under `path` and display the list.
fns = get_image_files(path)
fns

In [None]:
# Check the downloaded files; `failed` holds the ones that did not verify as images.
failed = verify_images(fns)
failed

In [None]:
# Delete every file that failed verification; the trailing `;` suppresses the cell output.
failed.map(Path.unlink);

### Sidebar: Getting Help in Jupyter Notebooks

### End sidebar

## From Data to DataLoaders

In [None]:
# Blueprint describing how to turn the image folder into model-ready data:
# what the inputs/targets are, how to list, label and split the items, and
# how each item is resized.
bears = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    item_tfms=Resize(128),
)

In [None]:
# Build the DataLoaders from the `bears` DataBlock, using `path` as the data source.
dls = bears.dataloaders(path)

In [None]:
# Preview up to four validation items in a single row.
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Same DataBlock, but resizing with the Squish method; rebuild the loaders and
# preview the validation items to see the effect.
bears = bears.new(item_tfms=Resize(128, ResizeMethod.Squish))
dls = bears.dataloaders(path)
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Same DataBlock, but resizing with the Pad method and 'zeros' padding;
# rebuild the loaders and preview the validation items.
bears = bears.new(item_tfms=Resize(128, ResizeMethod.Pad, pad_mode='zeros'))
dls = bears.dataloaders(path)
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
# Swap the item transform for RandomResizedCrop (min_scale=0.3) and rebuild the loaders.
bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))
dls = bears.dataloaders(path)
# unique=True on the training loader: presumably repeats one image so the
# different random crops can be compared — confirm in the fastai docs.
dls.train.show_batch(max_n=4, nrows=1, unique=True)

### Data Augmentation

In [None]:
# Plain 128px resize per item plus batch-level augmentations from
# aug_transforms (mult=2), then preview eight training items in two rows.
bears = bears.new(item_tfms=Resize(128), batch_tfms=aug_transforms(mult=2))
dls = bears.dataloaders(path)
dls.train.show_batch(max_n=8, nrows=2, unique=True)

## Training Your Model, and Using It to Clean Your Data

In [None]:
# Data configuration used for training: random resized crops of size 224
# (min_scale=0.5) per item and the default aug_transforms() per batch.
bears = bears.new(
    item_tfms=RandomResizedCrop(224, min_scale=0.5),
    batch_tfms=aug_transforms())
dls = bears.dataloaders(path)

In [None]:
# Create a learner from the DataLoaders with a resnet18 architecture,
# tracking error_rate, and fine-tune it for 4 epochs.
# NOTE(review): newer fastai releases deprecate `cnn_learner` in favour of
# `vision_learner` — confirm the installed version before switching.
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(4)

In [None]:
# Build an interpretation object from the trained learner and plot its confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [None]:
# Show the five highest-loss items in one row.
interp.plot_top_losses(5, nrows=1)

In [None]:
# Interactive widget, built from the trained learner, for marking images to
# delete or relabel; the selections are read back via cleaner.delete() and
# cleaner.change().
cleaner = ImageClassifierCleaner(learn)
cleaner

In [None]:
#hide
# for idx in cleaner.delete(): cleaner.fns[idx].unlink()
# for idx,cat in cleaner.change(): shutil.move(str(cleaner.fns[idx]), path/cat)

## Turning Your Model into an Online Application

### Using the Model for Inference

In [None]:
# Save the trained learner to disk (the next cells look for 'export.pkl' in the current directory).
learn.export()

In [None]:
# NOTE: this rebinds `path` (previously Path('bears')) to the current
# directory; re-running the earlier data-loading cells after this one would
# read from '.' instead of the dataset folder.
path = Path()
path.ls(file_exts='.pkl')  # list the .pkl files here; the exported learner should be among them

In [None]:
# Reload the exported learner for inference.
# NOTE(review): load_learner unpickles the file, so only load exports that
# come from a trusted source.
learn_inf = load_learner(path/'export.pkl')

In [None]:
# Run the reloaded learner on the sample image downloaded earlier.
learn_inf.predict('images/grizzly.jpg')

In [None]:
# The category vocabulary stored with the learner's DataLoaders.
learn_inf.dls.vocab

### Creating a Notebook App from the Model

In [None]:
# File-upload widget; displayed by leaving it as the last expression of the cell.
btn_upload = widgets.FileUpload()
btn_upload

In [None]:
#hide
# For the book, we can't actually click an upload button, so we fake it
# with a stand-in object exposing the same `.data` list the following cells read.
btn_upload = SimpleNamespace(data = ['images/grizzly.jpg'])

In [None]:
# Turn the most recent upload (last entry of `.data`) into an image.
img = PILImage.create(btn_upload.data[-1])

In [None]:
# Output widget used as the image preview area.
out_pl = widgets.Output()
out_pl.clear_output()
# Anything displayed inside the `with` block is rendered into the widget.
with out_pl: display(img.to_thumb(128,128))
out_pl

In [None]:
# predict returns three values, unpacked here as the predicted label, its
# index, and the probabilities that index is looked up in.
pred,pred_idx,probs = learn_inf.predict(img)

In [None]:
# Label widget showing the prediction and its probability (four decimal places).
lbl_pred = widgets.Label()
lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'
lbl_pred

In [None]:
# Button the user presses to classify the uploaded image.
btn_run = widgets.Button(description='Classify')
btn_run

In [None]:
def on_click_classify(change):
    """Classify the most recently uploaded image and show the result.

    Reads the last entry of `btn_upload.data`, renders a 128x128 thumbnail of
    it into `out_pl`, runs `learn_inf.predict` on it and writes the predicted
    label and its probability into `lbl_pred`.

    If nothing has been uploaded yet, the preview is cleared and `lbl_pred`
    asks the user to upload an image; no prediction is made.

    Args:
        change: The argument the widget passes to click callbacks; unused.
    """
    # Clicking before uploading would otherwise raise IndexError on
    # `data[-1]` inside the callback, with nothing shown to the user.
    if not btn_upload.data:
        out_pl.clear_output()
        lbl_pred.value = 'Please upload an image first.'
        return
    img = PILImage.create(btn_upload.data[-1])
    out_pl.clear_output()
    with out_pl: display(img.to_thumb(128,128))
    pred,pred_idx,probs = learn_inf.predict(img)
    lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'

# Run on_click_classify every time the Classify button is clicked.
btn_run.on_click(on_click_classify)

In [None]:
#hide
#Putting back btn_upload to a widget for next cell
# (replaces the stand-in object with a fresh FileUpload widget)
btn_upload = widgets.FileUpload()

In [None]:
# Assemble the app: prompt, upload button, classify button, image preview
# and result label, stacked vertically.
VBox([
    widgets.Label('Select your bear!'),
    btn_upload,
    btn_run,
    out_pl,
    lbl_pred,
])

### Turning Your Notebook into a Real App

In [None]:
#hide
# !pip install voila
# !jupyter serverextension enable --sys-prefix voila 

### Deploying your app

## How to Avoid Disaster

### Unforeseen Consequences and Feedback Loops

## Get Writing!

## Questionnaire

1. Provide an example of where the bear classification model might work poorly in production, due to structural or style differences in the training data.

    - might not recognize photos taken on a low-quality camera

1. Where do text models currently have a major deficiency?

    - can only produce appropriate answers, not correct ones

1. What are possible negative societal implications of text generation models?

    - it's easier to autogenerate misinformation than information, which could lead to overwhelming volumes of nonsense

1. In situations where a model might make mistakes, and those mistakes could be harmful, what is a good alternative to automating a process?

    - automated assistance to a manual process

1. What kind of tabular data is deep learning particularly good at?

    - categorical columns with large numbers of variants

1. What's a key downside of directly using a deep learning model for recommendation systems?

    - it may recommend people things they already knew about, or that people like them already knew about, instead of novel but appropriate things

1. What are the steps of the Drivetrain Approach?

    - define your objective
    - figure out what levers you have to achieve your objective
    - figure out what data you can collect to inform your lever pulling
    - do it: build models that use that data to predict which lever settings best achieve the objective

1. How do the steps of the Drivetrain Approach map to a recommendation system?

    - your objective is to increase sales
    - your levers are recommending things
    - your data is what things people often buy together
    - your model can predict what things people might buy if they've already bought some other things

1. Create an image recognition model using data you curate, and deploy it on the web.

    - ehh maybe later

1. What is `DataLoaders`?

    - fastai class that stores the `DataLoader` objects you give it (usually one for training and one for validation) and makes them available as `train` and `valid`

1. What four things do we need to tell fastai to create `DataLoaders`?

    - What kinds of data we are working with
    - How to get the list of items
    - How to label these items
    - How to create the validation set

1. What does the `splitter` parameter to `DataBlock` do?

    - decides how to pick the validation set

1. How do we ensure a random split always gives the same validation set?

    - use a constant seed

1. What letters are often used to signify the independent and dependent variables?

    - x independent, y dependent

1. What's the difference between the crop, pad, and squish resize approaches? When might you choose one over the others?

    - crop cuts the edges of an image to make it square, potentially losing some details of the image
    - pad adds zeroes to an image to make it square, potentially losing some resolution and wasting computation
    - squish scales an image to make it square, potentially distorting the image

1. What is data augmentation? Why is it needed?

    - adding transformed copies of the data in ways that are deemed not to affect its meaning, to help the model learn that those transformations are irrelevant (and focus it more on the relevant details).

1. What is the difference between `item_tfms` and `batch_tfms`?

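    - `item_tfms` are applied to each item individually (e.g. resizing every image to the same size) before the items are collated into a batch; `batch_tfms` are transformations that may run in parallel on a whole batch, which is faster than transforming individually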

1. What is a confusion matrix?

    - a matrix whose (i,j)th element is the number of items whose actual class is i that the model labelled as j (rows are actual classes, columns are predictions, so the diagonal holds the correct predictions)

1. What does `export` save?

    - the architecture, the parameters, and the dataloaders

1. What is it called when we use a model for getting predictions, instead of training?

    - inference

1. What are IPython widgets?

    - javascript widgets that are connected to python handlers

1. When might you want to use CPU for deployment? When might GPU be better?

    - CPUs may be cheaper / more easily available
    - GPUs are better for very parallel workloads (e.g. when many inputs are available at once for batch processing)

1. What are the downsides of deploying your app to a server, instead of to a client (or edge) device such as a phone or PC?

    - your app requires a network connection to operate
    - your users may have privacy concerns if you need to hand over potentially-sensitive data

1. What are three examples of problems that could occur when rolling out a bear warning system in practice?

    - bears
    - bears
    - bears

1. What is "out-of-domain data"?

    - data that is unlike the training set, e.g. pictures at night instead of during the day

1. What is "domain shift"?

    - when e.g. your customer demographics change relative to what they were when you trained your model

1. What are the three steps in the deployment process?

    - fully manual
    - scope-limited rollout
    - gradual full rollout

### Further Research

1. Consider how the Drivetrain Approach maps to a project or problem you're interested in.
1. When might it be best to avoid certain types of data augmentation?
1. For a project you're interested in applying deep learning to, consider the thought experiment "What would happen if it went really, really well?"
1. Start a blog, and write your first blog post. For instance, write about what you think deep learning might be useful for in a domain you're interested in.