# bear_classifier

In [None]:
#| default_exp bear_classifier

In [None]:
#| export
# ! [ -e /content ] && pip install -Uqq fastbook
import pillow_avif
import fastbook
fastbook.setup_book()

In [None]:
#| export
from fastbook import *
from fastai.vision.widgets import *
from fastai.vision.all import *

# From Model to Production

In [None]:
#| export
# Bear categories to collect images for; each one gets its own sub-folder.
bear_types = ('grizzly', 'black', 'teddy')
# Root folder that holds the downloaded dataset.
path = Path('bears')

In [None]:
#| hide
# if not path.exists():
#     path.mkdir()
#     for o in bear_types:
#         dest = (path/o)
#         dest.mkdir(exist_ok=True)
#         results = search_images_bing(key, f'{o} bear')
#         download_images(dest, urls=results.attrgot('contentUrl'))

The script below is designed to download images of different types of bears. 
It first ensures that the target directory exists. Then, for each bear type, it creates a subdirectory, uses the `search_images_ddg` function to find image URLs for that bear type via DuckDuckGo, drops any missing (`None`) URLs, and downloads the remaining images into that subdirectory. Finally, it reports how many files ended up in each subdirectory.

In [None]:
#| export
# Make sure the dataset root exists; exist_ok makes re-running the cell safe.
path.mkdir(exist_ok=True)

for o in bear_types:
    print(f"\nProcessing {o} bears...")
    # One sub-folder per bear type, named after the type.
    dest = path/o
    dest.mkdir(exist_ok=True)

    results = search_images_ddg(f'{o} bear')
    print(f"Found {len(results)} search results")

    # The results are used directly as URLs; only None entries are dropped.
    valid_urls = [url for url in results if url is not None]
    print(f"Valid URLs: {len(valid_urls)}")

    if not valid_urls:
        print("No valid URLs found!")
        continue

    print(f"Downloading to {dest}...")
    download_images(dest, urls=valid_urls)
    # Check if files were actually downloaded by counting what is in `dest`.
    downloaded_files = list(dest.glob('*'))
    print(f"Downloaded {len(downloaded_files)} files")

In [None]:
#| hide
## check that search_images_ddg works
# results = search_images_ddg('grizzly bear')
# print(f"Number of results: {len(results)}")
# print(f"First 3 results: {results[:3]}")

In [None]:
#| hide
## debugging code - download was not working
# Let's examine the structure of the results
# results = search_images_ddg('grizzly bear')
# print(f"Number of results: {len(results)}")

# # Look at the first result to see its structure
# if len(results) > 0:
#     first_result = results[0]
#     print(f"First result type: {type(first_result)}")
#     print(f"First result: {first_result}")

#     # Check what attributes are available
#     if hasattr(first_result, '__dict__'):
#         print(f"Available attributes: {first_result.__dict__.keys()}")

#     # Try different possible URL attributes
#     url_attrs = ['contentUrl', 'url', 'src', 'image_url', 'thumbnail']
#     for attr in url_attrs:
#         if hasattr(first_result, attr):
#             value = getattr(first_result, attr)
#             print(f"{attr}: {value}")

In [None]:
#| export
# Collect the image files found under `path`.
fns = get_image_files(path)
# Bare expression: displays the collected list when run as a notebook cell.
fns

In [None]:
#| export
# Check the collected files; `failed` holds the ones that did not verify.
failed = verify_images(fns)
print(f"Failed files: {len(failed)}")
# failed

In [None]:
#| export
# Delete every file reported in `failed`; the trailing `;` keeps the notebook
# from echoing the result of `map`.
failed.map(Path.unlink);

## From Data to DataLoaders

In [None]:
#| export
# Template describing how to build the bear dataset:
#   - inputs are images, targets are categories
#   - items come from `get_image_files` and are labelled via `parent_label`
#   - 20% of the items are held out for validation, with a fixed seed
#   - every item is resized to 128
bears = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=RandomSplitter(seed=42, valid_pct=0.2),
    item_tfms=Resize(128),
)

In [None]:
#| export
# Build the DataLoaders from `path` using the `bears` template.
dls = bears.dataloaders(path)

In [None]:
#| export
# Preview up to four items from the validation set in a single row.
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
#| hide
# Same block, but resizing with the Squish method; rebuild the loaders and
# preview the validation set to compare against the default resize.
bears = bears.new(item_tfms=Resize(128, ResizeMethod.Squish))
dls = bears.dataloaders(path)
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
#| hide
# Same block, but resizing with the Pad method and pad_mode='zeros'; rebuild
# the loaders and preview the validation set.
bears = bears.new(item_tfms=Resize(128, ResizeMethod.Pad, pad_mode='zeros'))
dls = bears.dataloaders(path)
dls.valid.show_batch(max_n=4, nrows=1)

In [None]:
#| hide
# Switch the item transform to RandomResizedCrop (size 128, min_scale=0.3) and
# preview the training set.
# NOTE(review): unique=True presumably repeats a single image so the random
# crops can be compared side by side — confirm against the fastai docs.
bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))
dls = bears.dataloaders(path)
dls.train.show_batch(max_n=4, nrows=1, unique=True)

### Data Augmentation

In [None]:
#| hide
# Back to a plain 128 resize per item, adding batch-level augmentation from
# aug_transforms(mult=2); preview eight training items over two rows.
bears = bears.new(item_tfms=Resize(128), batch_tfms=aug_transforms(mult=2))
dls = bears.dataloaders(path)
dls.train.show_batch(max_n=8, nrows=2, unique=True)

## Training Your Model, and Using It to Clean Your Data

In [None]:
#| export
# Configuration actually used for training: RandomResizedCrop at 224 with
# min_scale=0.5 per item, plus the default aug_transforms() per batch.
bears = bears.new(
    item_tfms=RandomResizedCrop(224, min_scale=0.5),
    batch_tfms=aug_transforms())
# Rebuild the DataLoaders so they pick up the new transforms.
dls = bears.dataloaders(path)

In [None]:
#| export
# Create a vision learner on `dls` with the resnet18 architecture, reporting
# error_rate as the metric, then fine-tune it for 4 epochs.
learn = vision_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(4)

In [None]:
#| hide
# Build an interpretation object from the trained learner and plot its
# confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()

In [None]:
#| hide
# Show the five top-loss items in a single row.
interp.plot_top_losses(5, nrows=1)

In [None]:
#| hide
# Create the cleaning widget for the trained learner.
cleaner = ImageClassifierCleaner(learn)
# Bare expression: renders the widget when run as a notebook cell.
cleaner

In [None]:
#| hide
# for idx in cleaner.delete(): cleaner.fns[idx].unlink()
# for idx,cat in cleaner.change(): shutil.move(str(cleaner.fns[idx]), path/cat)

## Turning Your Model into an Online Application

### Using the Model for Inference

In [None]:
#| export
# Save the trained learner to disk using export()'s default settings; later
# cells load it back from `export.pkl`.
learn.export()

In [None]:
#| hide
# NOTE: this rebinds `path` from the dataset folder to the current directory.
path = Path()
# List the .pkl files there to confirm the export was written.
path.ls(file_exts='.pkl')

In [None]:
#| hide
# Load the exported learner back from `export.pkl` for inference.
learn_inf = load_learner(path/'export.pkl')

In [None]:
#| hide
# Run the loaded learner on a single image file.
learn_inf.predict('images/grizzly.jpg')

In [None]:
#| hide
# Inspection only: `learn_inf` is created in a hidden cell, so this cell is
# hidden too. Exporting it would leave the generated module referencing an
# undefined name.
learn_inf.dls.vocab

### Deploying your app

#### Hugging Face - Gradio
- create project on Hugging Face Hub
- clone Hugging Face Hub directory
- write code e.g. `app.py` and add model `*.pkl`. The model should be uploaded using `git-lfs`
- after git push the app will be deployed on Gradio. Note that Gradio will produce a warning about the `.pkl` file

In [None]:
#| export
from huggingface_hub import notebook_login
# Authenticate with the Hugging Face Hub from inside the notebook; needed
# before pushing the model below.
notebook_login()

In [None]:
from huggingface_hub import push_to_hub_fastai

# Reload the exported Learner from disk (this rebinds `learn`) and push it to
# the Hub repository named by `repo_id`.
learn = load_learner('export.pkl')
repo_id = "algorithmicLife/bear-classifier"
push_to_hub_fastai(learner=learn, repo_id=repo_id)

## Questionnaire

1. Provide an example of where the bear classification model might work poorly in production, due to structural or style differences in the training data.
1. Where do text models currently have a major deficiency?
1. What are possible negative societal implications of text generation models?
1. In situations where a model might make mistakes, and those mistakes could be harmful, what is a good alternative to automating a process?
1. What kind of tabular data is deep learning particularly good at?
1. What's a key downside of directly using a deep learning model for recommendation systems?
1. What are the steps of the Drivetrain Approach?
1. How do the steps of the Drivetrain Approach map to a recommendation system?
1. Create an image recognition model using data you curate, and deploy it on the web.
1. What is `DataLoaders`?
1. What four things do we need to tell fastai to create `DataLoaders`?
1. What does the `splitter` parameter to `DataBlock` do?
1. How do we ensure a random split always gives the same validation set?
1. What letters are often used to signify the independent and dependent variables?
1. What's the difference between the crop, pad, and squish resize approaches? When might you choose one over the others?
1. What is data augmentation? Why is it needed?
1. What is the difference between `item_tfms` and `batch_tfms`?
1. What is a confusion matrix?
1. What does `export` save?
1. What is it called when we use a model for getting predictions, instead of training?
1. What are IPython widgets?
1. When might you want to use CPU for deployment? When might GPU be better?
1. What are the downsides of deploying your app to a server, instead of to a client (or edge) device such as a phone or PC?
1. What are three examples of problems that could occur when rolling out a bear warning system in practice?
1. What is "out-of-domain data"?
1. What is "domain shift"?
1. What are the three steps in the deployment process?

### Further Research

1. Consider how the Drivetrain Approach maps to a project or problem you're interested in.
1. When might it be best to avoid certain types of data augmentation?
1. For a project you're interested in applying deep learning to, consider the thought experiment "What would happen if it went really, really well?"
1. Start a blog, and write your first blog post. For instance, write about what you think deep learning might be useful for in a domain you're interested in.