# Food classifier

Dataset from ETH Zurich: [link](https://www.vision.ee.ethz.ch/datasets_extra/food-101/)

In [None]:
from fastai import *
from fastai.vision import *

In [None]:
# Local working directory for the dataset (machine-specific; adjust as needed).
dest_path = pathlib.Path(r"D:\fastai\food")
# Expected location of the downloaded Food-101 archive inside that directory.
fname = dest_path.joinpath("food-101.tar.gz")

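Downloading the archive with fastai's `untar_data` appears to be broken for this dataset, so the call below is left commented out.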

In [None]:
# url_path = untar_data(url=URLs.FOOD, fname=fname, dest=dest_path); URLs.FOOD # http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz

Instead, fetch the file manually: download it with your browser and move it to the location given by `fname`. The commented-out cell below can then be used to extract it.

In [None]:
# tarfile.open(fname, "r:gz").extractall(dest_path)

In [None]:
# Image root inside the extracted archive; the cells below treat each
# sub-directory of it as one food class.
target_dir = dest_path.joinpath("food-101", "images")
target_dir

The following cell loads all available images, which seems to be 1,000 per class (there are 101 classes). That is a lot! If you want to speed things up and are okay with a smaller sample of images, skip the following cell and use the next code cell instead.

In [None]:
# tfms = get_transforms()
# np.random.seed(42)
# data = ImageDataBunch.from_folder(path=target_dir, train=".", valid_pct=.2, bs=32, ds_tfms=tfms, size=224).normalize(imagenet_stats)

Subsample the images down to a smaller number per class, controlled by `n_img_per_dir`.

In [None]:
# --- Sampling configuration ---
n_img_per_dir = 50 # number of images to select for each food dir (capped at what the dir actually holds)
img_dirs = [] # explicit class folders, e.g. ["nachos", "lobster_roll_sandwich", "ice_cream", "hummus"]; empty/None -> random pick
# Number of class folders drawn at random when `img_dirs` is empty.
# Replace with None (or any value >= the number of folders) to use every folder.
n_img_dirs = 2 if img_dirs is None or len(img_dirs)==0 else len(img_dirs)

# Class name -> directory. "models" is skipped because it is not a food class
# (presumably the folder fastai writes checkpoints to -- confirm).
food_dirs = {_dir.name: _dir for _dir in target_dir.ls()
             if _dir.is_dir() and _dir.name != "models"}

if img_dirs is None or len(img_dirs) == 0:
    if isinstance(n_img_dirs, int) and n_img_dirs < len(food_dirs):
        # Random subset of classes, without repetition.
        img_dirs = np.random.choice(list(food_dirs.keys()), size=n_img_dirs, replace=False)
    else:
        # Nothing to subsample: fall back to every class instead of ending up
        # with an empty selection (which made np.concatenate fail below).
        img_dirs = list(food_dirs)
else:
    # Fail early, and readably, when a requested class folder does not exist.
    _missing = [str(_name) for _name in img_dirs if _name not in food_dirs]
    if _missing:
        raise KeyError("unknown food dirs: {}".format(", ".join(_missing)))

food_dirs = {_name: food_dirs[_name] for _name in img_dirs}

# Per class: sample file names in the "<class>/<file>" form, i.e. relative to target_dir.
food_fnames = {}
for _food, _dir in food_dirs.items():
    _candidates = ["{}/{}".format(_img.parent.name, _img.name) for _img in _dir.ls()]
    # replace=False cannot draw more items than exist, so cap the sample size.
    _n = min(n_img_per_dir, len(_candidates))
    if _n == 0:
        continue  # empty directory: nothing to sample
    food_fnames[_food] = np.random.choice(_candidates, size=_n, replace=False)

if not food_fnames:
    raise ValueError("no images found below {}".format(target_dir))

# One row per sampled image; classes in alphabetical order.
_foods = sorted(food_fnames)
df = pd.DataFrame({
    "name": np.concatenate([food_fnames[_food] for _food in _foods]),
    "label": np.concatenate([np.repeat(_food, len(food_fnames[_food])) for _food in _foods]),
})

print("# dirs", len(food_dirs), ", ".join(food_dirs.keys()))
print("# images", len(df))

In [None]:
# Preview the first rows of the sampled (name, label) table.
df.head()

In [None]:
# Share of rows per label (normalize=True gives fractions instead of counts);
# a quick check that the classes are balanced.
df["label"].value_counts(normalize=True)

In [None]:
# Default fastai augmentation transforms.
tfms = get_transforms()
# Seed NumPy before building the data bunch; presumably this is what makes the
# random 20 % validation split reproducible -- confirm against fastai.
np.random.seed(42)
data = ImageDataBunch.from_df(
    path=target_dir,
    df=df,
    valid_pct=.2,
    bs=32,
    ds_tfms=tfms,
    size=224,
)
# Normalize with the ImageNet statistics, matching the pretrained models used below.
data = data.normalize(imagenet_stats)
# data = ImageDataBunch.from_df(path=target_dir, df=df, valid_pct=.2, bs=8, ds_tfms=tfms, size=224).normalize(imagenet_stats)

In [None]:
# Visual sanity check: display a grid of sample images (3 rows) from the data bunch.
data.show_batch(rows=3)

In [None]:
# Summary of the data bunch: class names, class count and split sizes.
for _caption, _value in (
    ("classes:", data.classes),
    ("c", data.c),
    ("num train", len(data.train_ds)),
    ("num valid", len(data.valid_ds)),
):
    print(_caption, _value)

In [None]:
# Build a CNN learner on a ResNet-34 architecture, reporting error rate as the metric.
# The commented line is the ResNet-50 alternative.
learn = cnn_learner(data, models.resnet34, metrics=error_rate)
# learn = cnn_learner(data, models.resnet50, metrics=error_rate)

In [None]:
# Stage 1: train for 4 epochs with the one-cycle schedule.
# NOTE(review): presumably only the new head is trained here (fastai freezes
# the pretrained body by default) -- confirm.
learn.fit_one_cycle(4)

In [None]:
# Plot the losses recorded during the training run above.
learn.recorder.plot_losses()

In [None]:
# Checkpoint the stage-1 model under the name "stage-1-34" (restored by the
# learn.load cell further down).
# NOTE(review): fastai presumably writes this below <target_dir>/models, which
# would explain why "models" is excluded from the class folders -- confirm.
learn.save("stage-1-34")
# learn.save("stage-1-50")

In [None]:
# Export the learner for inference to <target_dir>/export.pkl.
# NOTE(review): when cells run top to bottom this happens before the
# fine-tuning stage, so the exported file holds the stage-1 model.
learn.export(target_dir/"export.pkl")

In [None]:
# Restore the stage-1 checkpoint saved above before fine-tuning.
learn.load("stage-1-34")

In [None]:
# Run the learning-rate finder; its recorded results are plotted in the next cell.
learn.lr_find()

In [None]:
# Plot what the recorder captured during lr_find (loss vs. learning rate),
# used to choose the max_lr range for fine-tuning.
learn.recorder.plot()

In [None]:
# Unfreeze the model so the subsequent fit also updates the pretrained layers.
learn.unfreeze()

In [None]:
# Stage 2: fine-tune for 4 more epochs with max learning rates between 1e-6 and 1e-4.
# NOTE(review): the slice is presumably spread across layer groups (smallest
# rate for the earliest layers) -- confirm against the fastai docs.
learn.fit_one_cycle(4, max_lr=slice(1e-6, 1e-4))

## Inspection

In [None]:
# Show predictions on 2 rows of samples from the training set.
learn.show_results(ds_type=DatasetType.Train, rows=2)

In [None]:
# Show predictions on 2 rows of samples from the validation set.
learn.show_results(ds_type=DatasetType.Valid, rows=2)

In [None]:
# Build the interpretation helper from the trained learner; the plots below use it.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Plot the 6 samples with the highest loss.
interp.plot_top_losses(6)

In [None]:
# Plot the confusion matrix of the predictions.
interp.plot_confusion_matrix()

In [None]:
# List the most frequently confused class pairs.
# NOTE(review): min_val=1 presumably keeps every pair confused at least once -- confirm.
interp.most_confused(min_val=1)

## Making predictions

In [None]:
# Image to classify, given relative to target_dir.
# A forward slash is used as the separator: pathlib accepts it on every OS,
# whereas a backslash only separates path components on Windows.
img_fname = "strawberry_shortcake/13097.jpg" # other example: "guacamole/4541.jpg"
img = open_image(target_dir/img_fname)
img

In [None]:
# Load the exported learner back from <target_dir>/export.pkl for inference.
learn2 = load_learner(path=target_dir, file="export.pkl")

In [None]:
# Invert the class -> index mapping so predicted indices can be turned back into class names.
_class_to_idx = learn2.data.c2i
i2c = dict(zip(_class_to_idx.values(), _class_to_idx.keys()))

In [None]:
# Classify the image: predicted class, its index, and the per-class probabilities.
pred_class, pred_idx, probs = learn2.predict(img)
topk = 2 # how many of the most likely classes to list

# torch.topk raises when asked for more entries than exist, so never request
# more than the number of classes (relevant when training on very few classes).
top_idx = torch.topk(probs, min(topk, len(probs))).indices
print("pred_class", pred_class)
print("pred_idx", pred_idx)

print("\nPredictions")
# tolist()/item() yield plain Python ints/floats for the dict lookup and the formatting.
for _ix in top_idx.tolist():
    print("\t{} = {:.2f} %".format(i2c[_ix], probs[_ix].item()*100))
