In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, "../")

import os
import torch
from PIL import Image as im
import pandas as pd
import numpy as np
from autogluon.vision import ImagePredictor, ImageDataset
import pickle
import datetime
from pathlib import Path
import cleanlab

from cross_validation_autogluon import cross_val_predict_autogluon_image_dataset, train_predict_autogluon

# Lift pandas' display limits (rows, columns, cell width) so tables are shown untruncated.
for _display_option in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(_display_option, None)

## Save data

Ideally, save the data to an M.2 SSD for fast reads.

In [None]:
# List the contents of the dataset root folder to see which subfolders it contains.
!ls /Data/Food-101N_release/

This dataset does not have a "train" subfolder, which is required by AutoGluon. Rename the "images" subfolder to "train".

In [None]:
# Rename the "images" subfolder to "train".
# The rename is guarded so that re-running this cell (e.g. Restart & Run All)
# after the folder has already been renamed is a no-op. A plain `mv` would fail
# once "images" is gone, or nest "images" inside "train" if both exist.
images_dir = Path("/Data/Food-101N_release/images")
train_dir = Path("/Data/Food-101N_release/train")
if images_dir.is_dir() and not train_dir.exists():
    images_dir.rename(train_dir)

In [None]:
# List the "train" subfolder to confirm it exists under the dataset root.
!ls /Data/Food-101N_release/train

## Read data

In [2]:
# Root folder of the original (deduplicated) Food-101N release.
DATA_PATH = "/Data/Food-101N_release/"

# Build the dataset from the folder layout under DATA_PATH. from_folders returns
# three values; only the first is kept and the other two are discarded.
train_dataset, _, _ = ImageDataset.from_folders(root=DATA_PATH)

In [3]:
# Add a readable class name for every row by using the integer label as an
# index into train_dataset.classes.
train_dataset["label_name"] = train_dataset["label"].map(
    lambda class_idx: train_dataset.classes[class_idx]
)

In [4]:
# Preview the first rows: image path, integer label and class name.
train_dataset.head()

Unnamed: 0,image,label,label_name
0,/Data/Food-101N_release/train/apple_pie/000036ee3613531a745a05052e6a3ed7.jpg,0,apple_pie
1,/Data/Food-101N_release/train/apple_pie/00168d0c6df7fb7b7b786a22c2ff2f4a.jpg,0,apple_pie
2,/Data/Food-101N_release/train/apple_pie/00196077deceacae8f88408b7e0cc216.jpg,0,apple_pie
3,/Data/Food-101N_release/train/apple_pie/003ba1197bb2ffca00036bbd1bfae139.jpg,0,apple_pie
4,/Data/Food-101N_release/train/apple_pie/00861f70adfd37898114b83ff16978d0.jpg,0,apple_pie


In [5]:
# Class names; the integer value in the "label" column indexes into this list.
train_dataset.classes

['apple_pie',
 'baby_back_ribs',
 'baklava',
 'beef_carpaccio',
 'beef_tartare',
 'beet_salad',
 'beignets',
 'bibimbap',
 'bread_pudding',
 'breakfast_burrito',
 'bruschetta',
 'caesar_salad',
 'cannoli',
 'caprese_salad',
 'carrot_cake',
 'ceviche',
 'cheese_plate',
 'cheesecake',
 'chicken_curry',
 'chicken_quesadilla',
 'chicken_wings',
 'chocolate_cake',
 'chocolate_mousse',
 'churros',
 'clam_chowder',
 'club_sandwich',
 'crab_cakes',
 'creme_brulee',
 'croque_madame',
 'cup_cakes',
 'deviled_eggs',
 'donuts',
 'dumplings',
 'edamame',
 'eggs_benedict',
 'escargots',
 'falafel',
 'filet_mignon',
 'fish_and_chips',
 'foie_gras',
 'french_fries',
 'french_onion_soup',
 'french_toast',
 'fried_calamari',
 'fried_rice',
 'frozen_yogurt',
 'garlic_bread',
 'gnocchi',
 'greek_salad',
 'grilled_cheese_sandwich',
 'grilled_salmon',
 'guacamole',
 'gyoza',
 'hamburger',
 'hot_and_sour_soup',
 'hot_dog',
 'huevos_rancheros',
 'hummus',
 'ice_cream',
 'lasagna',
 'lobster_bisque',
 'lobster

In [6]:
# Count the (non-null) image entries for each class name.
train_dataset.groupby("label_name")["image"].count().reset_index()

Unnamed: 0,label_name,image
0,apple_pie,2467
1,baby_back_ribs,2581
2,baklava,2874
3,beef_carpaccio,2028
4,beef_tartare,2012
5,beet_salad,2921
6,beignets,2802
7,bibimbap,2675
8,bread_pudding,2982
9,breakfast_burrito,2752


## Test model training

In [None]:
# Shape (rows, columns) of the slice holding the first 20% of the rows.
train_dataset.iloc[:int(train_dataset.shape[0] * 0.2)].shape

In [None]:

# Smoke test: fit a single model for one epoch on about 20% of the data to check
# that training runs end-to-end before launching the long cross-validation.
model = "resnet50d"

holdout_frac = 0.2
n_splits = 5  # not used by this cell
time_limit = 6 * 3600  # 6 hours, in seconds
ngpus_per_trial = 1
batch_size = 64
epochs = 1

MODEL_PARAMS = {
    "model": model,
    "epochs": epochs,
    # NOTE(review): holdout_frac is passed through `hyperparameters` here;
    # confirm AutoGluon honors it in this position rather than only as a
    # keyword argument of fit().
    "holdout_frac": holdout_frac,
    "batch_size": batch_size
}

# The rows of train_dataset appear to be grouped by class (the first rows are all
# "apple_pie"), so taking the first 20% of rows would only cover the first few
# classes. Draw a seeded random 20% sample instead so the subset spans the classes.
smoke_train_data = train_dataset.sample(frac=0.2, random_state=123)

predictor = ImagePredictor(verbosity=0)

predictor.fit(
    train_data=smoke_train_data,
    ngpus_per_trial=ngpus_per_trial,
    hyperparameters=MODEL_PARAMS,
    time_limit=time_limit,
    random_state=123,
)

In [None]:
# Run predict_feature with the fitted predictor on the first 20% of the rows.
n_feature_rows = int(len(train_dataset) * 0.2)
features = predictor.predict_feature(train_dataset.iloc[:n_feature_rows])

## Run cross-validation with AutoGluon

In [9]:
%%time

# Produce cross-validated predicted probabilities for several architectures so
# they can later be used for ensemble scoring methods.
# Other candidates, currently disabled: "resnet18", "resnet50d",
# "swin_base_patch4_window7_224".
models = [
    "efficientnet_b1",
    "twins_pcpvt_base",
]

# Settings shared by every model.
n_splits = 5
epochs = 100
batch_size = 64
holdout_frac = 0.2
ngpus_per_trial = 1
time_limit = 6 * 3600

# One full cross-validation run per model.
for model in models:

    print("----", f"Running cross-validation for model: {model}", sep="\n")

    MODEL_PARAMS = dict(
        model=model,
        epochs=epochs,
        holdout_frac=holdout_frac,
        batch_size=batch_size,
    )

    # The per-fold results are saved to pickle files in a model-specific output
    # folder, so the return value is not needed here.
    _ = train_predict_autogluon(
        dataset=train_dataset,  # train with NOISY LABELS
        classes=train_dataset.classes,
        out_folder=f"./food-101n_cv_{model}/",
        n_splits=n_splits,
        model_params=MODEL_PARAMS,
        time_limit=time_limit,
        ngpus_per_trial=ngpus_per_trial,
    )

----
Running cross-validation for model: efficientnet_b1
----
Running Cross-Validation on Split: 0
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 659
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != efficientnet_b1
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/a447a215/.trial_0/config.yaml
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b1-533bc792.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1-533bc792.pth
Model efficientnet_b1 created, param count:                                         6642565
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 282.970257 samples/sec	accuracy=0.006875	lr=0.000100
Epoch[0] Batch [99]	Speed: 387.635876 samples/sec	accuracy=0.008750	lr=0.000100
Epoch[0] Batch [149]	Speed: 

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_efficientnet_b1/split_0/_test_pred_probs_split_0
Saving ./food-101n_cv_efficientnet_b1/split_0/_test_labels_split_0
Saving ./food-101n_cv_efficientnet_b1/split_0/_test_image_files_split_0
Saving ./food-101n_cv_efficientnet_b1/split_0/_test_indices_split_0
----
Running Cross-Validation on Split: 1
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 678
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != efficientnet_b1
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/73f70fa7/.trial_0/config.yaml
Model efficientnet_b1 created, param count:                                         6642565
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 284.276286 samples/sec	accuracy=0.008750	lr=0.000100
Epoch[0] Batch [99]	Speed: 390.842472 samples/sec	accuracy=0.008594	lr=0.000100
Epoch[0] Batch [149]	Speed: 390.105408 samples/sec	accuracy=0.009792	lr=0.000100
Epoch[0] Batch [199]	Speed: 389.827814 samples/sec	accuracy=0.009844	lr=0.000100
Epoch[0] Batch [249]	Speed: 386.834827 samples/sec	accurac

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_efficientnet_b1/split_1/_test_pred_probs_split_1
Saving ./food-101n_cv_efficientnet_b1/split_1/_test_labels_split_1
Saving ./food-101n_cv_efficientnet_b1/split_1/_test_image_files_split_1
Saving ./food-101n_cv_efficientnet_b1/split_1/_test_indices_split_1
----
Running Cross-Validation on Split: 2
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 113
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != efficientnet_b1
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/88abfb0f/.trial_0/config.yaml
Model efficientnet_b1 created, param count:                                         6642565
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 280.447405 samples/sec	accuracy=0.009687	lr=0.000100
Epoch[0] Batch [99]	Speed: 390.552406 samples/sec	accuracy=0.008281	lr=0.000100
Epoch[0] Batch [149]	Speed: 387.873968 samples/sec	accuracy=0.007292	lr=0.000100
Epoch[0] Batch [199]	Speed: 384.125728 samples/sec	accuracy=0.008125	lr=0.000100
Epoch[0] Batch [249]	Speed: 387.660126 samples/sec	accurac

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_efficientnet_b1/split_2/_test_pred_probs_split_2
Saving ./food-101n_cv_efficientnet_b1/split_2/_test_labels_split_2
Saving ./food-101n_cv_efficientnet_b1/split_2/_test_image_files_split_2
Saving ./food-101n_cv_efficientnet_b1/split_2/_test_indices_split_2
----
Running Cross-Validation on Split: 3
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 197
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != efficientnet_b1
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/a0a04230/.trial_0/config.yaml
Model efficientnet_b1 created, param count:                                         6642565
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 273.362203 samples/sec	accuracy=0.008750	lr=0.000100
Epoch[0] Batch [99]	Speed: 387.377245 samples/sec	accuracy=0.007812	lr=0.000100
Epoch[0] Batch [149]	Speed: 387.069927 samples/sec	accuracy=0.008333	lr=0.000100
Epoch[0] Batch [199]	Speed: 388.723582 samples/sec	accuracy=0.008281	lr=0.000100
Epoch[0] Batch [249]	Speed: 384.215764 samples/sec	accurac

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_efficientnet_b1/split_3/_test_pred_probs_split_3
Saving ./food-101n_cv_efficientnet_b1/split_3/_test_labels_split_3
Saving ./food-101n_cv_efficientnet_b1/split_3/_test_image_files_split_3
Saving ./food-101n_cv_efficientnet_b1/split_3/_test_indices_split_3
----
Running Cross-Validation on Split: 4
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 499
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != efficientnet_b1
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/31da6ca8/.trial_0/config.yaml
Model efficientnet_b1 created, param count:                                         6642565
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 276.473343 samples/sec	accuracy=0.007812	lr=0.000100
Epoch[0] Batch [99]	Speed: 389.901310 samples/sec	accuracy=0.009531	lr=0.000100
Epoch[0] Batch [149]	Speed: 390.380151 samples/sec	accuracy=0.009479	lr=0.000100
Epoch[0] Batch [199]	Speed: 387.884852 samples/sec	accuracy=0.010234	lr=0.000100
Epoch[0] Batch [249]	Speed: 388.599971 samples/sec	accurac

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_efficientnet_b1/split_4/_test_pred_probs_split_4
Saving ./food-101n_cv_efficientnet_b1/split_4/_test_labels_split_4
Saving ./food-101n_cv_efficientnet_b1/split_4/_test_image_files_split_4
Saving ./food-101n_cv_efficientnet_b1/split_4/_test_indices_split_4
----
Running cross-validation for model: twins_pcpvt_base
----
Running Cross-Validation on Split: 0
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 183
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != twins_pcpvt_base
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/a33a79c0/.trial_0/config.yaml
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vt3p-weights/twins_pcpvt_base-e5ecb09b.pth" to /root/.cache/torch/hub/checkpoints/twins_pcpvt_base-e5ecb09b.pth
Model twins_pcpvt_base created, param count:                                         43367269
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 162.686359 samples/sec	accuracy=0.007500	lr=0.000100
Epoch[0] Batch [99]	Speed: 188.483346 samples/sec	accuracy=0.009687	lr=0.000100
Epoch[0] Batch [14

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_twins_pcpvt_base/split_0/_test_pred_probs_split_0
Saving ./food-101n_cv_twins_pcpvt_base/split_0/_test_labels_split_0
Saving ./food-101n_cv_twins_pcpvt_base/split_0/_test_image_files_split_0
Saving ./food-101n_cv_twins_pcpvt_base/split_0/_test_indices_split_0
----
Running Cross-Validation on Split: 1
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 325
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != twins_pcpvt_base
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/15e64d0b/.trial_0/config.yaml
Model twins_pcpvt_base created, param count:                                         43367269
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 159.384079 samples/sec	accuracy=0.008438	lr=0.000100
Epoch[0] Batch [99]	Speed: 187.964858 samples/sec	accuracy=0.011406	lr=0.000100
Epoch[0] Batch [149]	Speed: 187.876587 samples/sec	accuracy=0.011771	lr=0.000100
Epoch[0] Batch [199]	Speed: 186.836704 samples/sec	accuracy=0.013906	lr=0.000100
Epoch[0] Batch [249]	Speed: 187.179167 samples/sec	accu

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_twins_pcpvt_base/split_1/_test_pred_probs_split_1
Saving ./food-101n_cv_twins_pcpvt_base/split_1/_test_labels_split_1
Saving ./food-101n_cv_twins_pcpvt_base/split_1/_test_image_files_split_1
Saving ./food-101n_cv_twins_pcpvt_base/split_1/_test_indices_split_1
----
Running Cross-Validation on Split: 2
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 217
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != twins_pcpvt_base
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/051d28cc/.trial_0/config.yaml
Model twins_pcpvt_base created, param count:                                         43367269
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 158.214266 samples/sec	accuracy=0.011562	lr=0.000100
Epoch[0] Batch [99]	Speed: 189.084325 samples/sec	accuracy=0.014063	lr=0.000100
Epoch[0] Batch [149]	Speed: 188.157652 samples/sec	accuracy=0.015312	lr=0.000100
Epoch[0] Batch [199]	Speed: 188.321342 samples/sec	accuracy=0.017109	lr=0.000100
Epoch[0] Batch [249]	Speed: 187.617444 samples/sec	accu

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_twins_pcpvt_base/split_2/_test_pred_probs_split_2
Saving ./food-101n_cv_twins_pcpvt_base/split_2/_test_labels_split_2
Saving ./food-101n_cv_twins_pcpvt_base/split_2/_test_image_files_split_2
Saving ./food-101n_cv_twins_pcpvt_base/split_2/_test_indices_split_2
----
Running Cross-Validation on Split: 3
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 690
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != twins_pcpvt_base
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/1f70ab09/.trial_0/config.yaml
Model twins_pcpvt_base created, param count:                                         43367269
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 161.745869 samples/sec	accuracy=0.011875	lr=0.000100
Epoch[0] Batch [99]	Speed: 189.691434 samples/sec	accuracy=0.011094	lr=0.000100
Epoch[0] Batch [149]	Speed: 189.369161 samples/sec	accuracy=0.011875	lr=0.000100
Epoch[0] Batch [199]	Speed: 189.109581 samples/sec	accuracy=0.012266	lr=0.000100
Epoch[0] Batch [249]	Speed: 188.971838 samples/sec	accu

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_twins_pcpvt_base/split_3/_test_pred_probs_split_3
Saving ./food-101n_cv_twins_pcpvt_base/split_3/_test_labels_split_3
Saving ./food-101n_cv_twins_pcpvt_base/split_3/_test_image_files_split_3
Saving ./food-101n_cv_twins_pcpvt_base/split_3/_test_indices_split_3
----
Running Cross-Validation on Split: 4
  Training model...


modified configs(<old> != <new>): {
root.misc.seed       42 != 389
root.misc.num_workers 4 != 16
root.img_cls.model   resnet101 != twins_pcpvt_base
root.train.batch_size 32 != 64
root.train.early_stop_max_value 1.0 != inf
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
}
Saved config to /dcai/src/experiments/food-101n/0f398de6/.trial_0/config.yaml
Model twins_pcpvt_base created, param count:                                         43367269
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 162.261233 samples/sec	accuracy=0.007188	lr=0.000100
Epoch[0] Batch [99]	Speed: 189.595278 samples/sec	accuracy=0.010156	lr=0.000100
Epoch[0] Batch [149]	Speed: 189.408798 samples/sec	accuracy=0.011875	lr=0.000100
Epoch[0] Batch [199]	Speed: 188.991127 samples/sec	accuracy=0.012500	lr=0.000100
Epoch[0] Batch [249]	Speed: 188.730991 samples/sec	accu

  Running model inference...
 Saving model outputs...
Saving ./food-101n_cv_twins_pcpvt_base/split_4/_test_pred_probs_split_4
Saving ./food-101n_cv_twins_pcpvt_base/split_4/_test_labels_split_4
Saving ./food-101n_cv_twins_pcpvt_base/split_4/_test_image_files_split_4
Saving ./food-101n_cv_twins_pcpvt_base/split_4/_test_indices_split_4
CPU times: user 2d 4h 32min 24s, sys: 9h 52min 40s, total: 2d 14h 25min 5s
Wall time: 2d 14h 55min 5s
