In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, "../")

import os
import torch
from PIL import Image as im
import pandas as pd
import numpy as np
from autogluon.vision import ImagePredictor, ImageDataset
import pickle
import datetime
from pathlib import Path
import cleanlab

from cross_validation_autogluon import cross_val_predict_autogluon_image_dataset, train_predict_autogluon

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

  from .autonotebook import tqdm as notebook_tqdm


## Save data

Ideally save data to a M.2 SSD for fast read

In [2]:
!wget -nc 'https://cleanlab-public.s3.amazonaws.com/LabelErrorDetectionBenchmarks/food-101n/data/Food-101N_release.zip'

File ‘Food-101N_release.zip’ already there; not retrieving.



In [3]:
# !unzip './Food-101N_release.zip'

This dataset does not have a "train" subfolder which is required by AutoGluon. Rename "images" subfolder to "train".

In [4]:
# !mv ./Food-101N_release/images ./Food-101N_release/train

In [5]:
# !ls ./Food-101N_release/train

## Read data

In [6]:
# original data deduped
DATA_PATH = "./Food-101N_release/"

# read data from root folder
train_dataset, _, _ = \
    ImageDataset.from_folders(
        root=DATA_PATH,
    )

In [7]:
train_dataset["label_name"] = train_dataset.label.map(lambda idx: train_dataset.classes[idx])

In [8]:
train_dataset.head()

Unnamed: 0,image,label,label_name
0,/home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/Food-101N_release/train/apple_pie/000036ee3613531a745a05052e6a3ed7.jpg,0,apple_pie
1,/home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/Food-101N_release/train/apple_pie/00168d0c6df7fb7b7b786a22c2ff2f4a.jpg,0,apple_pie
2,/home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/Food-101N_release/train/apple_pie/00196077deceacae8f88408b7e0cc216.jpg,0,apple_pie
3,/home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/Food-101N_release/train/apple_pie/003ba1197bb2ffca00036bbd1bfae139.jpg,0,apple_pie
4,/home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/Food-101N_release/train/apple_pie/00861f70adfd37898114b83ff16978d0.jpg,0,apple_pie


In [9]:
train_dataset.classes

['apple_pie',
 'baby_back_ribs',
 'baklava',
 'beef_carpaccio',
 'beef_tartare',
 'beet_salad',
 'beignets',
 'bibimbap',
 'bread_pudding',
 'breakfast_burrito',
 'bruschetta',
 'caesar_salad',
 'cannoli',
 'caprese_salad',
 'carrot_cake',
 'ceviche',
 'cheese_plate',
 'cheesecake',
 'chicken_curry',
 'chicken_quesadilla',
 'chicken_wings',
 'chocolate_cake',
 'chocolate_mousse',
 'churros',
 'clam_chowder',
 'club_sandwich',
 'crab_cakes',
 'creme_brulee',
 'croque_madame',
 'cup_cakes',
 'deviled_eggs',
 'donuts',
 'dumplings',
 'edamame',
 'eggs_benedict',
 'escargots',
 'falafel',
 'filet_mignon',
 'fish_and_chips',
 'foie_gras',
 'french_fries',
 'french_onion_soup',
 'french_toast',
 'fried_calamari',
 'fried_rice',
 'frozen_yogurt',
 'garlic_bread',
 'gnocchi',
 'greek_salad',
 'grilled_cheese_sandwich',
 'grilled_salmon',
 'guacamole',
 'gyoza',
 'hamburger',
 'hot_and_sour_soup',
 'hot_dog',
 'huevos_rancheros',
 'hummus',
 'ice_cream',
 'images',
 'lasagna',
 'lobster_bisque'

In [10]:
train_dataset.groupby("label_name")["image"].count().reset_index()

Unnamed: 0,label_name,image
0,apple_pie,2467
1,baby_back_ribs,2581
2,baklava,2874
3,beef_carpaccio,2028
4,beef_tartare,2012
5,beet_salad,2921
6,beignets,2802
7,bibimbap,2675
8,bread_pudding,2982
9,breakfast_burrito,2752


In [11]:
path_verified_train = "./Food-101N_release/meta/verified_train.tsv"

df_verified_train = pd.read_csv(path_verified_train, sep='\t')

## Test model training

In [12]:
train_dataset.iloc[:int(train_dataset.shape[0] * 0.2)].shape

(62001, 3)

In [14]:
model = "resnet50d"

holdout_frac = 0.2
n_splits = 5
time_limit = 6 * 3600
ngpus_per_trial = 1
batch_size = 64
epochs = 1

MODEL_PARAMS = {
    "model": model,
    "epochs": epochs,
    "holdout_frac": holdout_frac,
    "batch_size": batch_size
}

predictor = ImagePredictor(verbosity=0)

predictor.fit(
    train_data=train_dataset.iloc[:int(train_dataset.shape[0] * 0.2)],
    ngpus_per_trial=ngpus_per_trial,
    hyperparameters=MODEL_PARAMS,
#     time_limit=time_limit,
    random_state=123,
    time_limit = 30,
)

modified configs(<old> != <new>): {
root.train.early_stop_max_value 1.0 != inf
root.train.batch_size 32 != 64
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 1
root.img_cls.model   resnet101 != resnet50d
root.misc.seed       42 != 218
root.misc.num_workers 4 != 8
}
Saved config to /home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/55bf9369/.trial_0/config.yaml
Model resnet50d created, param count:                                         23572342
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 262.550975 samples/sec	accuracy=0.047813	lr=0.000100
Epoch[0] Batch [99]	Speed: 301.588792 samples/sec	accuracy=0.049844	lr=0.000100
`time_limit=29.999868154525757` reached, exit early...


<autogluon.vision.predictor.predictor.ImagePredictor at 0x7fe258120400>

In [15]:
# features = predictor.predict_feature(train_dataset.iloc[:int(train_dataset.shape[0] * 0.2)])

## Run cross-validation with AutoGluon

In [16]:
%%time

# generate cross-validated predicted probabilities for various models so we can use them for ensemble scoring methods
models = [
    # "resnet18",
    # "resnet50d",
    "efficientnet_b1",
    "twins_pcpvt_base",
    # "swin_base_patch4_window7_224"
]

epochs = 100
holdout_frac = 0.2
n_splits = 5
# time_limit = 6 * 3600
ngpus_per_trial = 1
batch_size = 64

# run cross-validation for each model
for model in models:
    
    print("----")
    print(f"Running cross-validation for model: {model}")

    MODEL_PARAMS = {
        "model": model,
        "epochs": epochs,
        "holdout_frac": holdout_frac,
        "batch_size": batch_size
    }
    out_folder = f"./noxval_food-101n_cv_{model}/"

    # results of cross-validation will be saved to pickle files for each model/fold
    _ = \
         train_predict_autogluon(
            dataset=train_dataset, # train with NOISY LABELS
            classes=train_dataset.classes,
            out_folder=out_folder, # save results of cross-validation in pickle files for each fold
            n_splits=n_splits,
            model_params=MODEL_PARAMS,
            time_limit=30,
            ngpus_per_trial=ngpus_per_trial
        )
    
    
    images = np.load(out_folder + "images.npy", allow_pickle=True)
    
    # instantiate DataFrame with all training data
    df_image_paths = pd.DataFrame({
        "class_name/key": pd.Series(images).map(lambda f: "/".join(Path(f).parts[-2:]))
    })

    # join to append verification_label column
    df_image_paths_w_verified = df_image_paths.merge(df_verified_train, on="class_name/key", how="left")

    # subset of data with verified labels
    verified_subset_mask = ~df_image_paths_w_verified.verification_label.isnull().values

    df_image_paths_w_verified.head()

----
Running cross-validation for model: efficientnet_b1
training...


modified configs(<old> != <new>): {
root.train.early_stop_max_value 1.0 != inf
root.train.batch_size 32 != 64
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
root.img_cls.model   resnet101 != efficientnet_b1
root.misc.seed       42 != 621
root.misc.num_workers 4 != 8
}
Saved config to /home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/8bdcea18/.trial_0/config.yaml
Model efficientnet_b1 created, param count:                                         6642565
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]
Epoch[0] Batch [49]	Speed: 325.397917 samples/sec	accuracy=0.009062	lr=0.000100
Epoch[0] Batch [99]	Speed: 369.010450 samples/sec	accuracy=0.009531	lr=0.000100
Epoch[0] Batch [149]	Speed: 372.420639 samples/sec	accuracy=0.009792	lr=0.000100
`time_limit=29.99986958503723` reached, exit early...


predicting...


  y_pred_proba[class_ids] = y_pred_proba['image_proba'].to_list()
  y_pred_proba[class_ids] = y_pred_proba['image_proba'].to_list()


Saving to numpy files in this folder: ./food-101n_cv_efficientnet_b1/
saving pred_probs...
saving noisy_labels...
saving images...
saving indices...
saving predictor...
----
Running cross-validation for model: twins_pcpvt_base
training...


modified configs(<old> != <new>): {
root.train.early_stop_max_value 1.0 != inf
root.train.batch_size 32 != 64
root.train.early_stop_baseline 0.0 != -inf
root.train.early_stop_patience -1 != 10
root.train.epochs    200 != 100
root.img_cls.model   resnet101 != twins_pcpvt_base
root.misc.seed       42 != 206
root.misc.num_workers 4 != 8
}
Saved config to /home/ubuntu/label-error-detection-benchmarks/src/experiments/food-101n/09099c1f/.trial_0/config.yaml
Model twins_pcpvt_base created, param count:                                         43367269
AMP not enabled. Training in float32.
Disable EMA as it is not supported for now.
Start training from [Epoch 0]


RuntimeError: Unexpected error happened during fit: { 'args': "{'img_cls': {'model': 'twins_pcpvt_base', 'pretrained': True, "
          "'global_pool_type': None}, 'data': {'img_size': None, 'input_size': "
          "None, 'crop_pct': 0.99, 'mean': None, 'std': None, 'interpolation': "
          "'', 'validation_batch_size_multiplier': 1}, 'optimizer': {'opt': "
          "'sgd', 'opt_eps': None, 'opt_betas': None, 'momentum': 0.9, "
          "'weight_decay': 0.0001, 'clip_grad': None, 'clip_mode': 'norm'}, "
          "'train': {'batch_size': 64, 'sched': 'step', 'lr': 0.01, "
          "'lr_noise': None, 'lr_noise_pct': 0.67, 'lr_noise_std': 1.0, "
          "'lr_cycle_mul': 1.0, 'lr_cycle_limit': 1, 'transfer_lr_mult': 0.01, "
          "'output_lr_mult': 0.1, 'warmup_lr': 0.0001, 'min_lr': 1e-05, "
          "'epochs': 100, 'start_epoch': 0, 'decay_epochs': 30, "
          "'warmup_epochs': 3, 'cooldown_epochs': 10, 'patience_epochs': 10, "
          "'decay_rate': 0.1, 'bn_momentum': None, 'bn_eps': None, 'sync_bn': "
          "False, 'early_stop_patience': 10, 'early_stop_min_delta': 0.001, "
          "'early_stop_baseline': -inf, 'early_stop_max_value': inf}, "
          "'augmentation': {'no_aug': False, 'scale': (0.08, 1.0), 'ratio': "
          "(0.75, 1.3333333333333333), 'hflip': 0.5, 'vflip': 0.0, "
          "'color_jitter': 0.4, 'auto_augment': None, 'mixup': 0.0, 'cutmix': "
          "0.0, 'cutmix_minmax': None, 'mixup_prob': 1.0, 'mixup_switch_prob': "
          "0.5, 'mixup_mode': 'batch', 'mixup_off_epoch': 0, 'smoothing': 0.1, "
          "'train_interpolation': 'random', 'drop': 0.0, 'drop_path': None, "
          "'drop_block': None}, 'model_ema': {'model_ema': True, "
          "'model_ema_force_cpu': False, 'model_ema_decay': 0.9998}, 'misc': "
          "{'seed': 206, 'log_interval': 50, 'num_workers': 8, 'save_images': "
          "False, 'amp': False, 'apex_amp': False, 'native_amp': False, "
          "'pin_mem': False, 'prefetcher': False, 'eval_metric': 'top1', "
          "'tta': 0, 'use_multi_epochs_loader': False, 'torchscript': False}, "
          "'gpus': [0]}",
  'time': 2.628267288208008,
  'traceback': 'Traceback (most recent call last):\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/autogluon/vision/_gluoncv/image_classification.py", '
               'line 191, in _train_image_classification\n'
               '    result = estimator.fit(train_data=train_data, '
               'val_data=val_data, time_limit=wall_clock_tick-tic)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/gluoncv/auto/estimators/base_estimator.py", '
               'line 175, in fit\n'
               '    ret = self._fit(train_data, val_data, '
               'time_limit=time_limit) if not resume else \\\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/gluoncv/auto/estimators/torch_image_classification/torch_image_classification.py", '
               'line 123, in _fit\n'
               '    return self._resume_fit(train_data, val_data, '
               'time_limit=time_limit)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/gluoncv/auto/estimators/torch_image_classification/torch_image_classification.py", '
               'line 201, in _resume_fit\n'
               '    return self._train_loop(train_loader, val_loader, '
               'time_limit=time_limit)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/gluoncv/auto/estimators/torch_image_classification/torch_image_classification.py", '
               'line 239, in _train_loop\n'
               '    train_metrics = self.train_one_epoch(\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/gluoncv/auto/estimators/torch_image_classification/torch_image_classification.py", '
               'line 337, in train_one_epoch\n'
               '    output = net(input)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", '
               'line 1130, in _call_impl\n'
               '    return forward_call(*input, **kwargs)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", '
               'line 166, in forward\n'
               '    return self.module(*inputs[0], **kwargs[0])\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", '
               'line 1130, in _call_impl\n'
               '    return forward_call(*input, **kwargs)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/timm/models/twins.py", '
               'line 363, in forward\n'
               '    x = self.forward_features(x)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/timm/models/twins.py", '
               'line 354, in forward_features\n'
               '    x = blk(x, size)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", '
               'line 1130, in _call_impl\n'
               '    return forward_call(*input, **kwargs)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/timm/models/twins.py", '
               'line 217, in forward\n'
               '    x = x + self.drop_path(self.attn(self.norm1(x), size))\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", '
               'line 1130, in _call_impl\n'
               '    return forward_call(*input, **kwargs)\n'
               '  File '
               '"/home/ubuntu/.local/lib/python3.8/site-packages/timm/models/twins.py", '
               'line 188, in forward\n'
               '    attn = (q @ k.transpose(-2, -1)) * self.scale\n'
               'RuntimeError: CUDA out of memory. Tried to allocate 16.00 MiB '
               '(GPU 0; 15.78 GiB total capacity; 8.47 GiB already allocated; '
               '7.94 MiB free; 8.53 GiB reserved in total by PyTorch) If '
               'reserved memory is >> allocated memory try setting '
               'max_split_size_mb to avoid fragmentation.  See documentation '
               'for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n',
  'train_acc': -1,
  'valid_acc': -1}