This notebook runs inference on the entire training dataset and removes images for which a (decently trained) model's prediction does not match the given class label while the prediction is made with high confidence.
For example, if the model confidently predicts class 1 but the label is 4, the mismatch is likely due to a labelling error.

In [1]:
import json
import os
from datetime import timedelta
from types import SimpleNamespace
import cv2
from config import Configuration
from train_manager import TrainManager
from utils import set_seeds
import time
import pandas as pd
%load_ext autoreload
%autoreload 2

In [4]:
# Name of the trained experiment; run_inference looks for it under trained-models/<experiment_name>.
experiment_name = 'sgd_coswarm_bnf_bitemp_smooth_weighted_t1=0.3_t2=1.0_89-53'
# Confidence cut-offs handed one at a time to manager.test() inside run_inference.
# NOTE(review): later cells hard-code the 0.6 entry in their file names — keep them in sync if this list changes.
confidence_thresholds = [0.5, 0.6, 0.7]

In [3]:
def run_inference(experiment_name, tta, weight_avg, thresholds=None):
    """Run a trained experiment over the full training set, once per confidence threshold.

    Loads ``experiment_config.json`` from ``trained-models/<experiment_name>``,
    seeds the run from it, reads ``data/train.csv`` and hands that frame to a
    ``TrainManager`` built with ``cleaning_data=True``. ``manager.test`` is then
    called in ``'vote'`` mode for every threshold, and the total wall-clock
    time is printed.

    Args:
        experiment_name: Directory name of the experiment under ``trained-models/``.
        tta: Forwarded unchanged as the first argument of ``manager.test``.
        weight_avg: Forwarded unchanged as the second argument of ``manager.test``.
        thresholds: Optional iterable of confidence thresholds. When omitted,
            the notebook-level ``confidence_thresholds`` is used, so existing
            calls behave exactly as before.

    Returns:
        None. Results are whatever ``manager.test`` produces as side effects.

    Raises:
        FileNotFoundError: If the experiment config or ``data/train.csv`` is missing.
    """
    if thresholds is None:
        # Looked up at call time, so the config cell only has to run before the call.
        thresholds = confidence_thresholds

    experiment_dir = os.path.abspath(f'trained-models/{experiment_name}')

    # Parse the saved config into attribute-style namespaces; the file is
    # closed as soon as parsing is done.
    with open(os.path.join(experiment_dir, 'experiment_config.json'), 'r') as f:
        config = json.load(f, object_hook=lambda d: SimpleNamespace(**d))
    print(config.model_arch)
    set_seeds(config.seed)

    if config.num_workers > 0:
        # Presumably keeps OpenCV's own threads from competing with the
        # data-loading workers — TODO confirm against the dataloader setup.
        cv2.setNumThreads(0)

    inference_start = time.time()

    # we are cleaning the training set
    df = pd.read_csv('data/train.csv', engine='python')
    manager = TrainManager(holdout_df=df, config=config, cleaning_data=True,
                           experiment_dir=experiment_dir, experiment_name=experiment_name)

    # NOTE(review): each threshold triggers its own manager.test call; if that
    # repeats the full forward pass, caching predictions inside TrainManager
    # would avoid the repeated work — confirm there.
    for threshold in thresholds:
        manager.test(tta, weight_avg, mode='vote', confidence_threshold=threshold)

    print(f"Inference time: {str(timedelta(seconds=time.time() - inference_start))}")

In [None]:
# Launch the cleaning pass for the configured experiment.
# Long-running: the recorded output shows roughly 2.5 minutes per model.
run_inference(
    experiment_name=experiment_name,
    tta=0,
    weight_avg=False,
)

tf_efficientnet_b4_ns


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


conv_stem.weight True
bn1.weight False
bn1.bias False
blocks.0.0.conv_dw.weight True
blocks.0.0.bn1.weight False
blocks.0.0.bn1.bias False
blocks.0.0.se.conv_reduce.weight True
blocks.0.0.se.conv_reduce.bias True
blocks.0.0.se.conv_expand.weight True
blocks.0.0.se.conv_expand.bias True
blocks.0.0.conv_pw.weight True
blocks.0.0.bn2.weight False
blocks.0.0.bn2.bias False
blocks.0.1.conv_dw.weight True
blocks.0.1.bn1.weight False
blocks.0.1.bn1.bias False
blocks.0.1.se.conv_reduce.weight True
blocks.0.1.se.conv_reduce.bias True
blocks.0.1.se.conv_expand.weight True
blocks.0.1.se.conv_expand.bias True
blocks.0.1.conv_pw.weight True
blocks.0.1.bn2.weight False
blocks.0.1.bn2.bias False
blocks.1.0.conv_pw.weight True
blocks.1.0.bn1.weight False
blocks.1.0.bn1.bias False
blocks.1.0.conv_dw.weight True
blocks.1.0.bn2.weight False
blocks.1.0.bn2.bias False
blocks.1.0.se.conv_reduce.weight True
blocks.1.0.se.conv_reduce.bias True
blocks.1.0.se.conv_expand.weight True
blocks.1.0.se.conv_expand.bi

normal inference on model 0
Testing: 100%|██████████| 335/335 [02:27<00:00,  3.39it/s]Test epoch ended.
Testing: 100%|██████████| 335/335 [02:27<00:00,  2.28it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': tensor(0.9112, device='cuda:0')}
--------------------------------------------------------------------------------
normal inference on model 1
Testing: 100%|██████████| 335/335 [02:30<00:00,  3.36it/s]Test epoch ended.
Testing: 100%|██████████| 335/335 [02:30<00:00,  2.23it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': tensor(0.8963, device='cuda:0')}
--------------------------------------------------------------------------------
normal inference on model 2
Testing:  69%|██████▉   | 232/335 [01:45<00:46,  2.22it/s]

In [8]:
# Load the mismatch-marked table for the 0.6 confidence threshold
# (presumably written by the inference run above — confirm in TrainManager).
marked = pd.read_csv(
    filepath_or_buffer=f'data/mismatch_marked_train-0.6-{experiment_name}.csv',
)

In [9]:
# Share of rows whose `mismatch` flag is set, as a percentage of all rows in `marked`.
print(
    'Percentage wrongly, but confidently predicted {:.2f}%'.format(
        len(marked.loc[marked.mismatch.eq(True)]) / len(marked) * 100
    )
)

Percentage wrongly, but confidently predicted 5.66%


In [13]:
# Keep only the rows that were not flagged as mismatches and export them.
# The `actual` column is written out under the name `label`, next to `image_id`.
matched = marked[marked.mismatch == False]
cleaned = matched[['image_id', 'actual']].rename(columns={'actual': 'label'})
# index=False: the CSV contains just the two data columns, no row index.
cleaned.to_csv('train_cleaned-0.6.csv', index=False)