In [1]:
import pandas as pd
import os
import shutil

In [2]:
# Read in data
# Both result files are loaded here because a later cell calls
# analysis(data_retrained); leaving that load commented out makes the
# notebook fail with a NameError on a fresh kernel (Restart & Run All).
data = pd.read_csv('vgg19_results.csv')
data_retrained = pd.read_csv('vgg19_results_retrained.csv')

In [3]:
def analysis(data):
    """Find the lowest- and highest-confidence correct prediction per class.

    Only rows whose "correct" column equals "y" are considered. Rows with a
    missing "true confidence score" are dropped first, so a class whose
    correct rows all lack a score is omitted from the result instead of
    breaking the idxmin/idxmax lookup.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns "correct", "true label", "image" and
        "true confidence score".

    Returns
    -------
    pandas.DataFrame
        One row per "true label" with the columns "image_min",
        "true confidence score_min", "image_max" and
        "true confidence score_max".
    """
    score_col = "true confidence score"
    wanted_cols = ["true label", "image", score_col]

    correct = data[data["correct"] == "y"].dropna(subset=[score_col])

    # Build the per-class grouping once and reuse it for both extremes.
    scores_by_label = correct.groupby("true label")[score_col]

    # idxmin/idxmax return the index labels of the extreme rows, which are
    # then used to pull the matching image path out of `correct`.
    min_conf_images = correct.loc[scores_by_label.idxmin(), wanted_cols]
    max_conf_images = correct.loc[scores_by_label.idxmax(), wanted_cols]

    # Merge the results into a single dataframe
    confidence_images = min_conf_images.merge(max_conf_images, on="true label", suffixes=("_min", "_max"))

    return confidence_images

In [4]:
# Confidence cut-offs: strictly above `high_threshold` counts as high,
# strictly below `low_threshold` counts as low.
low_threshold = 0.2
high_threshold = 0.99

# Keep only the rows flagged as correct ("y")
df_correct = data.loc[data["correct"] == "y"]

# Select both tails of the confidence scores among the correct rows
high_conf_images = df_correct.loc[df_correct["true confidence score"].gt(high_threshold)]
low_conf_images = df_correct.loc[df_correct["true confidence score"].lt(low_threshold)]

In [5]:
# Function to convert a float to 4 significant digits as a string
def format_confidence(conf):
    """Return `conf` formatted with 4 significant digits ("g" format).

    The "g" format drops trailing zeros, so distinct scores can map to the
    same text: both 0.99996 and 1.0 become "1".
    """
    return f"{conf:.4g}"

# Function to copy and rename images
def copy_and_rename_images(dataframe, folder_name):
    """Copy each listed image to ./input/<folder_name>/<true label>/.

    The copy is named after its formatted confidence score and keeps the
    source file extension. Because formatted scores are not unique, a name
    already produced earlier in this call gets the source file's stem
    appended (and a counter if that still clashes), so no copied image is
    silently overwritten by another one.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns "true label", "image" (source path) and
        "true confidence score".
    folder_name : str
        Sub-folder of ./input that receives the per-label directories.
    """
    used_paths = set()  # destination paths already written during this call

    for _, row in dataframe.iterrows():
        true_label = row['true label']
        image_path = row['image']
        confidence = row['true confidence score']
        # preserve file extension (e.g., .jpg, .png); stem is used for de-duplication
        stem, ext = os.path.splitext(os.path.basename(image_path))

        # Destination path and filename
        dest_dir = os.path.join('./input', folder_name, true_label)
        os.makedirs(dest_dir, exist_ok=True)

        base = format_confidence(confidence)
        dest_path = os.path.join(dest_dir, f"{base}{ext}")
        if dest_path in used_paths:
            # Same rounded score as an earlier image: disambiguate with the source stem.
            dest_path = os.path.join(dest_dir, f"{base}_{stem}{ext}")
        counter = 1
        while dest_path in used_paths:
            dest_path = os.path.join(dest_dir, f"{base}_{stem}_{counter}{ext}")
            counter += 1

        used_paths.add(dest_path)
        shutil.copy(image_path, dest_path)

# Export both confidence buckets, high first and then low,
# each into its own sub-folder.
for bucket_frame, bucket_folder in (
    (high_conf_images, 'high_conf'),
    (low_conf_images, 'low_conf'),
):
    copy_and_rename_images(bucket_frame, bucket_folder)

In [4]:
# Show full cell contents (the image paths are long) instead of truncating them
pd.options.display.max_colwidth = None

# Per-class min/max confidence tables for both result sets
data_images, data_images_retrained = (
    analysis(results) for results in (data, data_retrained)
)

In [5]:
# Per-class lowest/highest-confidence correct images, as returned by analysis(data)
data_images

Unnamed: 0,true label,image_min,true confidence score_min,image_max,true confidence score_max
0,English springer,/home/yi/Downloads/imagenette2/val/English springer/n02102040_1172.JPEG,0.136351,/home/yi/Downloads/imagenette2/val/English springer/n02102040_3221.JPEG,0.999499
1,French horn,/home/yi/Downloads/imagenette2/val/French horn/n03394916_46941.JPEG,0.066215,/home/yi/Downloads/imagenette2/val/French horn/n03394916_40741.JPEG,0.999999
2,cassette player,/home/yi/Downloads/imagenette2/val/cassette player/n02979186_3672.JPEG,0.143643,/home/yi/Downloads/imagenette2/val/cassette player/n02979186_11861.JPEG,0.991158
3,chain saw,/home/yi/Downloads/imagenette2/val/chain saw/n03000684_590.JPEG,0.102411,/home/yi/Downloads/imagenette2/val/chain saw/n03000684_26780.JPEG,1.0
4,church,/home/yi/Downloads/imagenette2/val/church/n03028079_8572.JPEG,0.202896,/home/yi/Downloads/imagenette2/val/church/n03028079_5402.JPEG,0.999355
5,garbage truck,/home/yi/Downloads/imagenette2/val/garbage truck/n03417042_130.JPEG,0.158681,/home/yi/Downloads/imagenette2/val/garbage truck/n03417042_19481.JPEG,0.999999
6,gas pump,/home/yi/Downloads/imagenette2/val/gas pump/n03425413_26601.JPEG,0.067953,/home/yi/Downloads/imagenette2/val/gas pump/ILSVRC2012_val_00009821.JPEG,1.0
7,golf ball,/home/yi/Downloads/imagenette2/val/golf ball/ILSVRC2012_val_00011581.JPEG,0.134957,/home/yi/Downloads/imagenette2/val/golf ball/ILSVRC2012_val_00011431.JPEG,1.0
8,parachute,/home/yi/Downloads/imagenette2/val/parachute/ILSVRC2012_val_00002990.JPEG,0.112971,/home/yi/Downloads/imagenette2/val/parachute/n03888257_14472.JPEG,1.0
9,tench,/home/yi/Downloads/imagenette2/val/tench/n01440764_25090.JPEG,0.268075,/home/yi/Downloads/imagenette2/val/tench/n01440764_10531.JPEG,1.0


In [6]:
# Per-class lowest/highest-confidence correct images, as returned by analysis(data_retrained)
data_images_retrained

Unnamed: 0,true label,image_min,true confidence score_min,image_max,true confidence score_max
0,English springer,/home/yi/Downloads/imagenette2/val/English springer/n02102040_1910.JPEG,0.84336,/home/yi/Downloads/imagenette2/val/English springer/ILSVRC2012_val_00004650.JPEG,1.0
1,French horn,/home/yi/Downloads/imagenette2/val/French horn/n03394916_42912.JPEG,0.378967,/home/yi/Downloads/imagenette2/val/French horn/ILSVRC2012_val_00004301.JPEG,1.0
2,cassette player,/home/yi/Downloads/imagenette2/val/cassette player/n02979186_7712.JPEG,0.548982,/home/yi/Downloads/imagenette2/val/cassette player/ILSVRC2012_val_00008651.JPEG,1.0
3,chain saw,/home/yi/Downloads/imagenette2/val/chain saw/n03000684_10960.JPEG,0.283426,/home/yi/Downloads/imagenette2/val/chain saw/ILSVRC2012_val_00007460.JPEG,1.0
4,church,/home/yi/Downloads/imagenette2/val/church/n03028079_14992.JPEG,0.36576,/home/yi/Downloads/imagenette2/val/church/ILSVRC2012_val_00003682.JPEG,1.0
5,garbage truck,/home/yi/Downloads/imagenette2/val/garbage truck/n03417042_3331.JPEG,0.436759,/home/yi/Downloads/imagenette2/val/garbage truck/ILSVRC2012_val_00002210.JPEG,1.0
6,gas pump,/home/yi/Downloads/imagenette2/val/gas pump/n03425413_8992.JPEG,0.286137,/home/yi/Downloads/imagenette2/val/gas pump/ILSVRC2012_val_00004452.JPEG,1.0
7,golf ball,/home/yi/Downloads/imagenette2/val/golf ball/n03445777_10141.JPEG,0.275994,/home/yi/Downloads/imagenette2/val/golf ball/ILSVRC2012_val_00009902.JPEG,1.0
8,parachute,/home/yi/Downloads/imagenette2/val/parachute/n03888257_9671.JPEG,0.256993,/home/yi/Downloads/imagenette2/val/parachute/ILSVRC2012_val_00006561.JPEG,1.0
9,tench,/home/yi/Downloads/imagenette2/val/tench/n01440764_14190.JPEG,0.334159,/home/yi/Downloads/imagenette2/val/tench/ILSVRC2012_val_00009191.JPEG,1.0


In [7]:
# Copy corresponding images to input folder for analysis
import os
import shutil

# For every class row, copy its min- and max-confidence image into
# ./input/<true label>/, keeping the original file names.
for _, record in data_images_retrained.iterrows():
    # Make sure the per-class target folder is there
    label_dir = os.path.join("./input", record['true label'])
    os.makedirs(label_dir, exist_ok=True)

    # Lowest-confidence image first, then the highest-confidence one
    for path_column in ('image_min', 'image_max'):
        source_path = record[path_column]
        target_path = os.path.join(label_dir, os.path.basename(source_path))
        shutil.copy(source_path, target_path)