<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/image_type/inspect_train_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Determine confidence threshold for Image Type Classification Models 
---
*Last Updated 17 December 2022*   
Choose which trained model and confidence threshold values to use for classifying EOL images as maps, phylogenies, illustrations, or herbarium sheets. Threshold values should be chosen that maximize coverage and minimize error.

First, choose the 2 best models trained in [image_type_train.ipynb](https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/image_type/image_type_train.ipynb). Then, run this notebook.

Run 500 images per class (map, phylogeny, illustration, herbarium sheet) through the best models chosen in image_type_train.ipynb to validate model performance. Plot histograms of true and false predictions per class at binned confidence intervals to find the best performance by class and confidence threshold. (This is helpful because not all models learn every class equally well.)

***Models were trained in Python 2 and TF 1 in October 2020: MobileNet SSD v2 was trained for 3 hours to 30 epochs with Batch Size=16, Lr=0.00001, Dropout=0.3, epsilon=1e-7, Adam optimizer. Final validation accuracy = 0.90. Inception v3 was trained for 3.5 hours to 30 epochs with Batch Size=16, Lr=0.0001, Dropout=0.2, epsilon=1, Adam optimizer. Final validation accuracy = 0.89.***

Notes:   
* Run code blocks by pressing play button in brackets on left
* Before you start: change the runtime to "GPU" with "High RAM"
* Change parameters using form fields on right (find details at corresponding lines of code by searching '#@param')

## Installs & Imports
---

In [None]:
#@title Choose where to save results & set up directory structure
# This cell builds the folder layout used by the rest of the notebook
# (basewd, cwd, data_wd, models_wd) and, when the data or model folders do not
# exist yet, downloads the EOL image bundles / pre-trained models with gdown.
# Use dropdown menu on right
save = "in Colab runtime (files deleted after each session)" #@param ["in my Google Drive", "in Colab runtime (files deleted after each session)"]
print("Saving results ", save)

# Mount google drive to export file(s)
if 'Google Drive' in save:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

# Type in the path to your working directory in form field to right
import os
# NOTE(review): basewd is created whether or not Drive was mounted above, so with
# the "Colab runtime" option the default path is presumably a plain runtime folder
basewd = "/content/drive/MyDrive/train/tf2" #@param ["/content/drive/MyDrive/train/tf2"] {allow-input: true}
if not os.path.exists(basewd):
    os.makedirs(basewd)

# Folder where inspect results outputs will be saved
results_folder = "inspect_resul" #@param ["inspect_resul"] {allow-input: true}
cwd = basewd + '/' + results_folder
if not os.path.exists(cwd):
    os.makedirs(cwd)
print("\nWorking directory set to: \n", cwd)

# Enter image classes of interest in form field
filters = ['herb', 'illus', 'map', 'null', 'phylo'] #@param ["['herb', 'illus', 'map', 'null', 'phylo']"] {type:"raw", allow-input: true}

# Folder where image metadata was saved in image_type_preprocessing.ipynb
data_folder = "pre-processing/image_data" #@param ["pre-processing/image_data"] {allow-input: true}
data_wd = basewd + '/' + data_folder
# Only when the metadata folder is missing: create it and download the image bundles into it
if not os.path.exists(data_wd):
    # NOTE(review): gdown is only installed inside this branch, but the model
    # download further down also calls gdown
    !pip3 install --upgrade gdown
    os.makedirs(data_wd)
    print("\nDownload image bundles for image type classes {}...\n".format(filters))
    %cd $data_wd
    # Google Drive file ids passed to gdown, one download per id
    file_ids = ['1Bkh2-TZSIKCCoKOTNr2L65BwR92Nx0vZ', '1m2sOLpUOWsw5RwzRtvj0mqH8aPloqnE_', \
                '1EIwPxyrawXnTPMyvO8f4nc1e3HALrTp9', '16I-_Qbh2IX_1Oz5wqlE6uzWXB2VhjE3e', \
                '1hQNgRLZWZu77XAxBwQIJOgRmWCCcpMos']
    for file_id in file_ids:
        !gdown $file_id
print("\nImage metadata directory set to: \n", data_wd)

# Folder where saved models were stored in image_type_train.ipynb
models_folder = "saved_models" #@param ["saved_models"] {allow-input: true}
models_wd = basewd + '/' + models_folder
# Only when the models folder is missing: create it and download + unpack the two model archives
if not os.path.exists(models_wd):
    os.makedirs(models_wd)
    print("\nDownloading pre-trained EOL models for training attempts 11, 13...\n")
    %cd $models_wd
    file_ids = ['1Sxp742kescTGAUKlVkRR2hcoVo39y4pd', '1Fr1x5ZLXdd-DBZ7yRWx691orbtXh9lW1']
    outfnames = ['11.zip', '13.zip']
    for idx, file_id in enumerate(file_ids):
        # NOTE(review): file_download_link is built but never used; the download goes through gdown
        file_download_link = "https://docs.google.com/uc?export=download&id=" + file_id
        outfname = outfnames[idx]
        # Folder name is the archive name without its extension ('11', '13')
        outfolder = outfnames[idx].split('.')[0]
        !mkdir $outfolder
        !gdown $file_id
        !unzip $outfname -d .
        # The files are moved out of a nested content/drive/... tree (presumably the
        # layout stored inside the archive) into ./<outfolder>; leftovers are then removed
        outfpath = "content/drive/MyDrive/summer20/classification/image_type/saved_models/" + outfolder + "/*"
        !mv -v $outfpath $outfolder
        !rm -r content 
        !rm -r $outfname

print("\nSaved models directory set to: \n", models_wd)

In [None]:
# For working with data
import itertools
import os
import numpy as np
import pandas as pd
# Suppress pandas setting with copy warning
pd.options.mode.chained_assignment = None  # default='warn'

# For downloading and displaying images
import matplotlib.pyplot as plt
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
%matplotlib inline

# For measuring inference time
import time

# For image classification and training
import tensorflow as tf

## Run images through for classification and validating predictions (Run 1x for each trained model)   
---
Selected models from image_type_train.ipynb   
* Run 11: Inception v3
* Run 13: Mobilenet SSD v2

In [None]:
# Define functions

# To read in EOL formatted data files
def read_datafile(fpath, sep="\t", header=0, disp_head=True, lineterminator='\n', encoding='latin1', dtype=None):
    """Read a delimited text file into a DataFrame.

    Args:
        fpath: path of the file to read.
        sep, header, lineterminator, encoding, dtype: forwarded unchanged to
            ``pd.read_csv``.
        disp_head: when true, print the first rows of the loaded table.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        Exception: if ``fpath`` does not exist (carries the original traceback).
    """
    read_opts = dict(sep=sep, header=header, lineterminator=lineterminator,
                     encoding=encoding, dtype=dtype)
    try:
        table = pd.read_csv(fpath, **read_opts)
    except FileNotFoundError as err:
        message = "File not found: Enter the path to your file in form field and re-run"
        raise Exception(message).with_traceback(err.__traceback__)

    # Optional preview of the loaded data
    if disp_head:
        print("Data header: \n", table.head())

    return table

# Define start and stop indices in EOL bundle for running inference   
def set_start_stop(run, df):
    """Pick a random contiguous window of items to run inference on.

    The window holds 50 items when ``run`` contains "tiny subset", otherwise 500.
    The start index is drawn so that the whole window fits inside ``df``; if
    ``df`` has fewer items than the window, the whole of ``df`` is used.
    (Previously the start was drawn from the full range, so the window could run
    past the end and yield far fewer items than requested.)

    Args:
        run: run-mode string chosen in the form field.
        df: any sized, sliceable collection (only ``len(df)`` is used here).

    Returns:
        (start, stop) such that ``df[start:stop]`` is the selected window and
        ``stop - start`` is the real number of selected items.
    """
    n_items = len(df)
    window = 50 if "tiny subset" in run else 500

    # Not enough data for a full window (also covers empty input): take everything
    if n_items <= window:
        return 0, n_items

    # Draw the start only from positions that leave room for a full window
    start = int(np.random.choice(a=n_items - window + 1, size=1)[0])
    stop = start + window

    return start, stop

# Load saved model from directory
def load_saved_model(models_wd, TRAIN_SESS_NUM, module_selection):
    """Load the Keras model saved for one training session.

    Args:
        models_wd: folder that holds one sub-folder per training session.
        TRAIN_SESS_NUM: session number as a string; used as the sub-folder name.
        module_selection: (handle_base, pixels) pair describing the model.

    Returns:
        (model, pixels, handle_base)
    """
    # Models live at <models_wd>/<TRAIN_SESS_NUM>
    model = tf.keras.models.load_model("/".join((models_wd, TRAIN_SESS_NUM)))
    # Unpack model name and input image size
    handle_base, pixels = module_selection

    return model, pixels, handle_base

# Get info about model based on training attempt number
def get_model_info(TRAIN_SESS_NUM):
    """Look up the model type and class labels for a training session.

    Args:
        TRAIN_SESS_NUM: training session number (string or int); only 11 and 13
            are known.

    Returns:
        (module_selection, dataset_labels) where module_selection is a
        (model name, input size in pixels) pair and dataset_labels is the
        notebook-level ``filters`` list.

    Raises:
        ValueError: if the session number is not numeric or is not 11 or 13
            (previously an unknown number surfaced as an UnboundLocalError).
    """
    known_modules = {
        11: ("inception_v3", 299),           # Session 11
        13: ("mobilenet_v2_1.0_224", 224),   # Session 13
    }
    sess_num = int(TRAIN_SESS_NUM)
    if sess_num not in known_modules:
        raise ValueError(
            "Unknown training session number {!r}: expected one of {}".format(
                TRAIN_SESS_NUM, sorted(known_modules)))
    module_selection = known_modules[sess_num]
    dataset_labels = filters

    return module_selection, dataset_labels

# Get test image filepaths
def get_test_images(true_imclass):
    """Collect the test images to run inference on for one image class.

    If ``<cwd>/pre-processing/images/<true_imclass>`` exists, its files are used
    (normal mode). Otherwise the function falls back to demo mode and samples a
    few image URLs from every bundle file found in ``data_wd``.

    Args:
        true_imclass: name of the image class being inspected.

    Returns:
        (TEST_IMAGE_PATHS, demo): list of file paths (normal mode) or URLs
        (demo mode), and a flag telling which mode was used.
    """
    # NOTE(review): this path is built from cwd (the results folder) while the
    # metadata folder is built from basewd -- confirm which root is intended
    impath = cwd + '/pre-processing/images/' + true_imclass
    # If already custom-trained model, pull test images to inspect results for
    if os.path.exists(impath):
        demo = False # Not running in demo mode
        TEST_IMAGE_PATHS = [os.path.join(impath, fn) for fn in os.listdir(impath)]
        print("\nUsing test images from: \n", impath)
    # If running this script to test functionality, sample URLs from EOL image bundles
    else:
        demo = True # Running in demo mode using only Colab Runtime files
        n_per_bundle = 5
        fns = os.listdir(data_wd)
        TEST_IMAGE_PATHS = []
        # NOTE(review): every bundle in data_wd is sampled, whatever true_imclass is
        for fn in fns:
            fpath = data_wd + '/' + fn
            # One URL per line. Read the lines directly: recent pandas versions
            # reject read_csv(sep='\n'), which was used here before.
            with open(fpath, encoding='utf-8') as bundle:
                urls = [line.strip() for line in bundle if line.strip()]
            # Keep the sampling window inside the bundle so a full set is returned
            last_start = max(len(urls) - n_per_bundle, 0)
            start = int(np.random.choice(a=last_start + 1, size=1)[0])
            TEST_IMAGE_PATHS.extend(urls[start:start + n_per_bundle])
        # Message now reports the number actually sampled (it used to say 50)
        print("\nUsing {} random images from each EOL image type bundle: \n".format(n_per_bundle), fns)

    return TEST_IMAGE_PATHS, demo

# Set filename for saving classification results
def set_outfpath(true_imclass):
    """Build (and announce) the CSV path where results for one class are saved.

    The path is ``<cwd>/image_type_<TRAIN_SESS_NUM>_<true_imclass>.csv``, using
    the notebook-level ``cwd`` and ``TRAIN_SESS_NUM``.
    """
    parts = (cwd, '/image_type_', TRAIN_SESS_NUM, '_', true_imclass, '.csv')
    outfpath = "".join(parts)
    print("\nSaving results to: \n", outfpath)

    return outfpath

# Load in image from file
def image_from_file(im_path):
    """Load an image file as a model-ready batch of one.

    The image is converted to RGB, resized to ``pixels`` x ``pixels`` (notebook-
    level value), reshaped to [1, pixels, pixels, 3] and divided by 255.

    Returns:
        (image, colormode): the array and the band names of the file as opened
        (taken before the RGB conversion).
    """
    source = Image.open(im_path) # may be rgba (with transparency)
    colormode = source.getbands() # record original bands before converting
    resized = source.convert('RGB').resize([pixels,pixels])
    batch = np.reshape(resized,[1,pixels,pixels,3])
    batch = batch*1./255 # normalize colorspace

    return batch, colormode

# Load in image from URL
# Modified from https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/saved_model.ipynb#scrollTo=JhVecdzJTsKE
def image_from_url(url, fn):
    """Download an image and return it as a model-ready batch of one.

    Args:
        url: address of the image.
        fn: base filename for the downloaded copy.

    Returns:
        (image, colormode): the preprocessed array with a leading batch axis,
        and the band names of the loaded image.
    """
    import hashlib

    # get_file() reuses an existing cached file with the same name instead of
    # downloading again. Callers pass names such as '1.jpg' for every class, so
    # the cache name is made unique per URL to avoid silently re-reading an
    # image that belongs to a different URL.
    url_tag = hashlib.sha256(str(url).encode('utf-8')).hexdigest()[:16]
    file = tf.keras.utils.get_file(url_tag + '_' + fn, url)
    # (The unused full-size load of the same file was removed.)
    image = tf.keras.preprocessing.image.load_img(file, target_size=[pixels, pixels])
    # NOTE(review): load_img converts to RGB by default, so this presumably never
    # reports greyscale ('L') the way image_from_file does -- confirm
    colormode = image.getbands()
    image = tf.keras.preprocessing.image.img_to_array(image)
    # NOTE(review): image_from_file scales pixels by 1/255, whereas this uses the
    # MobileNetV2 preprocess_input -- confirm which one matches training
    image = tf.keras.applications.mobilenet_v2.preprocess_input(
        image[tf.newaxis,...])

    return image, colormode

# Get info from predictions to display on images
def get_predict_info(predictions, i, stop, start):
    """Extract the winning class from a prediction and print progress.

    Args:
        predictions: model output; only the first row is inspected.
        i: zero-based position of the current image (printed as ``i+1``).
        stop, start: window bounds; ``stop-start`` is printed as the total.

    Returns:
        (label_num, conf, im_class): index of the highest score, that score, and
        the matching entry of the notebook-level ``dataset_labels``.
    """
    scores = predictions[0]
    # Highest-scoring class and its score
    label_num = np.argmax(scores, axis=-1)
    conf = scores[label_num]
    im_class = dataset_labels[label_num]
    # Display progress message after each image
    total = format(stop-start, '.0f')
    print("Completed for {} of {} files".format(i+1, total))

    return label_num, conf, im_class

# Make placeholder lists to fill for each class
def make_placeholders():
    """Return five new, independent empty lists.

    Order: filenames, confidences, true_imclasses, det_imclasses, colormodes.
    """
    filenames, confidences, true_imclasses, det_imclasses, colormodes = (
        [] for _ in range(5))

    return filenames, confidences, true_imclasses, det_imclasses, colormodes
    
# Add values for each image to placeholder list
def record_results(fn, conf, true_imclass, det_imclass, colormode):
    """Append one image's values to the notebook-level placeholder lists.

    Returns:
        A list holding the five placeholder lists, in the order
        filenames, confidences, true_imclasses, det_imclasses, colormodes.
    """
    results = [filenames, confidences, true_imclasses, det_imclasses, colormodes]
    values = (fn, conf, true_imclass, det_imclass, colormode)
    # Each value goes to the list in the matching position
    for column, value in zip(results, values):
        column.append(value)

    return results

# Export results
def export_results(results, outfpath):
    """Write the collected per-image results to a CSV file.

    Args:
        results: list of five equally long lists (one per output column).
        outfpath: destination CSV path.
    """
    column_names = ("filename", "confidence", "true_id", "det_id", "colormode")
    # results is column-oriented; transpose so that each image becomes one row
    table = pd.DataFrame(results).transpose()
    table.to_csv(outfpath, index=False, header=column_names)
    message = "\nClassification predictions for image class {} being saved to : \n{}\n"
    print(message.format(true_imclass, outfpath))

In [None]:
#@title Run inference for chosen Training Session Number (11, 13) and dataset size
%cd $cwd

# Choose training attempt number to inspect results for
TRAIN_SESS_NUM = "13" #@param ["11", "13"] {allow-input: true}

# Test pipeline with a smaller subset than 5k images?
run = "test with tiny subset" #@param ["test with tiny subset", "for 500 images"]
print("Run: ", run)

# Load saved model
module_selection, dataset_labels = get_model_info(TRAIN_SESS_NUM)
print("Loading saved model ", module_selection)
model, pixels, handle_base = load_saved_model(models_wd, TRAIN_SESS_NUM, module_selection)

# Run inference for each image class to compare known versus predicted image types
true_imclasses = filters
for true_imclass in true_imclasses:
    print("Runing inference for class: {}\n".format(true_imclass))
    # Set filename for saving classification results
    outfpath = set_outfpath(true_imclass)
    # Make placeholder lists to record values for each image
    filenames, confidences, true_imclasses, det_imclasses, colormodes = make_placeholders()
    # Get test images for running inference
    df, demo = get_test_images(true_imclass)

    # Run 500 random EOL bundle images through trained model
    start, stop = set_start_stop(run, df)
    for i, row in enumerate(df[start:stop], start=1):
        try:
            # Read in image from file
            if demo:
                url = row
                fn = str(i) + '.jpg'
                img, colormode = image_from_url(url, fn)
            else:
                img, colormode = image_from_file(row)
        
            # Image classification
            start_time = time.time() # Record inference time
            predictions = model.predict(img, batch_size=1)
            label_num, conf, det_imclass = get_predict_info(predictions, i, stop, start)
            end_time = time.time()
            print("Inference time: {} sec".format(format(end_time-start_time, '.2f')))

            # Record results in placeholder lists to inspect results in next step
            results = record_results(row, conf, true_imclass, det_imclass, colormode)

        except:
            pass

    # Combine to df and export results
    export_results(results, outfpath)

In [None]:
#@title Combine model outputs for image type classes

# Combine the per-class prediction files written by the inference cell above
# (paths are relative, so they resolve against the current working directory)
imclasses = filters
base = 'image_type_' + TRAIN_SESS_NUM + '_'
all_filenames = []
for imclass in imclasses:
    all_filenames.append(base + imclass + '.csv')

# na_filter=False keeps the literal class name 'null' as a string instead of NaN
per_class_frames = [pd.read_csv(f, sep=',', header=0, na_filter = False) for f in all_filenames]
all_predictions = pd.concat(per_class_frames)

preview = all_predictions[['filename', 'true_id', 'det_id']].head()
print("Model predictions for Training Attempt {}, {}:".format(TRAIN_SESS_NUM, handle_base))
print("Image type predictions combined for all classes. \nNo. Images: {}\n{}".format(len(all_predictions), preview))

## Plot prediction error and confidence for each class (Run 1x for each trained model)
---   
Use these histograms to find a confidence threshold value to optimize dataset coverage and accuracy

### Plot histograms

In [None]:
# Define functions

# Calculate prediction accuracy
def get_accuracy(obs, all_vals):
    """Return ``obs / all_vals`` formatted to two decimals.

    Args:
        obs: number of correct predictions.
        all_vals: total number of predictions.

    Returns:
        A string such as '0.75', or the integer 0 when ``obs`` is falsy.
    """
    # Nothing observed: report 0 without dividing
    if not obs:
        return 0

    return format((obs/all_vals), '.2f')

# Validate predictions by image class (and optionally, by taxon)
def validate_predict(df, inspect_by_taxon, taxon):
    """Split predictions into correct and incorrect ones.

    The input frame is left untouched; the comparison column ``det`` is added to
    a copy (previously it was written into the caller's DataFrame).

    Args:
        df: predictions with at least 'true_id' and 'det_id' columns; an
            'ancestry' column is also required when ``inspect_by_taxon`` is true.
        inspect_by_taxon: when true, keep only rows whose 'ancestry' contains
            ``taxon`` (case-insensitive; missing ancestry never matches).
        taxon: taxon text to filter on.

    Returns:
        (tru, fal, taxon): rows where true and detected class match, rows where
        they differ (both carry the boolean 'det' column), and ``taxon`` as given.
    """
    # If inspecting for taxon-specific images only
    if inspect_by_taxon:
        df = df.loc[df.ancestry.str.contains(taxon, case=False, na=False)]
        print("Inspecting results for {}:\n{}".format(taxon, df.head()))

    # Validate predictions
    # Check where true image types and model-determined classes match
    # (assign returns a new frame, so the caller's data is not modified)
    df = df.assign(det=df['true_id'] == df['det_id'])
    tru = df.loc[df.det, :] # True ID
    fal = df.loc[~df.det, :] # False ID

    return tru, fal, taxon

# Plot results by image class
def plot_predict_x_conf(tru, fal, thresh, imclasses=imclasses):
    """Plot per-class histograms of prediction confidence.

    One subplot is drawn per entry of ``imclasses`` (any number of classes),
    showing the confidence of correct ('True Det') and incorrect ('False Det')
    predictions for images whose true class is that entry.

    Args:
        tru: DataFrame of correct predictions ('true_id', 'confidence' columns).
        fal: DataFrame of incorrect predictions (same columns).
        thresh: confidence value at which to draw a dashed vertical line; a
            falsy value (0, None) draws no line.
        imclasses: class names to plot, one subplot each.

    Returns:
        The matplotlib Figure.
    """
    def _accuracy(n_true, n_false):
        # Two-decimal accuracy; 'n/a' when there are no images (avoids dividing by zero)
        total = n_true + n_false
        return format((n_true/total), '.2f') if total else 'n/a'

    # Plot parameters to make 1 subplot per image class
    kwargs = dict(alpha=0.5, bins=15)
    # squeeze=False always returns an array of axes, even for a single class
    fig, axes = plt.subplots(len(imclasses), figsize=(10, 10),
                             constrained_layout=True, squeeze=False)
    fig.suptitle('Prediction Confidence by Class\n Overall Accuracy: {}'.format(
                  _accuracy(len(tru), len(fal))))

    # Make subplots
    for ax, imclass in zip(axes.ravel(), imclasses):
        # Break up predictions by true image class
        class_true = tru.loc[tru['true_id'] == imclass, :] # True dets
        class_false = fal.loc[fal['true_id'] == imclass, :] # False dets
        n_true, n_false = len(class_true), len(class_false)

        # True predictions
        ax.hist(class_true['confidence'], color='y', label='True Det', **kwargs)
        # False predictions
        ax.hist(class_false['confidence'], color='r', label='False Det', **kwargs)
        ax.set_title("{} (n={} images)\n Accuracy: {}".format(
                     imclass, n_true + n_false, _accuracy(n_true, n_false)))
        ax.legend()

        # Add Y-axis label and optional confidence threshold line
        ax.set(ylabel='Freq (# imgs)')
        if thresh:
            ax.axvline(thresh, color='k', linestyle='dashed', linewidth=1)

    return fig

# To save the figure
def save_figure(fig, taxon, TRAIN_SESS_NUM=TRAIN_SESS_NUM, handle_base=handle_base):
    """Save the histogram figure as ``<TRAIN_SESS_NUM>_<handle_base>[_suffix].png``.

    A '_plantae' or '_animalia' suffix is added when ``taxon`` contains 'plant'
    or 'anim' respectively. The file is written relative to the current working
    directory. Note that the default arguments are captured when this function
    is defined.

    Returns:
        The path the figure was saved to.
    """
    # Pick filename suffix for a specific taxon, if any
    suffix = ''
    if taxon:
        if 'plant' in taxon:
            suffix = '_plantae'
        elif 'anim' in taxon:
            suffix = '_animalia'

    outfpath = TRAIN_SESS_NUM + '_' + (handle_base + suffix) + '.png'
    fig.savefig(outfpath)
    print("Histograms saved to ", outfpath)

    return outfpath

In [None]:
#@title Plot figures (Optional: inspect for specific taxon and/or add a confidence threshold line)

# Load combined prediction results (work on a copy so all_predictions stays unchanged)
df = all_predictions.copy()

# Optional: Inspect predictions for taxon-specific images only?
# (when enabled, validate_predict filters rows on the 'ancestry' column)
inspect_by_taxon = False #@param {type:"boolean"}
taxon = "" #@param {type:"string"}

# Optional: confidence value at which a dashed vertical line is drawn on each
# histogram; 0 draws no line
thresh = 0 #@param {type:"number"}

# Validate predictions by image class (and optionally, by taxon)
tru, fal, taxon = validate_predict(df, inspect_by_taxon, taxon)

# Plot result accuracy by image class (optionally, with confidence threshold line)
fig = plot_predict_x_conf(tru, fal, thresh, imclasses)

# Export histograms
figname = save_figure(fig, taxon)

### Simulate resulting dataset sizes based on different confidence thresholds

In [None]:
# For each candidate confidence threshold, report accuracy and how much of the
# dataset (overall, true and false predictions) would be kept above it.

# Load combined prediction results
df = all_predictions.copy()

# Split by True or False determined image ID
df['det'] = (df["true_id"] == df["det_id"])
tru = df.loc[df.det, :] # True ID
fal = df.loc[~df.det, :] # False ID
 
# Confidence values to test  
conf_vals = [1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2] #@param
for conf_val in conf_vals: 
    df_c = df.loc[df["confidence"] > conf_val, :]
    true_c = tru.loc[tru["confidence"] > conf_val, :]
    fal_c = fal.loc[fal["confidence"] > conf_val, :]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    all_vals = pd.concat([true_c, fal_c])
    print("\nConfidence Value: {}\n".format(conf_val))
    print("Accuracy for confidence > {}: {}".format(conf_val, get_accuracy(len(true_c), len(all_vals))))
    # Retained values are fractions of the unfiltered counts. max(..., 1) avoids a
    # division by zero when a group is empty (its retained count is then 0 too).
    print("Predictions Retained (%): {}".format(len(df_c)/max(len(df), 1)))
    print("True Predictions Retained (%): {}".format(format((len(true_c)/max(len(tru), 1)), '.2f')))
    print("False Predictions Retained (%): {}".format(format((len(fal_c)/max(len(fal), 1)), '.2f')))
    print("Accuracy for confidence > {}, by class:".format(conf_val))
    # By class
    for imclass in imclasses:
        true_det_c = len(true_c.loc[true_c["true_id"] == imclass, :])
        all_det_c = len(all_vals.loc[all_vals["true_id"] == imclass, :])
        accuracy = get_accuracy(true_det_c, all_det_c)
        print("{}: {}".format(imclass, accuracy))

## Inspect detections by image colorspace 
--- 
Many false detections among illustrations came from greyscale images (color mode 'L' in Pillow). Compare true and false detections for greyscale images in each class.

In [None]:
# Break up predictions by image class and colorspace
# Uses tru / fal (correct / incorrect predictions) from the cells above and
# works for any number of classes in imclasses.

for imclass in imclasses:
    # True and false detections for images whose true class is imclass
    class_true = tru.loc[tru['true_id'] == imclass, :] # True dets
    class_false = fal.loc[fal['true_id'] == imclass, :] # False dets

    print("\n{}".format(imclass))
    print("False detections: {}\nTrue detections: {}".format(len(class_false), len(class_true)))

    # Greyscale only. The colormode column is compared as text: "('L',)" is how
    # the band tuple ('L',) reads back from the results CSV.
    f_by_col = class_false.loc[class_false["colormode"]=="('L',)", :]
    t_by_col = class_true.loc[class_true["colormode"]=="('L',)", :]
    print("False for greyscale: {}\nTrue for greyscale: {}".format(len(f_by_col), len(t_by_col)))