<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/flower_fruit/det_conf_threshold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Determine confidence threshold for Flower/Fruit Classification Models 
---
*Last Updated 27 September 2020*   
Choose which trained model and confidence threshold values to use for classifying flowers/fruits from EOL images. Threshold values should be chosen that maximize coverage and minimize error.

First, choose the N-best models trained in [flower_fruit_train.ipynb](https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/flower_fruit/flower_fruit_preprocessing.ipynb). Then, run this notebook. 

1) Save model predictions and confidence values for 500 images per class  (Flower, Fruit, Null) for each model.   
2) Load saved model prediction and confidence files from 1.   
3) Visualize confidence values for true and false predictions per class to determine thresholds for use with flower-fruit classifiers.

### Imports
---

In [None]:
# Make Google Drive available to the notebook so files can be imported/exported
from google.colab import drive

drive_root = '/content/drive'
drive.mount(drive_root, force_remount=True)

In [None]:
# For working with data and plotting graphs
import itertools
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# For image classification and training
import tensorflow as tf

%cd drive/My Drive/summer20/classification/flower_fruit/det_conf_threshold/

### 1) Save model predictions and confidence values for 500 images per class  (Flower, Fruit, Null) for each model
---   
True and false predictions by confidence value will be used to compare model performance per class. Get values for models from the 7 best training runs. 

In [None]:
# Define functions

# TO DO: Do you want to display classification results for the most recently trained model?
answer = "No" #@param ["Yes", "No"]
# TO DO: If No, manually input desired training attempt number to the right
if answer == "Yes":
  # Display results from most recent training attempt
  last_attempt = !ls /content/drive/'My Drive'/summer20/classification/flower_fruit/saved_models/ | tail -n 1
  TRAIN_SESS_NUM = str(last_attempt.n)
else:
TRAIN_SESS_NUM = "11" #@param ["03", "05", "07", "08", "09", "11", "23_retrain"]

# Load trained model from path
saved_model_path = '/content/drive/My Drive/summer20/classification/flower_fruit/saved_models/' + TRAIN_SESS_NUM
flower_model = tf.keras.models.load_model(saved_model_path)

# Function for plotting classification results with color-coded label if true or false prediction
label_names = ['Flower', 'Fruit', 'Null']

In [None]:
# Run inference on one image class with the loaded model and save, per image,
# the top confidence value, the true class and the predicted class to a CSV file
from PIL import Image
import time

# TO DO: Choose the image class to run for (Run 1x per class per model)
base = '/content/drive/My Drive/summer20/classification/'
classifier = "flower_fruit/" #@param ["flower_fruit/"]
true_imclass = "03_null" #@param ["02_fruit", "01_flower", "03_null"]
PATH_TO_TEST_IMAGES_DIR = base + classifier + "images/" + true_imclass
# os.listdir returns entries in arbitrary order; sort so that start/end below
# always select the same images
names = sorted(os.listdir(PATH_TO_TEST_IMAGES_DIR))
TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR, name) for name in names]
outpath = base + classifier + 'det_conf_threshold/flowerfruit_conf_threshold_' + TRAIN_SESS_NUM + "_" + true_imclass + ".csv"

# For determining confidence threshold
confi = []
true_id = []
det_id = []

# TO DO: Choose which slice of the image list to run inference on
start = 0 #@param {type:"number"}
end =  500 #@param {type:"number"}
batch_paths = TEST_IMAGE_PATHS[start:end]
# "03_null" -> "null"; matches the lower-cased predicted label recorded below
true_label = true_imclass.split("_")[1]

for im_num, im_path in enumerate(batch_paths, start=1):
    # Load the image, force 3 channels (grayscale/RGBA/palette files would not fit
    # the [1, 224, 224, 3] input) and close the file handle when done
    with Image.open(im_path) as img:
        image = img.convert('RGB').resize((224,224))
    # Add the batch dimension -> shape (1, 224, 224, 3)
    # NOTE(review): pixel values are passed as-is (0-255); confirm the saved model
    # does its own rescaling
    image = np.asarray(image)[np.newaxis, ...]

    # Classify the image and keep the highest-scoring class
    predictions = flower_model.predict(image, batch_size=1)
    label_num = np.argmax(predictions)
    conf = predictions[0][label_num]
    imclass = label_names[label_num]

    # Display progress message after each image
    print('Inference complete for {} of {} images'.format(im_num, len(batch_paths)))

    # Record confidence, true id, determined id to export and choose confidence thresholds
    confi.append(conf)
    true_id.append(true_label)
    det_id.append(imclass.lower())

# Combine to df (named columns keep the confidence column numeric)
flowfru_conf = pd.DataFrame({"confidence": confi, "true_id": true_id, "det_id": det_id})

# Save results for this model and image class; loaded again in section 2
flowfru_conf.to_csv(outpath, index=False)
print(flowfru_conf.head())

### 2) Load saved model prediction and confidence files from 1
---

In [None]:
# Combine confidence threshold values for classes 1-3 for all models
all_mods = ["mod_3", "mod_5", "mod_7", "mod_8", "mod_9", "mod_11", "mod_23"]
base = 'flowerfruit_conf_threshold_' 
exts = ['_1.csv', '_2.csv', '_3.csv']
# NOTE(review): section 1 writes files named
# 'flowerfruit_conf_threshold_<TRAIN_SESS_NUM>_<true_imclass>.csv'; the names built
# here ('..._mod_3_1.csv') presumably come from renaming those files - confirm.


def load_model_confidences(mod_name, prefix=base, suffixes=exts):
    """Read and stack the per-class confidence files of one model.

    Args:
        mod_name: model tag used in the file names, e.g. "mod_3".
        prefix: text placed before the model tag in each file name.
        suffixes: one file-name ending per image class.

    Returns:
        A single DataFrame with the rows of all per-class files, re-indexed 0..n-1.
    """
    filenames = [prefix + mod_name + suffix for suffix in suffixes]
    # na_filter=False keeps the class label "null" as text instead of parsing it as NaN
    frames = [pd.read_csv(f, sep=',', header=0, na_filter=False) for f in filenames]
    return pd.concat(frames, ignore_index=True)


# Load every model once and show a preview of each
mods_by_name = {}
for mod_name in all_mods:
    mods_by_name[mod_name] = load_model_confidences(mod_name)
    print("Model {}:".format(mod_name.split("_")[1]))
    print(mods_by_name[mod_name].head())

# Keep one variable per model: the drop-down in the plotting cell refers to these names
mod_3 = mods_by_name["mod_3"]
mod_5 = mods_by_name["mod_5"]
mod_7 = mods_by_name["mod_7"]
mod_8 = mods_by_name["mod_8"]
mod_9 = mods_by_name["mod_9"]
mod_11 = mods_by_name["mod_11"]
mod_23 = mods_by_name["mod_23"]

### 3) Visualize confidence values for true and false predictions per class to determine thresholds for use with flower-fruit classifiers
---   
Choose thresholds for a balance between maximizing coverage and minimizing error.

In [None]:
## TO DO: Choose trained model number using drop-down field to right. Run 1x per model
mod = mod_7 #@param ["mod_7", "mod_9", "mod_8", "mod_23", "mod_3", "mod_5", "mod_11"] {type:"raw"}
# Get model name (e.g. "mod_7") to use for the figure title and file name: find the
# notebook variable named "mod_*" that is the selected DataFrame
modn = next((name for name, value in list(globals().items())
             if name.startswith("mod_") and value is mod), "mod_unknown")
df = mod.copy()

## Flag each prediction as a true or false determination
df['det'] = (df["true_id"] == df["det_id"])


def conf_by_outcome(frame, class_name):
    """Split the confidence values of one true image class by prediction outcome.

    Args:
        frame: DataFrame with columns "true_id", "confidence" and boolean "det".
        class_name: value of "true_id" to select ("flower", "fruit" or "null").

    Returns:
        Tuple (true_conf, false_conf) of confidence Series for correct and
        incorrect predictions on images of that class.
    """
    in_class = frame.loc[frame["true_id"] == class_name, :]
    return in_class.loc[in_class.det, 'confidence'], in_class.loc[~in_class.det, 'confidence']


## Plot parameters
kwargs = dict(alpha=0.5, bins=15)
fig, (ax1, ax2, ax3) = plt.subplots(3, figsize=(10, 10))
fig.suptitle('Prediction Confidence Value by class - Model {}'.format(modn.split("_")[-1]))

# One panel per true image class: (axis, value in "true_id", panel title)
panels = [(ax1, "flower", "Flowers"), (ax2, "fruit", "Fruits"), (ax3, "null", "None")]
for ax, class_name, panel_title in panels:
    true_conf, false_conf = conf_by_outcome(df, class_name)
    ax.hist(true_conf, color='y', label='True Det', **kwargs)
    ax.hist(false_conf, color='r', label='False Det', **kwargs)
    # Report the number of images actually plotted instead of assuming 500
    ax.set_title("{} (n={} images)".format(panel_title, len(true_conf) + len(false_conf)))
    # Y-axis label
    ax.set(ylabel='Frequency')
    ax.legend();

# Save the figure to the working directory as <model name>.png
figname = modn + '.png'
fig.savefig(figname)