<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/rating/inspect_train_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Determine confidence threshold for Image Rating Classification Models 
---
*Last Updated 29 December 2020*   
Choose which trained model and confidence threshold values to use for classifying EOL image ratings. Choose threshold values that maximize coverage (the share of images kept) while minimizing error (the share of kept images that are misclassified).

First, choose the best models trained in [rating_train.ipynb](https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/rating/rating_train.ipynb). Then, run this notebook. 

1) Save model predictions and confidence values for 50 images per class (Image rating 1-5) for best models chosen in rating_train.ipynb (Run 20: Inception v3 - trained on 'good' and 'bad' classes; Run 18: Mobilenet SSD v2 - trained on 'good' and 'bad' classes; Run 06: Inception v3 - trained on numerical rating classes 1-5).   
2) Load saved model prediction and confidence files from 1.   
3) Visualize confidence values for true and false predictions per class to determine thresholds for use with image rating classifiers (with option to inspect by taxon).

### Imports
---

In [None]:
# Mount google drive to import/export files
# Drive is attached at /content/drive; the paths used in later cells start from this mount point
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# For working with data and plotting graphs
import itertools
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# For image classification and training
import tensorflow as tf
%cd drive/My Drive/summer20/classification/rating/

### 1) Save model predictions and confidence values for 50 images per class  (rating 1-5) for best training run
---   
True and false predictions by confidence value will be used to compare model performance per class. Get values for models from best training runs in rating_train.ipynb. Change values where you see 'TO DO'.   

#### Run 1x: Build dataset of image ratings for images not previously seen by models 
(Image ratings found in EOL user generated rating and exemplar files not included in 7k training/testing datasets)

In [None]:
# Make sure in rating/
#%cd ../
%cd rating

# Get list of images used for 7k training/testing datasets
nums = ['one', 'two', 'three', 'four', 'five']
all_fns = ['image_data/bundles/' + num + '_download_7k.txt' for num in nums] # Image rating filenames
print(all_fns)

# For classes 1-4
# List all ratings used to build 7k training/testing datasets
used_imgs = []
for fn in all_fns[:4]:
    df = pd.read_csv(fn, index_col=0, header=None, sep='\t')
    df1 = df.iloc[::2].reset_index() # hacky step to delete every other row, bc they contain old index/rownums
    df1.columns = ['link']
    used_imgs.append(df1)
# For class 5 
# bundle was made differently (from EOL ratings & exemplars) so formatting is not same as 1-4
df = pd.read_csv(all_fns[4], index_col=None, header=1, sep='\t') 

# Combine ratings for classes 1-5
df.columns = ['link']
used_imgs.append(df)
used = pd.concat(used_imgs, axis=0, ignore_index=True)
print('Total image ratings used in training/testing: ', len(used), used.head())

# Remove ratings used for training/testing from total EOL rating dataset
df = pd.read_csv("/content/drive/My Drive/summer20/classification/rating/image_data/bundles/image_ratings.txt", sep="\t", lineterminator='\n', encoding='latin1', header=0)
print("Total image ratings available:", len(df))
cond = df['obj_url'].isin(used['link'])
df.drop(df[cond].index, inplace = True)
unused = df.copy()
unused.to_csv('image_data/bundles/unused_image_ratings_foreval.txt', sep="\t", index=False, header=True)
print("Total un-used image ratings available:", len(unused))

# Remove ratings used for training/testing from total EOL exemplar dataset (used to supplment rating 5)
df = pd.read_csv("/content/drive/My Drive/summer20/classification/rating/image_data/bundles/images_selected_as_exemplar.txt", sep="\t", lineterminator='\n', encoding='latin1', header=0)
print("Total image exemplars available:", len(df))
cond = df['object_url'].isin(used['link'])
df.drop(df[cond].index, inplace = True)
unused = df.copy()
unused.to_csv('image_data/bundles/unused_image_exemplars_foreval.txt', sep="\t", index=False, header=True)
print("Total un-used image exemplars available:", len(unused))

#### Run 1x for each of 3 models: Run images through models for classification   
Selected models from rating_train.ipynb   
* Run 20: Inception v3 (trained on 'good' and 'bad' classes)
* Run 18: Mobilenet SSD v2 (trained on 'good' and 'bad' classes)
* Run 06: Inception v3 (trained on numerical rating classes 1-5)

In [None]:
%cd inspect_resul
# Define functions

# TO DO: Do you want to display classification results for the most recently trained model?
answer = "No" #@param ["Yes", "No"]
# TO DO: If No, manually input desired training attempt number to the right
if answer == "Yes":
  # Display results from most recent training attempt
  last_attempt = !ls /content/drive/'My Drive'/summer20/classification/rating/saved_models/ | tail -n 1
  TRAIN_SESS_NUM = str(last_attempt.n)
else:
  TRAIN_SESS_NUM = "06" #@param ["20", "18", "06"]

# Load trained model from path
saved_model_path = '/content/drive/My Drive/summer20/classification/rating/saved_models/' + TRAIN_SESS_NUM
rating_model = tf.keras.models.load_model(saved_model_path)

# Model type for different train sessions
# Session 18
if int(TRAIN_SESS_NUM) == 18:
  module_selection =("mobilenet_v2_1.0_224", 224)
# Sessions 20 and 6
else:
  module_selection = ("inception_v3", 299) 
handle_base, pixels = module_selection
IMAGE_SIZE = (pixels, pixels)

# Function for plotting classification results with color-coded label if true or false prediction
if int(TRAIN_SESS_NUM) > 7:
  label_names = ['bad', 'good']
else:
  label_names = ['1', '2', '3', '4', '5']

# Load in image from URL
# Modified from https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/saved_model.ipynb#scrollTo=JhVecdzJTsKE
def image_from_url(url, fn):
  """Fetch an image and prepare it as a single-image batch for the model.

  Args:
    url: Address of the image to fetch.
    fn: File name handed to tf.keras.utils.get_file for the local copy.

  Returns:
    Tuple (x, disp_img): x is the image loaded at pixels x pixels, converted
    to an array, given a leading batch axis and passed through
    mobilenet_v2.preprocess_input; disp_img is the same file loaded without
    resizing.
  """
  # NOTE(review): get_file presumably reuses an existing local file that has the
  # same name instead of downloading again - confirm callers pass a unique fn per image
  local_path = tf.keras.utils.get_file(fn, url)
  keras_image = tf.keras.preprocessing.image
  # Full-size copy for display, resized copy for inference
  disp_img = keras_image.load_img(local_path)
  resized = keras_image.load_img(local_path, target_size=[pixels, pixels])
  batch = keras_image.img_to_array(resized)[tf.newaxis,...]
  x = tf.keras.applications.mobilenet_v2.preprocess_input(batch)
  return x, disp_img

#### Run 1x per model per class: Perform inference to test model performance on unseen dataset

In [None]:
# TO DO: Choose different image classes (1-5) to test model performance on never before seen images (not used in training or testing)
num = 3 #@param {type:"slider", min:1, max:5, step:1}
if num < 5:
  # Classes 1-4 come from the unused EOL user ratings; keep rows whose rounded rating equals the chosen class
  df1 = pd.read_csv("/content/drive/My Drive/summer20/classification/rating/image_data/bundles/unused_image_ratings_foreval.txt", sep="\t", lineterminator='\n', encoding='latin1', header=0)
  df = df1.loc[round(df1["overall_rating"])==num]
  df = df[["obj_with_overall_rating", "obj_url", "overall_rating", "ancestry"]].copy()
else:
  # Class 5 comes from the unused exemplar images; rename the url column to match the rating file
  df = pd.read_csv("/content/drive/My Drive/summer20/classification/rating/image_data/bundles/unused_image_exemplars_foreval.txt", sep="\t", lineterminator='\n', encoding='latin1', header=0)
  df = df[["target_id", "object_url", "ancestry"]].copy()
  df = df.rename(columns={"object_url": "obj_url"})
  df["overall_rating"] = 5
# Randomly sample up to 500 images/ratings
# (capped at the number of rows available, because sample() raises ValueError when asked for more rows than exist)
df = df.sample(min(500, len(df)))
df.head()

In [None]:
# Run inference
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import time

# TO DO: Choose the image class to run for (Run 1x per class per model)
base = '/content/drive/My Drive/summer20/classification/'
classifier = "rating/" #@param ["flower_fruit/", "image_type/", "rating/"]
true_imclass = str(num)
outpath = base + classifier + 'inspect_resul/ratings_' + TRAIN_SESS_NUM + '_' + true_imclass + '.csv'
print(outpath)

# For inspecting results based on ancestry and confidence threshold
filen = []
confi = []
true_id = []
det_id = []
ancest = []

# Loops through the image urls in rows start:end of the sampled dataframe
start = 0 #@param {type:"number"}
end =  50 #@param {type:"number"}
batch = df.iloc[start:end]
total = len(batch)
for count, (i, row) in enumerate(batch.iterrows(), start=1):
  # i is the dataframe index label; it names the local image file and is recorded in the output
  fn = str(i) + '.jpg'
  try:
    t1 = time.time()
    # Get url and ancestry from the current row
    url = row['obj_url']
    ancestry = row['ancestry']
    # Read in image from url
    img, disp_img = image_from_url(url, fn)
    # Classify the image: the highest-scoring output position gives the predicted class
    predictions = rating_model.predict(img, batch_size=1)
    label_num = np.argmax(predictions)
    conf = predictions[0][label_num]
    imclass = label_names[label_num]
    t2 = time.time()
  except Exception as err:
    # Skip images that fail to download or classify, but report them instead of failing silently.
    # Catching Exception (not a bare except) keeps the loop interruptible from the keyboard.
    print("Skipped {} ({} of {}): {}: {}".format(fn, count, total, type(err).__name__, err))
    continue

  # Display progress message after each image
  print("Completed for {}, {} of {} files in {} seconds".format(url, count, total, format(t2-t1, '.2f')))

  # Record confidence, true id, determined id to export and choose confidence thresholds
  filen.append(fn)
  confi.append(conf)
  true_id.append(true_imclass)
  det_id.append(str(imclass))
  ancest.append(ancestry)

# Combine to df and export results
rating_conf = pd.DataFrame(([filen, confi, true_id, det_id, ancest]))
rating_conf = rating_conf.transpose()
# Name the columns up front so the exported header and the preview below agree
rating_conf.columns = ["filename", "confidence", "true_id", "det_id", "ancestry"]
rating_conf.to_csv(outpath, index=False, header=True)
print(rating_conf.head())

### 2) Load saved model prediction and confidence files from 1
---

In [None]:
%cd /content/drive/My Drive/summer20/classification/rating/inspect_resul/

# Combine confidence threshold values for classes 1-3 for all models
num = "06" #@param ["20", "18", "06"]
base = 'ratings_' + num + '_'
exts = ["1.csv", "2.csv", "3.csv", "4.csv", "5.csv"]

# Combine all files in the list
all_filenames = [base + e for e in exts]
mod = pd.concat([pd.read_csv(f, sep=',', header=0, na_filter = False) for f in all_filenames])
print("Model Results:")
print("No. Images:", len(mod))
print(mod.head())

# Aggregate "true_id" classes into good and bad
# Only for the two-class models (run numbers above 7), whose det_id values are 'bad'/'good'
if int(num) > 7:
  # `x == (1 or 2)` only ever tests x == 1 (and `(4 or 5)` only 4), so map every rating explicitly.
  # Casting to str first lets text labels replace numeric ratings; rating 3 stays as '3' (neither good nor bad).
  rating_to_label = {'1': 'bad', '2': 'bad', '4': 'good', '5': 'good'}
  mod['true_id'] = mod['true_id'].astype(str).replace(rating_to_label)

### 3) Look at prediction error and confidence for each class
---   

In [None]:
# Trained model used in 2 above
df = mod.copy()

# Optional: Run inference for taxon-specific images only
# TO DO: Type in the taxon you'd like to inspect results for using form field to right
taxon = "" #@param {type:"string"}
# .copy() so the 'det' column added below is written to an independent frame, not a slice
df = df.loc[df.ancestry.str.contains(taxon, case=False, na=False)].copy()
print(df.head())

# Optional: Show threshold value to help choose optimal balance b/w maximizing useful data and minimizing error
thresh=0 #@param

# Split by True or False determined image ID
df['det'] = (df["true_id"] == df["det_id"])
tru = df.loc[df.det, :] # True ID
fal = df.loc[~df.det, :] # False ID

# and by Image class
# Two-class models (run numbers above 7) carry 'bad'/'good' in true_id; the 5-class model carries ratings 1-5.
# `== any([1,2])` compares against True (which only matches rating 1), so use isin() with explicit labels.
if int(num) > 7:
  bad_labels, good_labels = ['bad'], ['good']
else:
  bad_labels, good_labels = ['1', '2'], ['4', '5']

# Compare as text so the match works whether true_id was read as numbers or as strings
tru_class = tru["true_id"].astype(str)
fal_class = fal["true_id"].astype(str)

# Bad
badt = tru.loc[tru_class.isin(bad_labels), :] # and True ID
badt_conf = badt['confidence']
badf = fal.loc[fal_class.isin(bad_labels), :] # and False ID
badf_conf = badf['confidence']

# Good
goodt = tru.loc[tru_class.isin(good_labels), :] # and True ID
goodt_conf = goodt['confidence']
goodf = fal.loc[fal_class.isin(good_labels), :] # and False ID
goodf_conf = goodf['confidence']

def _fmt_accuracy(n_true, n_false):
  """Return n_true / (n_true + n_false) formatted to 2 decimals, or 'n/a' when there are no predictions."""
  n_total = n_true + n_false
  return format(n_true / n_total, '.2f') if n_total else 'n/a'

## Plot parameters
kwargs = dict(alpha=0.5, bins=15)
fig, (ax1, ax2) = plt.subplots(2, figsize=(10, 10), constrained_layout=True)
fig.suptitle('Prediction Confidence by Class\n Accuracy: {}'.format(_fmt_accuracy(len(tru), len(fal))))

# Bad
# n in the title is the number of rows actually plotted, which shrinks when a taxon filter is applied
ax1.hist(badt_conf, color='y', label='True Det', **kwargs)
ax1.hist(badf_conf, color='r', label='False Det', **kwargs)
ax1.set_title("Bad - Classes 1&2 (n={} images)\n Accuracy: {}".format(len(badt) + len(badf), _fmt_accuracy(len(badt), len(badf))))
ax1.legend();

# Good
ax2.hist(goodt_conf, color='y', label='True Det', **kwargs)
ax2.hist(goodf_conf, color='r', label='False Det', **kwargs)
ax2.set_title("Good - Classes 4&5 (n={} images)\n Accuracy: {}".format(len(goodt) + len(goodf), _fmt_accuracy(len(goodt), len(goodf))))
ax2.legend();

# Y-axis label, plus a dashed vertical line at the candidate threshold when one is set
for ax in fig.get_axes():
    ax.set(ylabel='Freq (# imgs)')
    if thresh:
      ax.axvline(thresh, color='k', linestyle='dashed', linewidth=1)

# TO DO: Choose model name for exporting graphs
if int(num) == 18:
  model = 'mobilenetv2_18'
elif int(num) == 20:
  model = 'inceptionv3_20'
elif int(num) < 7:
  model = 'inceptionv3_06'
else:
  # Any other run number still gets a usable name instead of leaving `model` undefined
  model = 'run_' + str(num)

# Add a taxon suffix when results were filtered by taxon.
# Lower-cased because the filter itself ignores case.
taxon_key = taxon.strip().lower()
if not taxon_key:
  figname = model + '.png'
elif 'plant' in taxon_key:
  figname = model + '_plantae' + '.png'
elif 'anim' in taxon_key:
  figname = model + '_animalia' + '.png'
else:
  # Other taxa: use the typed taxon itself so figname is always defined
  figname = model + '_' + taxon_key.replace(' ', '_') + '.png'
print(figname)
#fig.savefig(figname)