<a href="https://colab.research.google.com/github/aubricot/computer_vision_with_eol_images/blob/master/classification_for_image_tagging/image_type/cartoonify_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Determine if images are cartoons or photographs
---
*Last Updated 29 October 2021*   
Classification accuracy for illustrated images and phylogenies was low for the trained model. This notebook uses an alternate approach that leverages image processing to identify images as photographic or non-photographic. First, each image is cartoonified; then the change in color values between the original and the cartoonified version is measured. If the change is above a certain threshold, the image is likely photographic. If the change is below that threshold, the image is likely non-photographic.
  
***Using 500 images from all image type classes, the best predictor of "not cartoon" was found to be Manhattan norm per pixel > 2.***

## Installs & Imports
---

In [None]:
# Mount google drive to import/export files
# (the working directory set in the cartoonization cell lives under /content/drive)
from google.colab import drive
# force_remount=True is passed so the mount is redone on every run of this
# cell -- presumably to avoid a stale mount after a runtime restart; confirm
# against the Colab documentation
drive.mount('/content/drive', force_remount=True)

In [None]:
# For importing data and images
import pandas as pd
import numpy as np
import os
import scipy
from scipy.linalg import norm
from scipy import sum, average

# For working with images
from PIL import Image
import imageio
import cv2
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'

# Define functions

# Define start and stop indices in EOL bundle for running inference
def set_start_stop(df):
    """Pick a random contiguous window of items to process.

    Reads the notebook-level flag ``test_with_tiny_subset``: when it is true
    the window holds 5 items, otherwise 500. The start index is drawn so that
    the whole window fits inside ``df``; when ``df`` is shorter than the
    window, all of it is used.

    Args:
        df: Any sized, sliceable collection (only ``len(df)`` is used here).

    Returns:
        Tuple ``(start, stop)`` such that ``df[start:stop]`` is the window.
    """
    # To test with a tiny subset, use 5 images; for a larger set, use 500
    window = 5 if test_with_tiny_subset else 500
    N = len(df)
    # Only offer start positions that leave room for a full window. Drawing
    # from all of range(N) could land near the end and silently yield a much
    # shorter slice, and it raised on an empty collection.
    last_start = max(N - window, 0)
    start = int(np.random.randint(0, last_start + 1))
    stop = min(start + window, N)
    print("\nCartoonizing images")

    return start, stop

# List the test images available for an image type class
def get_test_images(true_imclass):
    """Return the paths of every entry in one image class folder.

    Args:
        true_imclass: Name of the class subfolder under
            ``<wd>/pre-processing/images/``, where ``wd`` is the
            notebook-level working directory.

    Returns:
        List with one joined path per directory entry, in the order
        ``os.listdir`` reports them.
    """
    image_dir = wd + '/pre-processing/images/' + true_imclass
    entry_paths = [os.path.join(image_dir, name) for name in os.listdir(image_dir)]
    print("Using test images from: \n", image_dir)

    return entry_paths

# Set filename for saving cartoonization results
def set_outpath(true_imclass):
    """Build the CSV path where the values for one image class are saved.

    Args:
        true_imclass: Image type class name, used as the filename prefix.

    Returns:
        The string ``<wd>/image_data/<true_imclass>_cartoonifcation_values.csv``
        where ``wd`` is the notebook-level working directory.
    """
    # Build the name from the argument. The body used to read the
    # notebook-level loop variable `imclass`, which only worked when the
    # caller's variable happened to have that exact name.
    # NOTE: the "cartoonifcation" spelling is kept on purpose -- the
    # inspection cell reads the files back using this exact filename.
    outpath = wd + '/image_data/' + true_imclass + '_cartoonifcation_values.csv'
    print("Saving results to: \n", outpath)

    return outpath

# To cartoonize an image
def cartoonize(image):
    """Return a cartoon-style version of ``image``.

    An edge mask is computed from a blurred grayscale copy and combined
    (bitwise AND) with a bilateral-filtered copy of the input, so smoothed
    colour regions are outlined by dark edges.

    Args:
        image: 3-channel 8-bit image array; the grayscale conversion treats
            the channels as BGR, which is what ``cv2.imread`` returns.

    Returns:
        The cartoonized image array.
    """
    # Work on the argument: the body previously read the notebook-level
    # variable `img`, so the `image` parameter was silently ignored.

    # Add edges
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 5)  # 5x5 median filter to suppress noise
    # Mean adaptive threshold with block size 9 and constant offset 9
    edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                  cv2.THRESH_BINARY, 9, 9)
    # Back to 3 channels so the mask can be ANDed with the colour image
    edges = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
    # Bilateral filter (diameter 9, sigmaColor 250, sigmaSpace 250)
    color = cv2.bilateralFilter(image, 9, 250, 250)
    img2 = cv2.bitwise_and(color, edges)

    return img2

# Calculate differences between original and cartoonized image
def calc_img_diffs(img, img2):
    """Measure how much the first HSV channel differs between two images.

    Args:
        img: Original 3-channel image array.
        img2: Cartoonized image array with the same shape as ``img``.

    Returns:
        Tuple ``(mnorm, mnorm_pp, znorm, znorm_pp)``:
        sum of ``abs(diff)``, that sum divided by ``HSV_img.size``, the
        number of non-zero entries of ``diff``, and that count divided by
        ``HSV_img2.size``.
    """
    # Convert both images to HSV (the arrays are interpreted as RGB here)
    HSV_img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    HSV_img2 = cv2.cvtColor(img2, cv2.COLOR_RGB2HSV)
    # Find the difference for H of HSV values of the images
    # NOTE(review): if the arrays are uint8 (as cv2.imread produces), this
    # subtraction wraps modulo 256 instead of going negative and abs() below
    # is then a no-op. Left unchanged because the documented threshold was
    # derived from these values -- confirm before "fixing".
    diff = HSV_img[:,:,0]-HSV_img2[:,:,0]
    # np.sum replaces the `sum` imported from scipy, which is only a
    # deprecated alias of numpy.sum; the built-in sum would return per-column
    # totals for a 2-D array rather than a scalar.
    mnorm = np.sum(abs(diff))  # Manhattan norm
    # NOTE(review): .size counts every element of the 3-channel array while
    # diff covers a single channel, so the "per pixel" values are divided by
    # 3x the number of compared entries. Kept as-is for comparability.
    mnorm_pp = mnorm/HSV_img.size # per pixel
    znorm = norm(diff.ravel(), 0)  # Zero norm
    znorm_pp = znorm*1.0/HSV_img2.size # per pixel

    return mnorm, mnorm_pp, znorm, znorm_pp

# To display an image already loaded into the runtime
def display_images(image, image2, mnorm, mnorm_pp, znorm, znorm_pp):
    """Plot the original above its cartoonized version with the norms as title.

    Args:
        image: Original image array, drawn in the upper axes.
        image2: Cartoonized image array, drawn in the lower axes.
        mnorm: Manhattan norm shown in the title.
        mnorm_pp: Manhattan norm per pixel shown in the title.
        znorm: Zero norm shown in the title.
        znorm_pp: Zero norm per pixel shown in the title.
    """
    # NOTE(review): the arrays are drawn exactly as passed in; if they come
    # straight from cv2.imread the channel order may look swapped -- confirm.
    title = "Original vs Cartoonized, pairwise differences\nManhattan norm: {} / per pixel: {}\
                  \nZero norm: {} / per pixel: {}".format(mnorm, mnorm_pp, znorm, znorm_pp)
    fig, axes = plt.subplots(2, figsize=(5, 5), constrained_layout=True)
    upper, lower = axes
    fig.suptitle(title)
    upper.imshow(image)
    lower.imshow(image2)

# Record results for confidence thresholds
# Make placeholder lists to fill for each class
def make_placeholders():
    """Create the five empty result lists used while processing one class.

    Returns:
        Tuple of five new, independent empty lists, intended in order for
        filenames, Manhattan norms, Manhattan norms per pixel, zero norms and
        zero norms per pixel.
    """
    # One list per recorded column, each a distinct object
    return tuple([] for _ in range(5))
    
# Add values for each image to placeholder list
def record_results(fn, mnorm, mnorm_pp, znorm, znorm_pp):
    """Append one image's measurements to the notebook-level result lists.

    The lists ``filenames``, ``mnorms``, ``mnorms_pp``, ``znorms`` and
    ``znorms_pp`` are looked up by name outside this function and are
    modified in place.

    Args:
        fn: Identifier of the image (the caller passes its path).
        mnorm: Manhattan norm.
        mnorm_pp: Manhattan norm per pixel.
        znorm: Zero norm.
        znorm_pp: Zero norm per pixel.

    Returns:
        List holding those five lists themselves (not copies), in the order
        given above.
    """
    results = [filenames, mnorms, mnorms_pp, znorms, znorms_pp]
    new_values = (fn, mnorm, mnorm_pp, znorm, znorm_pp)
    # Pair each column list with the value that belongs in it
    for column, value in zip(results, new_values):
        column.append(value)

    return results

# Export results
def export_results(results):
    """Write the recorded values to the CSV file named by ``outpath``.

    ``outpath`` is read from the notebook level, not passed in.

    Args:
        results: Sequence of five equal-length sequences (filenames, mnorm,
            mnorm_pp, znorm, znorm_pp); each becomes one CSV column.
    """
    column_names = ("filename", "mnorm", "mnorm_pp", "znorm", "znorm_pp")
    # Each inner sequence is a row of the initial frame; transposing turns
    # them into columns with one row per image
    table = pd.DataFrame(results).transpose()
    table.to_csv(outpath, index=False, header=column_names)
    
# To save the figure
def save_figure(fig, imclass):
    """Save a histogram figure for one image class under ``<wd>/image_data/``.

    Args:
        fig: Figure object exposing ``savefig(path)``.
        imclass: Image type class name, used as the filename prefix.

    Returns:
        The path the figure was written to.
    """
    # The separator before 'image_data' was missing, which glued the folder
    # name onto the last component of `wd`; every other path in this notebook
    # uses wd + '/image_data/'.
    figname = wd + '/image_data/' + imclass + '_cartoonization_hists.png'
    fig.savefig(figname)
    print("Histograms saved to ", figname)

    return figname

## Cartoonization - compare cartoonized images to original
---

In [None]:
# Cartoonify images

# Set up directory structure
# TO DO: Type in the path to your working directory in form field to right
wd = "/content/drive/MyDrive/train" #@param {type:"string"}
cwd = wd + '/pre-processing/images/'
%cd $cwd

# Optional: Test downloads with a small subset first?
# TO DO: If yes, check test_with_tiny_subset box
test_with_tiny_subset = True #@param {type: "boolean"}
if test_with_tiny_subset:
    display_results = True

# Run through images to measure the difference from cartoonified and original
# For each image class
imclasses = ['herb', 'illus', 'map', 'null', 'phylo']
for imclass in imclasses:
    # Set filename for saving classification results
    outpath = set_outpath(imclass)

    # Make placeholder lists to record values for each image
    filenames, mnorms, mnorms_pp, znorms, znorms_pp = make_placeholders()

    # Get test images for cartoonizing
    TEST_IMAGE_PATHS = get_test_images(imclass)

    # Cartoonify images
    start, stop = set_start_stop(TEST_IMAGE_PATHS)
    for im_num, im_path in enumerate(TEST_IMAGE_PATHS[start:stop], start=1):
        # Read in image
        img = cv2.imread(im_path)
        
        # Cartoonization
        img2 = cartoonize(img) 

        # Calculate differences between original and cartoonized image
        mnorm, mnorm_pp, znorm, znorm_pp = calc_img_diffs(img, img2)

        # Display cartoonized image
        if display_results:
            display_images(img, img2, mnorm, mnorm_pp, znorm, znorm_pp)

        # Record results in placeholder lists to inspect results in next step
        results = record_results(im_path, mnorm, mnorm_pp, znorm, znorm_pp)

    # Combine to df and export results
    export_results(results)

### Inspect cartoonization results
---

In [None]:
# Combine model outputs for image type classes

# Get cartoonization files for each class
imclasses = ['herb', 'illus', 'map', 'null', 'phylo']
base = wd + '/image_data/'
all_filenames = [base + imclass + '_cartoonifcation_values.csv' for imclass in imclasses]

# Loop through cartoonization files and display histograms
# Each file is paired with its class name because save_figure needs the class
# to build the output filename (it was previously called without it, which
# raised a TypeError)
for imclass, fn in zip(imclasses, all_filenames):
    print("Inspecting cartoonization values for: ", fn)
    df = pd.read_csv(fn, header=0)
    mnorms = df['mnorm']
    mnorms_pp = df['mnorm_pp']
    znorms = df['znorm']
    znorms_pp = df['znorm_pp']

    # Plot parameters
    kwargs = dict(alpha=0.5, bins=15)
    fig, (a, b, c, d) = plt.subplots(4, figsize=(10, 10), sharey=True, constrained_layout=True)
    fig.suptitle('Image differences after cartoonization (n={} imgs)'.format(len(df)))

    # One histogram per measure, top to bottom: Manhattan norm, Manhattan norm
    # per pixel, zero norm, zero norm per pixel
    panels = [
        (a, mnorms, "Manhattan norm"),
        (b, mnorms_pp, "Manhattan norm per pixel"),
        (c, znorms, "Zero norm"),
        (d, znorms_pp, "Zero norm per pixel"),
    ]
    for ax, values, title in panels:
        ax.hist(values, color='y', label='True Det', **kwargs)
        ax.set_title(title)

    # Export histograms
    figname = save_figure(fig, imclass)