# Workshop Notebook 8: Advanced ExifTool and AutoCropper

## Mandatory Disclosures

1. This is a whirlwind introduction, not exhaustive instruction
1. All images are courtesy of the University Archives at Texas State University: http://www.univarchives.txstate.edu
1. img_qc_workshop is licensed under the GNU General Public License v3.0, https://github.com/photosbyjeremy/img_qc_workshop/blob/master/LICENSE
1. *All code is provided without any warranty or expectation of support from Jeremy Moore, Todd Peters, or Texas State University*

In [None]:
# name of the current project; it is also the sub-folder of ./data that
# qc_directory is built from
# NOTE(review): this notebook is titled "Workshop Notebook 8" while the
# project is 'workshop-7' -- confirm that is intended
current_project = 'workshop-7'  # RENAME TITLE OF JUPYTER NOTEBOOK TO MATCH

In [None]:
# === IMPORT ===
# built-in libraries
import logging
from math import isnan
from pathlib import Path

# installed libraries
import cv2
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

# local libraries
import img_qc.img_qc as img_qc

# magic that lets us plot directly in the notebook
%matplotlib inline

# parameters for matplotlib to increase our default figure size -- NOTE: figure sizes are in INCHES
# the tuple is (width, height)
plt.rcParams["figure.figsize"] = (6, 10)  # set as needed for your screen and eyes

# on a high-dpi monitor this will increase the quality of plots on-screen
%config InlineBackend.figure_format = 'retina'

In [None]:
# === DIRECTORIES ===

# directory the notebook is running from
current_directory = Path.cwd()

# keep a handle on where we started in case the working directory changes later
start_directory = current_directory

# the current user's home directory
home_directory = Path.home()

# quality control directory: {start_directory}/data/{current_project}
qc_directory = start_directory / 'data' / current_project

In [None]:
# === OPTIONS ===
# Jupyter Notebook magic
%matplotlib inline

# Pandas options
pd.set_option('display.max_colwidth', -1)  # won't truncate output such as Path names

# Logging
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)

logging.info(f'Current Project: {current_project}')

In [None]:
# show the resolved quality control directory so it can be checked by eye
print(f'qc_directory : {qc_directory}')

In [None]:
# === LOAD IMAGE METADATA ===
# images_df is used below as a pandas DataFrame with a 'SourceFile' column
# presumably one row of metadata per 'tif' file found under qc_directory --
# TODO confirm against img_qc.get_images_df
images_df = img_qc.get_images_df(str(qc_directory), 'tif')

In [None]:
# === DATAFRAME DROPDOWNS===
df_columns = list(images_df.columns.values)

def print_column(value, column):
    """Report and return the rows of ``images_df`` where ``column == value``.

    Args:
        value: the value picked in the value dropdown. The placeholder
            'SELECT A VALUE BELOW' means nothing has been chosen yet.
        column: name of the ``images_df`` column to filter on.

    Returns:
        The filtered DataFrame, or None while the placeholder is selected.
    """
    # the placeholder entry is not real data, so there is nothing to filter
    if str(value) == 'SELECT A VALUE BELOW':
        return None

    # filter once, using the arguments that were passed in instead of reaching
    # back into the global widgets, and reuse the result for the count
    new_dataframe = images_df[images_df[column] == value]
    print(f"# of images in {column} == {value}")
    print(len(new_dataframe))
    return new_dataframe
    
    
def select_column(column):
    """Fill the value dropdown with the unique, non-null values of a column.

    Prints how many unique values were found, then replaces the options of
    the global ``select2`` dropdown with the sorted values, preceded by the
    'SELECT A VALUE BELOW' placeholder.

    Args:
        column: name of the ``images_df`` column whose values should be
            offered; 'SELECT A COLUMN' yields only the placeholder.
    """
    column_values_list = []
    if column != 'SELECT A COLUMN':
        # null values (NaN, None, NaT) cannot be selected in a dropdown, so
        # drop them; unlike math.isnan() this does not raise on non-numeric
        # nulls such as None
        column_values_list = list(images_df[column].dropna().unique())

    print(f'# of Unique Values: {len(column_values_list)}')
    try:
        column_values_list = sorted(column_values_list)
    except TypeError:
        # values of mixed types that cannot be compared with each other:
        # fall back to ordering them by their text form
        column_values_list = sorted(column_values_list, key=str)
    column_values_list.insert(0, 'SELECT A VALUE BELOW')
    select2.options = column_values_list

    
# dropdown listing every column of images_df
select1 = widgets.Dropdown(options=df_columns)
# column whose values seed the second dropdown before any column is picked
initial_value = 'SourceFile'
# dropdown listing the values of the chosen column; select_column replaces
# its options
select2 = widgets.Dropdown(options=images_df[initial_value])
# both callbacks reference select2 / the widgets by global name, so the
# dropdowns are created before the interactive wrappers
column_widget = widgets.interactive(select_column, column=select1)
value_widget = widgets.interactive(print_column, value=select2, column=select1)
display(column_widget)
display(value_widget)

# to access the column, use : column_widget.kwargs['column']
# to access the value, use: value_widget.kwargs['value']
# to access the dataframe where column == value: value_widget.result

In [None]:
# === AutoCrop Functions
def get_crop_contour(value,
                     autocrop_height,
                     output_height,
                     padding,
                     slide_x,
                     size_x,
                     slide_y,
                     size_y
                    ):
    """Detect the object in an image, preview the proposed crop and return it.

    A copy of the image resized to ``autocrop_height`` is converted to
    grayscale, blurred and thresholded with Otsu's method. The rotated
    minimum-area rectangle of the last external contour with an area of at
    least 700 (in the order produced by ``img_qc.sort_contours``) is scaled
    back to full resolution and adjusted by the padding/slide/size arguments.
    The resulting box is drawn in red on the rotated original, the crop size
    is printed, and the preview is shown resized to ``output_height``.

    Args:
        value: path of the image to load with ``cv2.imread``.
        autocrop_height: height passed to the resize helper for the working
            copy used for detection.
        output_height: height passed to the resize helper for the preview.
        padding: pixels added on every side of the crop (negative shrinks it).
        slide_x: pixels added to both the left and the right crop edge.
        size_x: extra crop width, half of it applied to each side.
        slide_y: pixels subtracted from both the top and the bottom crop edge.
        size_y: extra crop height, half of it applied to top and bottom.

    Returns:
        ``(bounding_box, rectangle, theta, (x, y))``: the integer box points
        at detection scale, the ordered corner points at full scale, the
        rotation angle used, and the full-scale rectangle centre. Returns
        None, after printing the reason, when the image cannot be read or no
        contour is large enough.
    """
    # cv2.imread returns None instead of raising when the file cannot be read,
    # e.g. while the dropdown placeholder is still selected
    image = cv2.imread(str(value))
    if image is None:
        print(f'Unable to read image: {value}')
        return None

    # ratio of the full-size height to the working height
    ratio = image.shape[0] / autocrop_height  # shape[0] = height, 1 = width, 2 = channels

    # keep an untouched copy to rotate and draw on later
    original = image.copy()

    # detect on a small grayscale, blurred, Otsu-thresholded copy
    # (Otsu because we expect a dark vs light tone)
    resized = img_qc.get_resized_cv_image(image, height=autocrop_height)
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # OpenCV 3 returns (image, contours, hierarchy) and OpenCV 4 returns
    # (contours, hierarchy); the contours are second-to-last in both
    contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    # keep only the contours that are sufficiently large, in sorted order
    large_contours = []
    if len(contours) > 0:
        contours, _ = img_qc.sort_contours(contours)
        large_contours = [contour for contour in contours
                          if cv2.contourArea(contour) >= 700]
    if not large_contours:
        print('No contour large enough to crop was found')
        return None

    # the last sufficiently large contour decides the crop
    contour = large_contours[-1]

    # rotated bounding box of the contour, computed once
    min_area_rect = cv2.minAreaRect(contour)
    (x, y), _, theta = min_area_rect
    # np.intp is what the removed alias np.int0 pointed at
    bounding_box = cv2.boxPoints(min_area_rect).astype(np.intp)

    # re-order the points in top_left, top_right, bottom_right, bottom_left order
    rectangle = img_qc.order_points(bounding_box)

    # NOTE(review): recent OpenCV releases report minAreaRect angles in a
    # different range than older ones -- confirm this normalisation still fits
    if theta < -45:
        theta = 90 + theta

    # scale the detection-size geometry back up to the full-size image;
    # not done in place so an integer-typed rectangle cannot fail the multiply
    rectangle = rectangle * ratio
    x *= ratio
    y *= ratio
    rotated = img_qc.rotate(original, theta, (x, y))

    # corner[0] is the x coordinate, corner[1] is the y coordinate
    top_left, top_right, bottom_right, bottom_left = rectangle

    pixel_padding = int(padding)
    half_size_x = 0.5 * int(size_x)
    half_size_y = 0.5 * int(size_y)

    # start at the top-left-most point, clamped so we start IN the image,
    # then apply the adjustments and clamp again
    startX = max(min(top_left[0], bottom_left[0]), 0)
    startX = max(startX - pixel_padding - half_size_x + slide_x, 0)

    startY = max(min(top_left[1], top_right[1]), 0)
    startY = max(startY - pixel_padding - half_size_y - slide_y, 0)

    # end at the bottom-right-most point, never beyond the image itself
    endX = max(top_right[0], bottom_right[0]) + pixel_padding + half_size_x + slide_x
    endX = min(endX, image.shape[1])

    endY = max(bottom_left[1], bottom_right[1]) + pixel_padding + half_size_y - slide_y
    endY = min(endY, image.shape[0])

    # drawing needs integer pixel coordinates
    startX, startY, endX, endY = int(startX), int(startY), int(endX), int(endY)

    # draw the crop box in red on the rotated original
    image_with_contour = cv2.rectangle(rotated, (startX, startY), (endX, endY), (0, 0, 255), 10)

    crop_width = endX - startX
    crop_height = endY - startY
    print(f'crop width: {crop_width}')
    print(f'crop height: {crop_height}')
    print('')
    print('')

    # resize the preview and show it
    resized = img_qc.get_resized_cv_image(image_with_contour, height=output_height)
    show_cv2_image(resized)

    return bounding_box, rectangle, theta, (x, y)

def draw_crop_contour(value, output_height, bounding_box):
    """Return a resized copy of an image with ``bounding_box`` outlined in red.

    Args:
        value: path of the image to load with ``cv2.imread``.
        output_height: height passed to the resize helper for the result.
        bounding_box: corner points of the box to draw; they are converted
            to integers before drawing.

    Returns:
        The resized image with the contour drawn on it.
    """
    # load the image
    image = cv2.imread(value)

    # drawContours needs integer points; np.intp is what the removed alias
    # np.int0 pointed at
    bounding_box = np.asarray(bounding_box).astype(np.intp)

    # draw contour on the box in red
    cv2.drawContours(image, [bounding_box], 0, (0, 0, 255), 15)

    # resize to the requested height (previously the undefined name `height`)
    image_with_contour = img_qc.get_resized_cv_image(image, height=output_height)

    return image_with_contour

def show_cv2_image(image):
    """Plot an OpenCV image inline with matplotlib."""
    # image.shape is (height, width) or (height, width, channels); a shape
    # with more than two entries means the image has multiple channels, which
    # are converted from BGR to RGB before plotting
    has_channels = len(image.shape) >= 3
    to_plot = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if has_channels else image

    plt.rcParams['figure.dpi'] = '175'
    plt.imshow(to_plot)
    plt.show()

In [None]:
# === AutoCrop Widget

# settings shared by every slider that nudges the crop box
_crop_adjust = dict(min=-500, max=500, step=10, continuous_update=False)

# sizes used for detection and for the on-screen preview
autocrop_height_slider = widgets.IntSlider(
    description='crop height', value=500,
    min=100, max=1000, step=100, continuous_update=False)
output_height_slider = widgets.IntSlider(
    description='qc height', value=600,
    min=50, max=1000, step=50, continuous_update=False)

# adjustments applied to the detected crop box
padding_slider = widgets.IntSlider(description='padding', value=50, **_crop_adjust)
slide_x_slider = widgets.IntSlider(description='slide x', value=0, **_crop_adjust)
size_x_slider = widgets.IntSlider(description='size x', value=0, **_crop_adjust)
slide_y_slider = widgets.IntSlider(description='slide y', value=0, **_crop_adjust)
size_y_slider = widgets.IntSlider(description='size y', value=0, **_crop_adjust)

# layout: sizes and padding on top, then the x controls, then the y controls
row1 = widgets.HBox([autocrop_height_slider, output_height_slider, padding_slider])
row2 = widgets.HBox([slide_x_slider, size_x_slider])
row3 = widgets.HBox([slide_y_slider, size_y_slider])
ui = widgets.VBox([row1, row2, row3])

# re-run get_crop_contour whenever the selected image or any slider changes
crop_controls = {
    'value': select2,
    'autocrop_height': autocrop_height_slider,
    'output_height': output_height_slider,
    'padding': padding_slider,
    'slide_x': slide_x_slider,
    'size_x': size_x_slider,
    'slide_y': slide_y_slider,
    'size_y': size_y_slider,
}
out = widgets.interactive_output(get_crop_contour, crop_controls)
out.layout.height = '800px'

display(ui, out)