# Using Scikit-Image to do Arrow Matching

## This notebook develops the arrow classifier for reaction schemes

In [1]:
import os
import cv2
import math 
import imutils
import json
import copy
import time
import logging

import scipy
import numpy as np
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
import pandas as pd

import pytesseract
from pytesseract import Output

import sklearn
from sklearn.cluster import KMeans

import skimage
from skimage import io
from skimage.util import pad
from skimage.color import rgb2gray
from skimage.measure import regionprops
from skimage.measure import find_contours
from skimage.util import crop as crop_skimage
from skimage.util import random_noise
from skimage.morphology import binary_closing, disk
from skimage.morphology import skeletonize as skeletonize_skimage

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import RMSprop

import sklearn
from sklearn.cluster import KMeans
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from arrow import *
from scikit_arrow_utils import arrow_average, arrow_centroid, line_mag, get_contour_height, get_contour_length, get_orientation
from image_utils import binary_close, binarize, binary_floodfill, skeletonize, pixel_ratio, skeletonize_area_ratio
from scikit_scripts import pad_image, segment_image, show_contours, get_image_contours
from detection import get_direction, pipeline

In [6]:
# Restore the previously trained arrow classifier from ./models (resolved
# against the current working directory) and print its layer summary.
model_path = os.path.join(os.getcwd(), 'models', 'notOverfittedModel2')
model = keras.models.load_model(model_path)

# trainX, trainY = load_training_set()
# # testX, testY = load_testing_set()

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 498, 498, 16)      160       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 249, 249, 16)      0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 249, 249, 16)      0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 247, 247, 32)      4640      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 123, 123, 32)      0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 123, 123, 32)      0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 121, 121, 32)     

In [3]:
def process_paper(model, path_to_images, document_name, save_in_json = True, verbose=1):
    """Run the arrow-extraction pipeline on every image of one document.

    Parameters
    ----------
    model
        Classifier object forwarded to ``pipeline`` as ``model=``.
    path_to_images : str
        Directory holding the reaction images. A relative path is resolved
        against the current working directory.
    document_name : str
        Prefix of the output file ``results/<document_name>_results.json``.
    save_in_json : bool, optional
        When true (default), dump the collected results as JSON into a
        ``results`` folder under the current working directory. The folder
        is created if it does not exist yet.
    verbose : int, optional
        Forwarded unchanged to ``pipeline``.

    Returns
    -------
    dict
        Maps each image file name to the ``info`` object returned by
        ``pipeline``. Images for which ``pipeline`` raised are reported on
        stdout and left out of the mapping.
    """
    image_dir = os.path.join(os.getcwd(), path_to_images)
    # NOTE(review): the first sorted entry is dropped unconditionally --
    # presumably a hidden file such as .DS_Store; confirm this holds for
    # every image folder, otherwise a real image is silently skipped.
    reactions = sorted(os.listdir(image_dir))[1:]

    arrows = {}
    count = 0  # number of images processed successfully so far
    for image_name in reactions:
        image = io.imread(os.path.join(image_dir, image_name))
        try:
            info, _cnts, _averages, _directions = pipeline(
                image, image_name, count, model=model, verbose=verbose
            )
        except Exception:
            # Best-effort batch run: skip the failing image and keep going.
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
            # still abort a long run.
            print(image_name + ' had an Error')
            continue
        count += 1
        arrows[image_name] = info

    if save_in_json:
        results_dir = os.path.join(os.getcwd(), 'results')
        # Create the output folder on first use instead of failing in open().
        os.makedirs(results_dir, exist_ok=True)
        with open(os.path.join(results_dir, document_name + '_results.json'), 'w') as f:
            json.dump(arrows, f)
    return arrows

In [4]:
# Absolute path of the folder holding the cropped reaction-scheme test images.
reactions1 = os.path.join(os.getcwd(), 'test_reaction_ims')

# Extract arrows from every test image; with the default save_in_json=True the
# results are also written to results/test_ims3_results.json.
arrows = process_paper(
    model,
    path_to_images='test_reaction_ims',
    document_name='test_ims3',
)

Label 0 CroppedPage13-1.png 1 Arrows Extracted! Time Elapsed: 4.13s
Label 1 CroppedPage15-1.png 1 Arrows Extracted! Time Elapsed: 4.67s
Label 2 CroppedPage18-1.png 1 Arrows Extracted! Time Elapsed: 7.30s
Label 3 CroppedPage19-1.png 1 Arrows Extracted! Time Elapsed: 2.93s
Label 4 CroppedPage20-1.png 1 Arrows Extracted! Time Elapsed: 4.07s
Label 5 CroppedPage21-1.png 1 Arrows Extracted! Time Elapsed: 3.07s
Label 6 CroppedPage3-2.png 1 Arrows Extracted! Time Elapsed: 0.26s
Label 7 CroppedPage33-1.png 1 Arrows Extracted! Time Elapsed: 0.31s
Label 8 CroppedPage33-2.png 1 Arrows Extracted! Time Elapsed: 0.45s
Label 9 CroppedPage34-1.png 1 Arrows Extracted! Time Elapsed: 0.97s
Label 10 CroppedPage35-1.png 1 Arrows Extracted! Time Elapsed: 0.48s
Label 11 CroppedPage36-1.png 1 Arrows Extracted! Time Elapsed: 0.44s
Label 12 CroppedPage37-1.png 1 Arrows Extracted! Time Elapsed: 0.38s
Label 13 CroppedPage4-1.png 1 Arrows Extracted! Time Elapsed: 0.21s
Label 14 CroppedPage4-2 2.png 1 Arrows Extract

In [5]:
# Debug cell: run the pipeline on one hand-picked test image so its contours
# can be inspected in the following cells.
reactions1 = os.path.join(os.getcwd(), 'test_reaction_ims')
# Sorted directory listing with the first entry dropped.
# NOTE(review): the dropped entry is presumably a hidden file such as
# .DS_Store -- confirm.
reactions = sorted(os.listdir(reactions1))[1:]
# NOTE(review): idx is computed but not used in this cell -- the image below is
# selected with the hard-coded index 2. .index() also raises ValueError when
# this file is not in the folder, which would stop a Run All here.
idx = reactions.index('ja9b07512_si_001Page14-1.png')
image = io.imread(os.path.join(reactions1, reactions[2]))
# image = binarize(image)
# image.shape
# seg_im = segment_image(image)
# NOTE(review): no model= is passed here, unlike the call inside process_paper;
# presumably pipeline falls back to a default -- confirm.
info, final_contours, averages, directions = pipeline(image, doc_name = '', count = 0)
# for i, item in enumerate(reactions):
#     print("Element " + str(i) + ': ' + item)

Label 0  1 Arrows Extracted! Time Elapsed: 7.28s


In [None]:
# Display the contours returned by the single-image pipeline run on top of the
# image, then print the accompanying info object.
# NOTE(review): num = -1 presumably means "show all contours" -- confirm
# against show_contours in scikit_scripts.
show_contours(image, final_contours, num = -1)
print(info)

In [None]:
# Height of the first extracted contour (displayed as the cell output).
# Raises IndexError if the pipeline returned no contours.
get_contour_height(final_contours[0])