In [None]:
import os
import random
import warnings
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
from skimage.filters import (threshold_otsu, threshold_niblack,threshold_sauvola)

# Seed Python's `random` module so the `random.choice` draws used to pick
# example images in this notebook are repeatable from run to run
# (for an unchanged file listing).
random.seed(10)

# Exploratory

In [None]:
def get_raw_data(path, n=None):
    """Load the ``.jpg`` images found (recursively) under ``path``.

    Parameters
    ----------
    path : str or pathlib.Path
        Root directory that is searched with the glob pattern ``**/*.jpg``.
    n : int, optional
        Load at most ``n`` images (the first ``n`` in sorted path order).
        ``None`` (the default) loads everything. Use a small value to speed
        up debugging runs.

    Returns
    -------
    dict
        Maps ``str(file_path)`` to whatever ``cv2.imread`` returned for that
        file. Files that OpenCV could not decode are left out.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n is not None and n < 0:
        raise ValueError('n must be None or a non-negative integer, got {!r}'.format(n))

    # Sort the listing: glob order depends on the file system, and the rest of
    # the notebook picks images with a seeded random.choice, which is only
    # repeatable if the candidates always come in the same order.
    files = sorted(x for x in Path(path).glob('**/*.jpg') if x.is_file())
    if n is not None:
        files = files[:n]

    imgs = {}
    for file in files:
        img = cv2.imread(str(file))
        if img is None:
            # cv2.imread does not raise on unreadable/corrupt files, it returns
            # None. Storing that would only crash later inside cv2.cvtColor.
            warnings.warn('Could not read image, skipping: {}'.format(file))
            continue
        imgs[str(file)] = img
    return imgs

In [None]:
# Root folder of the raw images. Defaults to the original author's local
# checkout; set the THESIS_RAW_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
path = os.environ.get('THESIS_RAW_DATA_DIR', '/Users/beantown/PycharmProjects/master-thesis/data/raw')
files = get_raw_data(path)

In [None]:
def show_example(files, keep_size=True, n=1, hide_spines=False, gray=False, add_to_title=None):
    """Display ``n`` randomly chosen images from ``files`` with matplotlib.

    Parameters
    ----------
    files : dict
        Maps an image path (used for the plot title) to the image array.
    keep_size : bool
        If True, create a figure sized so that one image pixel corresponds to
        one figure pixel (at 80 dpi) with an axes filling the whole figure.
        If False, draw into the current pyplot figure at its default size.
    n : int
        Number of images to draw. Each draw is independent, so the same image
        can be shown more than once.
    hide_spines : bool
        If True, switch the axes (frame, ticks, labels) off.
    gray : bool
        If True, ``files`` holds single-channel images that are shown as-is
        with a gray colormap. If False, images are converted from BGR to RGB
        before display.
    add_to_title : str, optional
        Text placed in front of the file name in the title, separated by
        ``': '``.

    Raises
    ------
    IndexError
        From ``random.choice`` if ``files`` is empty and ``n`` > 0.
    """
    dpi = 80
    # Loop-invariant: build the candidate list once instead of on every draw.
    candidates = list(files.items())

    for _ in range(n):
        key, value = random.choice(candidates)

        # Colour images are converted BGR -> RGB for matplotlib; gray/binary
        # images are passed through untouched.
        im_data = value if gray else cv2.cvtColor(value, cv2.COLOR_BGR2RGB)
        # cmap=None is matplotlib's default, so colour images behave as before.
        cmap = 'gray' if gray else None

        # File name up to its first dot. Path(...).name handles the
        # platform's path separator, unlike a manual split on '/'.
        title = Path(str(key)).name.split('.')[0]
        if add_to_title is not None:
            title = add_to_title + ': ' + title

        if keep_size:
            # shape[:2] works for both (h, w) and (h, w, channels) arrays.
            height, width = im_data.shape[:2]

            # Figure size in inches such that, at `dpi`, the figure has the
            # same pixel dimensions as the image.
            figsize = width / float(dpi), height / float(dpi)

            # dpi must be handed to the figure as well; otherwise matplotlib
            # uses its configured default dpi and the size no longer matches.
            fig = plt.figure(figsize=figsize, dpi=dpi)
            # A single axes that covers the entire figure.
            ax = fig.add_axes([0, 0, 1, 1])

            if hide_spines:
                ax.axis('off')
            else:
                # Large tick labels to stay legible on full-resolution figures.
                ax.tick_params(axis='both', which='major', labelsize=40)
                ax.tick_params(axis='both', which='minor', labelsize=30)

            ax.set_title(title, pad=30, fontsize=50)
            ax.imshow(im_data, cmap=cmap)
        else:
            plt.title(title, pad=20, fontsize=20)
            if hide_spines:
                plt.axis('off')
            plt.imshow(im_data, cmap=cmap)

        plt.show()

In [None]:
# Preview two randomly chosen raw images in the current pyplot figure
# (no pixel-exact sizing).
show_example(
    files,
    n=2,
    keep_size=False,
    hide_spines=False,
)

In [None]:
# Draw one random image and keep its `key`: later cells reuse `key` to show
# the same image for every thresholding method.
key = random.choice(list(files))
value = files[key]
test_file = {key: value}
show_example(
    test_file,
    keep_size=True,
    n=1,
    hide_spines=False,
)

In [None]:
def get_forground(files, method='otsu', window_size=25, k=0.8):
    """Binarise every image in ``files`` with the chosen thresholding method.

    Each image is converted from BGR to grayscale and compared against a
    threshold; the result is a boolean mask that is True where the gray value
    is greater than the threshold.

    NOTE(review): "True" therefore marks the brighter pixels. Whether that is
    the foreground depends on the polarity of the input images; confirm.

    Parameters
    ----------
    files : dict
        Maps a key (image path) to a BGR image array.
    method : str
        ``'niblack'`` or ``'sauvola'`` select the corresponding local
        threshold from ``skimage.filters``. Any other value (including the
        default ``'otsu'``) uses a single global Otsu threshold.
    window_size : int
        Window size passed to the Niblack and Sauvola thresholds. Ignored for
        Otsu.
    k : float
        ``k`` parameter passed to the Niblack threshold only.

    Returns
    -------
    dict
        Same keys as ``files``, each mapped to its boolean mask.
    """
    forground_files = {}
    for key, value in files.items():
        # Preprocessing: the thresholds operate on a single gray channel.
        image = cv2.cvtColor(value, cv2.COLOR_BGR2GRAY)

        if method == 'niblack':
            threshold = threshold_niblack(image, window_size=window_size, k=k)
        elif method == 'sauvola':
            threshold = threshold_sauvola(image, window_size=window_size)
        else:
            # Fallback for 'otsu' and any unrecognised method name.
            threshold = threshold_otsu(image)

        forground_files[key] = image > threshold

    return forground_files

In [None]:
# Binarise the full image set once per thresholding method
# ('otsu' is also get_forground's default method).
otsu_files, sauvola_files, niblack_files = (
    get_forground(files, method=method_name)
    for method_name in ('otsu', 'sauvola', 'niblack')
)

In [None]:
# For each method, build a one-entry dict holding only the image stored under
# `key` (the image drawn at random in an earlier cell).
otsu_img, sauvola_img, niblack_img = (
    {key: masks[key]}
    for masks in (otsu_files, sauvola_files, niblack_files)
)

In [None]:
# Show the same image binarised by each method, at full pixel size, with the
# method label prefixed to the title.
for method_label, method_img in (
    ('otsu_img', otsu_img),
    ('sauvola_img', sauvola_img),
    ('niblack_img', niblack_img),
):
    show_example(
        method_img,
        keep_size=True,
        n=1,
        hide_spines=False,
        gray=True,
        add_to_title=method_label,
    )