In [None]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [None]:
from fastai import *
from fastai.vision import *
import pdb

In [None]:
from scipy import signal,ndimage
import statistics

In [None]:
PATH = Path('data/IAM_handwriting')

# Helpers

In [None]:
# random image from folder
def rand_from_folder(f_path):
    """Open a randomly chosen file from a folder as a grayscale PIL image.

    Arguments:
        f_path: directory to sample from (`str` or `Path`)
    Returns:
        the chosen file opened with PIL and converted to mode 'L'
    Raises:
        IndexError: if the folder holds no visible regular file
    """
    folder = Path(f_path)  # accept plain strings as well as Path objects
    # remove .DS_Store and other hidden files, and sub-directories (PIL cannot open those)
    files = [f for f in os.listdir(folder)
             if not f.startswith(".") and (folder/f).is_file()]
    fname = random.choice(files)  # random.choice raises IndexError on an empty list
    return PIL.Image.open(folder/fname).convert('L')

In [None]:
def array_stats(im):
    """Print mean, std, min and max of an image's pixel values and return the pixels.

    `im` is converted with `np.array`, so a PIL image, nested list or ndarray all work.
    """
    pixels = np.array(im)
    mean, std = pixels.mean(), pixels.std()
    lowest, highest = pixels.min(), pixels.max()
    print(f"mean: {mean}, std: {std}, min: {lowest}, max: {highest}")
    return pixels

# global stats - paragraphs

In [None]:
# label file to analyse; the alternative file name is kept in the trailing comment
fname = 'edited_pg.csv' #'paragraphs.csv'
CSV = PATH/fname
FOLDER = 'paragraphs'  # image sub-folder of PATH

df = pd.read_csv(CSV)
len(df)  # number of rows read from the CSV

In [None]:
# fastai ImageList built from the dataframe rows; split_none() makes no validation split
data = ImageList.from_df(df, path=PATH, folder=FOLDER).split_none()

In [None]:
# preview the first nine training images on a 3x3 grid
fig, axes = plt.subplots(3,3, gridspec_kw={'hspace': 0.4}, figsize=(18, 10))
for i,ax in enumerate(axes.flat):
    ax=show_image(data.train[i], ax=ax)

## Determine average line length

In [None]:
# character count of every label, stored as a new column
df['char_len'] = df.label.map(len)
df.char_len.values.max()  # length of the longest label

In [None]:
# split every label on newlines into a list of text lines
lines = [pg.split('\n')[:-1] for pg in df.label.values]   # remove final line per label (incomplete)
# operator.iconcat extends the accumulator list in place, so reduce concatenates all sub-lists
lines = functools.reduce(operator.iconcat, lines, [])     # flatten lines list

In [None]:
# number of characters in each text line
lens = np.array(list(map(lambda x: len(x), lines)))
lens.mean(), lens.std()

In [None]:
plt.hist(lens)

In [None]:
# image w/ longest lines
im = PIL.Image.open(PATH/'paragraphs'/'c04-110.png')
im

In [None]:
# line lengths of above image
# NOTE(review): 3657:3661 are hard-coded positions in `lens`; presumably the lines of c04-110.png —
# they go stale if the CSV contents or row order change. Confirm before relying on them.
lens[3657:3661]

# View individual images

In [None]:
im = rand_from_folder(PATH/'uploads')  # random grayscale image from the uploads folder
im = im.resize((512,512))  # forced to 512x512, so the aspect ratio is not preserved
im

In [None]:
arr = np.array(im)
arr.mean(), arr.std(), arr.min(), arr.max()

In [None]:
arr.shape

In [None]:
def show_lines(arr):
    """Split a grayscale page into text lines and plot every line in its own subplot.

    Peaks of the gaussian-smoothed row-wise standard deviation are taken as line
    centres; the nearest local minima above and below a peak bound that line.

    Arguments:
        arr: 2-D np.ndarray of pixel values (rows x columns)
    Returns:
        None. Draws a two-column grid of line crops, or prints a message when
        no line is detected.
    """
    stds = arr.std(axis=1)
    g_stds = scipy.ndimage.gaussian_filter1d(stds, 5)
    peaks,_ = scipy.signal.find_peaks(g_stds, prominence=5, height=stds.mean(), distance=20)
    if len(peaks) == 0:
        # plt.subplots cannot build a grid with zero rows
        print("no text lines detected")
        return
    mins = scipy.signal.argrelextrema(g_stds, np.less_equal)[0]  # np.less_equal critical for flat minima, edges
    fig, axs = plt.subplots(math.ceil(len(peaks)/2),2,figsize=(18,15))
    for p,ax in zip(peaks, axs.flatten()):
        above, below = mins[mins < p], mins[mins > p]
        # fall back to the image edges when a peak has no minimum on one side
        top = above[-1] if len(above) else 0
        bottom = below[0] if len(below) else arr.shape[0]
        ax.imshow(PIL.Image.fromarray(arr[top:bottom]))

In [None]:
show_lines(arr)

In [None]:
stds = arr.std(axis=1)
means = arr.mean(axis=1)

In [None]:
# one bar per pixel row; positions follow the data instead of a hard-coded 512 rows
plt.bar(np.arange(len(stds)), stds)

In [None]:
g_stds = scipy.ndimage.gaussian_filter1d(stds, 5)   # gaussian smoothing
# positions follow the data instead of a hard-coded 512 rows
plt.bar(np.arange(len(g_stds)), g_stds)

In [None]:
stds.mean(), stds.std()

In [None]:
# candidate text-line centres: peaks of the smoothed row std that reach at least the mean row std,
# have a prominence of at least 5 and lie at least 20 rows apart
peaks,_ = scipy.signal.find_peaks(g_stds, prominence=5, height=stds.mean(), distance=20)

In [None]:
peaks

In [None]:
# toy input to check what argrelextrema reports when the comparator is np.less_equal
# (a point only has to be <= its neighbours, so ties and array ends can qualify)
x = np.array([0, 2, 1, 2, 3, 2, 0, 1, 0])
scipy.signal.argrelextrema(x, np.less_equal)[0]

In [None]:
# row indices where the smoothed std has a local minimum
minima = scipy.signal.argrelextrema(g_stds, np.less_equal)[0]
minima

In [None]:
# closest local minimum before the first peak
# (`mins` only exists inside show_lines; the notebook-level array is `minima`)
minima[minima < peaks[0]][-1]

In [None]:
# row range of every detected line: nearest minimum before the peak up to the nearest one after it
# (`mins` only exists inside show_lines; the notebook-level array is `minima`)
widths = []
for p in peaks:
    widths.append(range(minima[minima < p][-1], minima[minima > p][0]))

In [None]:
PIL.Image.fromarray(arr[widths[5]])

In [None]:
# two-column grid with as many rows as the detected lines need (was fixed at 7 rows)
n_rows = max(1, math.ceil(len(widths)/2))
fig, axs = plt.subplots(n_rows,2,figsize=(18,20), squeeze=False)
for w,ax in zip(widths, axs.flatten()):
    # use a separate name so the page image `im` is not overwritten for later cells
    line_im = PIL.Image.fromarray(arr[w])
    ax.imshow(line_im)

In [None]:
# 110-row band centred on row 392
# NOTE(review): 392 looks like a peak position from one particular image — confirm before reuse
idxs = list(range(392-55, 392+55))
PIL.Image.fromarray(arr[idxs])

In [None]:
# with np.printoptions(threshold=np.inf, linewidth=np.inf):   # context manager to view large array without truncation
#     print(arr)

In [None]:
# plt.rcParams["figure.figsize"] = [12,9]

# show `im` with the 'hot' colour map and a colour scale
plt.imshow(im, cmap='hot')
plt.colorbar()
plt.show()

In [None]:
# True where a pixel value lies below the 6th percentile of all pixel values
mask = arr < np.percentile(arr, 6)

plt.imshow(mask.astype(int), cmap='hot')
plt.colorbar()
plt.show()

In [None]:
# True where a pixel value lies above the 6th percentile of all pixel values
mask = arr > np.percentile(arr, 6)

plt.imshow(mask.astype(int), cmap='hot')
plt.colorbar()
plt.show()

In [None]:
import scipy
from scipy import ndimage

In [None]:
# set every pixel above the 6th percentile to 255, keeping only the lowest values;
# np.where builds a new array, so `arr` stays intact and the cell can be re-run
mask = np.greater(arr, np.percentile(arr, 6))
arr_dark = np.where(mask, 255, arr).astype(arr.dtype)

plt.imshow(ndimage.gaussian_filter(arr_dark, 2), cmap='hot')
plt.colorbar()
plt.show()

# Test Images

In [None]:
dl_path = Path('data/fonts/test')

In [None]:
t_im = rand_from_folder(dl_path)
t_im

In [None]:
t_im = PIL.Image.open(dl_path/'2.png').convert('L')
t_im

In [None]:
t_arr = array_stats(t_im)
t_arr

In [None]:
plt.hist(t_arr.flatten())

In [None]:
# NOTE(review): `bg_arr` is not assigned anywhere in the visible notebook, so this raises NameError
# on a fresh kernel; presumably it held the pixels of a background image — define it first
plt.hist(bg_arr.flatten())

In [None]:
import scipy.stats as stats

# pool of 300 samples from a normal(mu=242, sigma=10) truncated to [200, 251]
lower, upper = 200, 251
mu, sigma = 242, 10
# truncnorm takes its bounds in units of `scale` relative to `loc`, hence the (x - mu) / sigma
dist = stats.truncnorm((lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)
trunc_norm_dist = dist.rvs(300)

In [None]:
plt.hist(trunc_norm_dist)

In [None]:
def random_wrap_bg():
    "Draw one value at random from the notebook-level `trunc_norm_dist` samples."
    return np.random.choice(trunc_norm_dist)

# Background noise

In [None]:
def skew_pixels(im, brightness=0.5, sharpness=-0.5):
    '''Brighten a PIL image, then reduce its sharpness.

       brightness: added to 1.0 to form the Brightness factor (positive lightens darks)
       sharpness: added to 1.0 to form the Sharpness factor (negative spreads values out)
       Returns the adjusted PIL image.
    '''
    brightened = PIL.ImageEnhance.Brightness(im).enhance(1.0 + brightness)
    return PIL.ImageEnhance.Sharpness(brightened).enhance(1.0 + sharpness)

In [None]:
def overlay_bg(im, bg_img=None):
    """Darken `im` with a heavily blurred, randomly flipped or rotated background image.

    Arguments:
        im: PIL image to place on the background
        bg_img: PIL image used as background; a random image from PATH/'paragraphs' when None
    Returns:
        PIL image holding, per pixel, the darker of `im` and the background
    """
    if bg_img is None:
        bg_img = rand_from_folder(PATH/'paragraphs')
    blur = 70 #random.randint(70,150) 
    bg = bg_img.filter(PIL.ImageFilter.GaussianBlur(blur))
    # random flips
    choices = [PIL.Image.FLIP_LEFT_RIGHT, PIL.Image.FLIP_TOP_BOTTOM, PIL.Image.ROTATE_90, PIL.Image.ROTATE_180, PIL.Image.ROTATE_270]
    # transpose returns a new image; the result must be kept or the flip is lost
    bg = bg.transpose(random.choice(choices))

    # the resize also undoes the width/height swap of a 90 or 270 degree rotation
    return PIL.ImageChops.darker(im, bg.resize(im.size))
#     diff = PIL.ImageChops.difference(im, bg.resize(im.size))
#     return PIL.ImageChops.subtract(im, diff)

In [None]:
def gaussian_noise(im):
    """Limit an image's value range and add zero-mean gaussian noise (std 4).

    Pixel values are clipped to [45, 230] before the noise is added; the sum is
    cast to uint8 and returned as a mode 'L' PIL image.
    """
    pixels = np.array(im)
    clipped = np.clip(pixels, 45, 230)
    noise = np.random.normal(0, 4, pixels.shape)
    noisy = (clipped + noise).astype('uint8')
    return PIL.Image.fromarray(noisy, mode='L')

In [None]:
def poisson_noise(image, mod=1.5):
    """Apply Poisson noise to a grayscale image.

    Arguments:
        image: PIL image or array of non-negative pixel values
        mod: base of the scale factor; for mod > 1 a larger value gives weaker noise
    Returns:
        mode 'L' PIL image with the noisy pixels
    """
    image = np.array(image)
    # scale factor: `mod` raised to ceil(log2(number of distinct pixel values))
    vals = len(np.unique(image))
    vals = mod ** np.ceil(np.log2(vals))

    # sample at the scaled intensity, then scale back to the original range
    out = np.random.poisson(image * vals) / float(vals)
    # noise can push bright pixels past 255; clip so the uint8 cast cannot wrap around
    out = np.clip(out, 0, 255)
    return PIL.Image.fromarray(out.astype('uint8'), mode='L')

In [None]:
# start from the test image: on a fresh kernel `res` is not assigned before this cell
# (its first assignment is in the Crop and Pad section), and `res = f(res)` was not re-runnable
res = skew_pixels(t_im)
plt.imshow(res)

In [None]:
# overlay bg
res = overlay_bg(res)
plt.imshow(res) 

In [None]:
res

In [None]:
# gaussian noise
gtim = gaussian_noise(t_im)
gtim

In [None]:
gtarr = array_stats(gtim)

In [None]:
gtarr

In [None]:
ptim = poisson_noise(t_im)
ptim

# Crop and Pad

In [None]:
def crop_image(im):
    """Crop an image to the bounding box of its content.

    The top-left pixel is taken as the background colour; the crop box is the
    bounding box of the pixels that differ from it by more than 100.
    """
    corner_color = im.getpixel((0,0))
    backdrop = PIL.Image.new(im.mode, im.size, corner_color)
    delta = PIL.ImageChops.difference(im, backdrop)
    # add(a, b, scale, offset) is (a + b) / scale + offset, i.e. delta - 100 here:
    # differences of 100 or less drop to zero and are ignored by getbbox
    delta = PIL.ImageChops.add(delta, delta, 2.0, -100)
    return im.crop(delta.getbbox())

In [None]:
def pad_image(im, pad_range=(10,50)):
    """Add a random border filled with the image's top-left pixel colour.

    Every side draws its own random integer from `pad_range` (both ends included).
    Left/right draws are scaled by height/width and top/bottom draws by
    width/height, adjusted to minimize large ratio differences.
    """
    width, height = im.size   # PIL reports size as (width, height)
    fill = im.getpixel((0,0))

    def draw(numer, denom):
        # multiply before dividing so the truncation matches a plain `r * numer/denom`
        return int(random.randint(*pad_range) * numer / denom)

    left = draw(height, width)
    top = draw(width, height)
    right = draw(height, width)
    bottom = draw(width, height)
    return PIL.ImageOps.expand(im, border=(left,top,right,bottom), fill=fill)

In [None]:
# crop
res = crop_image(t_im)
plt.imshow(res)

In [None]:
# pad
res = pad_image(res)
plt.imshow(res)

# Script

In [None]:
ntim = overlay_bg(skew_pixels(pad_image(crop_image(t_im))))
ntim

In [None]:
poisson_noise(ntim, mod=2.1)

In [None]:
# source and destination folders for the font-image run
f_path = Path('data/fonts')
src_path = f_path/'imdb'
targ_path = f_path/'imdb_res'

os.makedirs(targ_path, exist_ok=True)  # create the destination; no error if it already exists

In [None]:
def process_one(fpath, i=None, targ_path=None, prepend='', crop=True, pad=True, skew=True, bg=True):
    """Run one font image through crop -> pad -> skew -> background -> poisson noise.

    Arguments:
        fpath: Path of the source image; anything without a '.png' suffix is skipped
        i: unused here; NOTE(review): presumably the index passed by fastai's `parallel` — confirm
        targ_path: output folder; when None the image is returned instead of saved
        prepend: prefix put in front of the saved file name
        crop, pad, skew, bg: switch the matching step on or off
    Returns:
        the processed PIL image when `targ_path` is None, otherwise None

    NOTE: a second `process_one` is defined in the Poisson images section and
    replaces this one once that cell has run.
    """
    if fpath.suffix != '.png': return
    img = PIL.Image.open(fpath).convert('L')
    steps = [(crop, crop_image), (pad, pad_image), (skew, skew_pixels), (bg, overlay_bg)]
    for enabled, step in steps:
        if enabled: img = step(img)
    img = poisson_noise(img, mod=1.8)   # always applied
    if targ_path is None: return img
    img.save(targ_path/(prepend + fpath.name))

In [None]:
# run process_one over every item found under src_path, saving the results into targ_path
il = ImageList.from_folder(src_path)
parallel(partial(process_one, targ_path=targ_path), il.items)

## Poisson images

In [None]:
# source and destination folders for this run (rebinds the path names used by the font run)
f_path = Path('data/handwriting_images')
src_path = f_path/'poisson'
targ_path = f_path/'v10'

os.makedirs(targ_path, exist_ok=True)  # create the destination; no error if it already exists

In [None]:
def process_one(fpath, i=None, targ_path=None, prepend=''):
    """Add gaussian noise and a blurred background to one image.

    Arguments:
        fpath: Path of the source image; anything without a '.png' suffix is skipped
        i: unused here; NOTE(review): presumably the index passed by fastai's `parallel` — confirm
        targ_path: output folder; when None the image is returned instead of saved
        prepend: prefix put in front of the saved file name
    Returns:
        the processed PIL image when `targ_path` is None, otherwise None

    NOTE: this shares its name with the `process_one` of the font-image section
    and replaces it once this cell has run.
    """
    if fpath.suffix != '.png': return
    grayscale = PIL.Image.open(fpath).convert('L')
    img = overlay_bg(gaussian_noise(grayscale))
    if targ_path is None: return img
    img.save(targ_path/(prepend + fpath.name))

In [None]:
# run process_one over every item found under src_path; outputs are saved with a 'v10_' name prefix
il = ImageList.from_folder(src_path)
parallel(partial(process_one, targ_path=targ_path, prepend='v10_'), il.items)

# Experiments

## Bounding Boxes

In [None]:
im = PATH/'uploads'/'test2.png'   # `im` holds the file path on this line only
im = PIL.Image.open(im).convert('L')   # from here on `im` is the grayscale image
im

In [None]:
# the same steps as crop_image, run one by one: solid image in the top-left pixel colour,
# absolute difference to it, then (diff + diff) / 2.0 - 100 to zero out small differences
bg = PIL.Image.new(im.mode, im.size, im.getpixel((0,0)))
diff = PIL.ImageChops.difference(im, bg)
diff = PIL.ImageChops.add(diff, diff, 2.0, -100)

In [None]:
box = diff.getbbox()

In [None]:
box

In [None]:
# crop the image loaded in this section (`img` is not assigned before this cell on a fresh kernel)
crop_image(im)

In [None]:
# flatten first: given 2-D input, plt.hist treats every column as a separate dataset
plt.hist(np.array(im).ravel(), bins=10)

In [None]:
im = np.array(im)

# 1.0 where a pixel is above the image mean, else 0.0
# (builtin float replaces the np.float alias, which newer NumPy releases no longer provide)
mask = (im > im.mean()).astype(float)
mask += 0.1 * im
img = mask + 0.2*np.random.randn(*mask.shape)   # add gaussian noise

hist, bin_edges = np.histogram(img, bins=60)
bin_centers = 0.5*(bin_edges[:-1] + bin_edges[1:])   # midpoint of every histogram bin
binary_img = img > 0.5

In [None]:
plt.imshow(binary_img, cmap=plt.cm.gray, interpolation='nearest')

In [None]:
plt.plot(bin_centers, hist, lw=2)

## Scipy histogram segmentation

In [None]:
from scipy import ndimage

In [None]:
n = 10
l = 256
im = np.zeros((l, l))
# np.random.seed(1)
# n**2 random points switched on in an l x l image, then blurred into blobs
points = l*np.random.random((2, n**2))
# builtin int/float replace the np.int/np.float aliases, which newer NumPy releases no longer provide
im[(points[0]).astype(int), (points[1]).astype(int)] = 1
im = ndimage.gaussian_filter(im, sigma=l/(4.*n))

# 1.0 where the blurred image is above its mean, else 0.0
mask = (im > im.mean()).astype(float)
mask += 0.1 * im
img = mask + 0.2*np.random.randn(*mask.shape)   # add gaussian noise

hist, bin_edges = np.histogram(img, bins=60)
bin_centers = 0.5*(bin_edges[:-1] + bin_edges[1:])   # midpoint of every histogram bin
binary_img = img > 0.5

In [None]:
plt.figure(figsize=(11,4))

# left panel: the noisy image
plt.subplot(131)
plt.imshow(img)
plt.axis('off')
# middle panel: pixel histogram with the 0.5 threshold marked by a dashed red line
plt.subplot(132)
plt.plot(bin_centers, hist, lw=2)
plt.axvline(0.5, color='r', ls='--', lw=2)
plt.text(0.57, 0.8, 'histogram', fontsize=20, transform = plt.gca().transAxes)
plt.yticks([])
# right panel: the thresholded (binary) image
plt.subplot(133)
plt.imshow(binary_img, cmap=plt.cm.gray, interpolation='nearest')
plt.axis('off')

plt.subplots_adjust(wspace=0.02, hspace=0.3, top=1, bottom=0.1, left=0, right=1)
plt.show()

## PG text analysis

In [None]:
im = rand_from_folder(PATH/'paragraphs')
im

In [None]:
arr = np.array(im)

In [None]:
new_arr = arr[50:110, 170:258]

In [None]:
with np.printoptions(threshold=np.inf, linewidth=np.inf):   # context manager to view large array without truncation
    print(new_arr[:,:28])

In [None]:
fig, ax = plt.subplots(figsize=(10,10))
# keep the AxesImage under its own name: assigning it to `im` would replace the PIL image
# that later cells still read (`im.getdata()`, `np.array(im)`)
heatmap = ax.imshow(new_arr, cmap='hot', interpolation='nearest')
plt.colorbar(heatmap)
plt.show()

In [None]:
# array_stats is the helper from the Helpers section (`image_stats` is not defined in this notebook)
array_stats(im)

In [None]:
plt.hist(im.getdata(), bins=20)

In [None]:
# array_stats is the helper from the Helpers section (`image_stats` is not defined in this notebook)
array_stats(res)

In [None]:
plt.hist(res.getdata(), bins=20)

In [None]:
# NOTE(review): `tim` is first assigned in the next section ("Test image text analysis"),
# so on a fresh kernel this cell raises NameError — presumably it belongs after that cell
plt.hist(tim.getdata(), bins=20)

## Test image text analysis

In [None]:
tim = rand_from_folder(Path('data/fonts/adrift'))
tim

In [None]:
tarr = np.array(tim)
tarr

In [None]:
new_tarr = tarr[75:150, 50:78]

In [None]:
with np.printoptions(threshold=np.inf, linewidth=np.inf):   # context manager to view large array without truncation
    print(new_tarr)

In [None]:
fig, ax = plt.subplots(figsize=(10,10))
image = ax.imshow(new_tarr, cmap='hot', interpolation='nearest')
plt.colorbar(image)
plt.show()

In [None]:
# brightness factor 1.5 then sharpness factor 0.5: exactly skew_pixels with its default arguments
timo = skew_pixels(tim)

In [None]:
bg_img = rand_from_folder(PATH/'paragraphs')
# blur = random.randint(70,150) 
bg = bg_img.filter(PIL.ImageFilter.GaussianBlur(70))

choices = [PIL.Image.FLIP_LEFT_RIGHT, PIL.Image.FLIP_TOP_BOTTOM, PIL.Image.ROTATE_90, PIL.Image.ROTATE_180, PIL.Image.ROTATE_270]
# transpose returns a new image; keep it, otherwise the random flip/rotation is discarded
bg = bg.transpose(random.choice(choices))

# resize the background to the test image before combining, as overlay_bg does
res=PIL.ImageChops.darker(timo, bg.resize(timo.size))
res

In [None]:
# adjusted test image
plt.hist(res.getdata(), bins=20)

In [None]:
# test image no changes
plt.hist(tim.getdata(), bins=20)

In [None]:
# baseline dataset image
plt.hist(im.getdata(), bins=20)

## Histogram matching

In [None]:
def hist_match(source, template):
    """
    Remap the pixel values of a grayscale image so that its histogram
    follows the histogram of a template image

    Arguments:
    -----------
        source: np.ndarray
            Image to remap; all of its values are pooled into one histogram
        template: np.ndarray
            Image providing the target histogram; its shape need not match source
    Returns:
    -----------
        matched: np.ndarray
            Remapped image with the shape of source
    """

    def ecdf(counts):
        # running total of the counts, scaled so that the last entry is 1.0
        totals = np.cumsum(counts).astype(np.float64)
        totals /= totals[-1]
        return totals

    # distinct source values: for every pixel the index of its value, plus the value counts
    _, value_idx, src_counts = np.unique(source.ravel(), return_inverse=True, return_counts=True)
    tmpl_values, tmpl_counts = np.unique(template.ravel(), return_counts=True)

    # for each distinct source value, linearly interpolate the template value
    # that sits at the same quantile
    lookup = np.interp(ecdf(src_counts), ecdf(tmpl_counts), tmpl_values)

    return lookup[value_idx].reshape(source.shape)

In [None]:
nim = hist_match(np.array(res),np.array(im))

In [None]:
nim = PIL.Image.fromarray(nim.astype('uint8'), mode='L')
nim

In [None]:
plt.hist(ntim.getdata(), bins=20)

In [None]:
plt.hist(im.getdata(), bins=20)

## creating font images via PIL

In [None]:
from PIL import Image, ImageDraw, ImageFont

In [None]:
# NOTE(review): `font` is first assigned a few cells further down (ImageFont.truetype), so on a
# fresh kernel this cell raises NameError — presumably it belongs after that cell
font.getsize('i')

In [None]:
font.getsize('j')

In [None]:
font.size

In [None]:
# 700x300 grayscale canvas with a random background level in [230, 254]
bg = random.randrange(230,255)
image = Image.new("L", (700, 300), color=bg)
font_filepath = "./data/fonts/font_files/AguafinaScript-Regular.ttf"  # relative to current directory
font_size = random.randrange(30,40)   # random size in [30, 39]

draw = ImageDraw.Draw(image)
font = ImageFont.truetype(font_filepath, font_size)

xy = (10, 10)   # position passed to draw.text
text = '''I shall see the face of Mars, anyhow, and that will be a rare
experience.  It seems to me that a view of the heavenly bodies
through a fine telescope, as well as a tour round the world, should
form a part of a liberal education.
152309852340923'''

spacing = random.randrange(0,30)   # random line spacing in [0, 29]
color = random.randrange(10,100)   # random text gray level in [10, 99]
draw.text(xy, text, fill=color, font=font, spacing=spacing)
image

In [None]:
def line_boxes(text, font, spacing, xy):
    """Outline every line of `text` with a red rectangle and return the image.

    Draws through the notebook-level `draw` object and returns the notebook-level
    `image`; neither is passed in.

    Arguments:
        text: the text, lines separated by '\\n'
        font: font object providing getsize, getmask and getoffset
        spacing: extra gap added to the height of "A" to get the line step
        xy: (x, y) position of the text
    """
    text_lines = text.split('\n')
    line_step = font.getsize("A")[1] + spacing
    for row, line_text in enumerate(text_lines):
        box_h = font.getmask(line_text).size[1]
        box_w = font.getsize(line_text)[0]
        x_off, y_off = font.getoffset(line_text)
        top = row * line_step + y_off + xy[1]
        left = xy[0] + x_off
        draw.rectangle((left, top, left + box_w, top + box_h), None, "#f00")
    return image

In [None]:
line_boxes(text,font,spacing,xy)

In [None]:
text_path = Path('data/fonts/texts/imdb_iam_wiki103.txt')

with open(text_path) as file:
    text = file.read()

# drop every run of non-ASCII characters
text = re.sub(r'[^\x00-\x7F]+','', text)

# NOTE: this overwrites the source file in place.
# write() instead of print(): print appended one more newline to the file on every run
with open(str(text_path), "w") as text_file:
    text_file.write(text)

In [None]:
# NOTE(review): PIL's Image.save needs a target (file name or file object) as first argument;
# called without one this raises TypeError — supply the intended output path
image.save()