<a href="https://colab.research.google.com/github/MilanCugur/Offline_Writer_Identification/blob/master/NIST_DataUtil_Advanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive at /content/drive; the dataset archive is read from it
# and the processed archive is moved back to it in later cells.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Delete the ./sample_data directory (presumably Colab's bundled samples, not needed here).
!rm -r ./sample_data/

In [0]:
# Report filesystem usage before the archive is extracted.
!df

In [0]:
import os
from zipfile import ZipFile

# Extract Files

In [0]:
%%time

path = "drive/My Drive/HMath/NIST/by_write.zip"
archive = ZipFile(path, 'r')
archive.extractall('./')  
archive.close()

# Convert one image

In [0]:
import numpy as np
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from scipy.misc import imresize
from scipy import ndimage

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from skimage import io
from skimage import transform

from cv2 import resize
import cv2

In [0]:
# Sample images used by the demo cells below.
# NOTE: the second assignment overwrites the first, so only the 'E' sample is
# active; comment out one of the two lines to switch samples.
check_path = './by_write/hsf_4/f2471_82/d2471_82/d2471_82_00003.png'  # label = 2
check_path = './by_write/hsf_2/f1009_29/u1009_29/u1009_29_00010.png'  # label = E
# Side length (in pixels) of the square images produced by the resize steps.
BOX_SIZE = 28

In [0]:
# Original image, exactly as read from disk (shape is printed for reference)
img = imread(check_path)
print(img.shape)
plt.imshow(img)
plt.show()

In [0]:
# Same image converted to grayscale with rgb2gray, no other processing
img = rgb2gray(imread(check_path))
print(img.shape)
plt.imshow(img)
plt.show()

In [0]:
# Simple baseline hack: keep only the central 64x64 window (rows/cols 32..95)
# of the grayscale image, regardless of where the glyph actually is.
img = rgb2gray(io.imread(check_path))[32:96, 32:96]
plt.imshow(img)
plt.show()

In [0]:
# Ready-made skimage.transform.resize: scale the whole image straight to
# BOX_SIZE x BOX_SIZE, then convert the result to grayscale.
img = rgb2gray(transform.resize(imread(check_path), (BOX_SIZE, BOX_SIZE)))
print(img.shape)
plt.imshow(img)
plt.show()

# Paper method

In [0]:
def add_gaussian_noise(img):
  """Return `img` smoothed by a Gaussian filter with sigma=1.

  Despite the name, nothing random is added: the image is blurred.
  """
  blurred = ndimage.gaussian_filter(img, sigma=1)
  return blurred

In [0]:
# Blur the image (Gaussian smoothing) and crop it to the bounding box of its content
def crop_image(path_to_image, pad=1):
  """Load an image as grayscale, blur it, and crop it around its content.

  The bounding box is measured on the unblurred grayscale image; the crop
  itself is taken from the Gaussian-smoothed (sigma=1) image.

  Args:
    path_to_image: Path of the image file to read.
    pad: Extra pixels kept on every side of the bounding box, clamped to the
      image borders.

  Returns:
    2-D grayscale array holding the padded bounding box of all non-white
    pixels. If the image contains no non-white pixel, the whole smoothed
    image is returned instead of an empty array.
  """
  img = rgb2gray(imread(path_to_image))
  height, width = img.shape  # real size instead of a hard-coded 128x128

  # A pixel is content when it is not white; rgb2gray maps white to 1.0 and
  # np.isclose tolerates float rounding in that conversion. (The previous
  # test, np.all(row) == 1, only detected rows containing an exact 0.)
  content = ~np.isclose(img, 1.0)
  rows = np.flatnonzero(content.any(axis=1))
  cols = np.flatnonzero(content.any(axis=0))

  # Smooth after measuring, so the blur does not widen the bounding box.
  img = ndimage.gaussian_filter(img, sigma=1)

  if rows.size == 0:
    # Blank image: nothing to crop around.
    return img

  # Clamp the padded box to the last valid index on each axis.
  v_min = max(0, rows[0] - pad)
  v_max = min(height - 1, rows[-1] + pad)
  k_min = max(0, cols[0] - pad)
  k_max = min(width - 1, cols[-1] + pad)
  return img[v_min:v_max + 1, k_min:k_max + 1]

In [0]:
def square_image(img, pad=2):
  """Centre `img` on a square canvas filled with ones.

  The canvas side is the longer side of `img` plus `pad`; the image is
  pasted at the (floor-)centred offset on both axes.

  Args:
    img: 2-D array to embed.
    pad: Amount added to the longer side to get the canvas side.

  Returns:
    A new float array of shape (side, side) containing `img`.
  """
  rows, cols = img.shape
  side = max(rows, cols) + pad

  top = (side - rows) // 2
  lead = (side - cols) // 2

  canvas = np.full((side, side), 1.0)
  canvas[top:top + rows, lead:lead + cols] = img
  return canvas

In [0]:
def resize_image(img, box_size):
  """Rescale an image to exactly box_size x box_size pixels (bicubic).

  Args:
    img: 2-D image array (callers pass the square output of square_image).
    box_size: Side length, in pixels, of the returned image.

  Returns:
    uint8 array of shape (box_size, box_size).
  """
  # Ask for an explicit (height, width) target. A float scale factor is
  # truncated to whole pixels inside imresize, so the result could come out
  # one pixel short and leave a white stripe along the right/bottom edge.
  # NOTE(review): scipy.misc.imresize is not available in SciPy >= 1.3.
  resized = imresize(arr=img, size=(box_size, box_size), interp='bicubic')
  return np.asarray(resized, dtype=np.uint8)

In [0]:
# Full pipeline (crop -> square -> resize) on the sample labelled 'E'
img = crop_image('./by_write/hsf_2/f1009_29/u1009_29/u1009_29_00010.png')
img = square_image(img)
img = resize_image(img, BOX_SIZE)

print(img)
print(img.shape)
plt.imshow(img)
plt.show()  # plt.plot() with no data draws nothing and only echoes an empty list

In [0]:
# Full pipeline (crop -> square -> resize) on the sample labelled '2'
img = crop_image('./by_write/hsf_4/f2471_82/d2471_82/d2471_82_00003.png')
img = square_image(img)
img = resize_image(img, BOX_SIZE)

print(img)
print(img.shape)
plt.imshow(img)
plt.show()  # plt.plot() with no data draws nothing and only echoes an empty list

# Create Folder ImgDiskAdvanced_V2

In [0]:
# Relies on the modules imported in the cells above.
# Side length (in pixels) of every image written by the conversion loop below.
BOX_SIZE = 28

In [0]:
%%time

ImgDisk = './ImgDiskAdvanced_V2'
if not os.path.exists(ImgDisk):
  os.makedirs(ImgDisk)

for root, dirs, files in os.walk("./by_write"):
  for d in dirs:
    if d[0] == 'h':
      continue
    elif d[0] == 'f':
      tmppath = os.path.join(ImgDisk, d)
      if not os.path.exists(tmppath):
        os.makedirs(tmppath)
    elif d[0] in ['d', 'l', 'u', 'c']:
      mediumpath = root.split('/')[-1]
      tmppath = os.path.join(os.path.join(ImgDisk, mediumpath), d)
      if not os.path.exists(tmppath):
        os.makedirs(tmppath)
    else:
      raise Exception('Invalid folder in directory \"by_write\"')
    
  
  for f in files:
    if f.endswith('.png'):
      tmp_path = os.path.join(root, f)
      tmp_path_splitted = tmp_path.split('/')
      new_path = os.path.join(ImgDisk, tmp_path_splitted[3]+'/'+tmp_path_splitted[4]+'/'+tmp_path_splitted[5])
      
      #img = rgb2gray(imread(tmp_path))[32:96, 32:96]  
      img = crop_image(tmp_path)
      img = square_image(img)
      img = resize_image(img, BOX_SIZE)
      
      imsave(new_path, img)

In [0]:
# Small test: count every file stored anywhere below the output folder
total = sum(len(files) for _, _, files in os.walk("./ImgDiskAdvanced_V2"))

print(total)

In [0]:
# Count the entries directly under the output folder; the trailing note records the expected sum.
!ls ./ImgDiskAdvanced_V2/ | wc -l  # Its ok! 500+500+500+598+499+500

# Archive images and save them to Google Drive

In [0]:
%%time

# Zip the processed folder into CompressedImgDiskAdvanced_V2.zip in the working directory.
import shutil
shutil.make_archive("CompressedImgDiskAdvanced_V2", 'zip', "ImgDiskAdvanced_V2")

In [0]:
# List the working directory with sizes to check the archive that was just created.
!ls -lsh

In [0]:
# Move the archive into the NIST folder on the mounted Google Drive.
!mv ./CompressedImgDiskAdvanced_V2.zip './drive/My Drive/HMath/NIST/'

In [0]:
# Confirm the archive is now in the Drive folder.
!ls -lsh './drive/My Drive/HMath/NIST/'

# DOCUMENTATION

In [0]:
# Before/after view of the cropping step on the sample image, with pad = 2.
pad = 2
img = rgb2gray(imread(check_path))
print(img.shape)
plt.imshow(img)
plt.show()

# Call crop_image rather than repeating its body here, so this demo always
# shows what the conversion loop really does.
img = crop_image(check_path, pad=pad)

print(img.shape)
plt.imshow(img)
plt.show()

In [0]:
# Cropped sample after being centred on a square canvas
plt.imshow(square_image(crop_image(check_path)))

In [0]:
# square_image on a synthetic non-square input: a 50x100 block of zeros
# ends up centred on a canvas of ones.
img = np.zeros(shape=(50, 100))
plt.imshow(square_image(img))
plt.show()

In [0]:
# Resize the synthetic 50x100 example after squaring it. It is rebuilt here
# so the cell gives the same picture every time it is re-run.
blank_square = square_image(np.zeros(shape=(50, 100)))
plt.imshow(imresize(arr=blank_square, size=32.0/blank_square.shape[0], interp='bicubic'))
plt.show()

# Full pipeline on the sample image; the result has to be drawn before
# plt.show(), otherwise nothing is displayed.
img = square_image(crop_image(check_path))
img = resize_image(img, BOX_SIZE)
plt.imshow(img)
plt.show()