-
Notifications
You must be signed in to change notification settings - Fork 139
/
signature_extractor.py
72 lines (61 loc) · 2.29 KB
/
signature_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#----------------------------------------------
#--- Author : Ahmet Ozlu
#--- Mail : ahmetozlu93@gmail.com
#--- Date : 17th September 2018
#----------------------------------------------
import cv2
import numpy as np
from skimage import measure
from skimage.measure import label, regionprops
from skimage.color import label2rgb
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scipy import ndimage
from skimage import morphology
def extract_signature(source_image):
    """Filter a scanned page down to its large connected components.

    The image is binarized, split into connected components, and every
    component smaller than an area threshold derived from the average
    component size is discarded. What survives is rendered to
    ``pre_version.png`` and re-thresholded into the returned image.

    Args:
        source_image: image array handed straight to ``cv2.threshold``
            (presumably a single-channel 8-bit grayscale scan -- confirm
            against the caller).

    Returns:
        The result of an inverted Otsu threshold applied to the grayscale
        read-back of ``pre_version.png``.

    Side effects:
        Writes ``pre_version.png`` to the current working directory and
        prints three diagnostic lines to stdout.
    """
    # ensure binary
    img = cv2.threshold(source_image, 127, 255, cv2.THRESH_BINARY)[1]

    # connected component analysis (scikit-image): pixels brighter than the
    # mean are marked True and treated as background by the labelling step
    blobs = img > img.mean()
    blobs_labels = measure.label(blobs, background=1)

    # NOTE: the original also built a label2rgb overlay and opened a
    # matplotlib figure here for an optional debug plot. Neither was used,
    # and the figure was never closed, so every call leaked one figure.

    the_biggest_component = 0
    total_area = 0
    counter = 0
    for region in regionprops(blobs_labels):
        area = region.area
        # only components above 10 px contribute to the average
        if area > 10:
            total_area = total_area + area
            counter = counter + 1
        # track the largest component among those with large enough areas
        if area >= 250 and area > the_biggest_component:
            the_biggest_component = area

    # When no component exceeds 10 px there is nothing to average; fall back
    # to 0.0 instead of raising ZeroDivisionError.
    average = (total_area / counter) if counter else 0.0
    print ("the_biggest_component: " + str(the_biggest_component))
    print ("average: " + str(average))

    # experimental-based ratio calculation, modify it for your cases
    # a4_constant is the area threshold below which connected components are
    # removed; it was tuned for A4 size scanned documents
    a4_constant = (((average/84.0)*250.0)+100)*1.5
    print ("a4_constant: " + str(a4_constant))

    # remove the connected components that are smaller than a4_constant
    b = morphology.remove_small_objects(blobs_labels, a4_constant)

    # save the pre-version: the label image rendered with matplotlib's
    # default colormap (kept on disk to preserve the original behavior)
    plt.imsave('pre_version.png', b)

    # read the pre-version back as grayscale
    img = cv2.imread('pre_version.png', 0)
    # ensure binary
    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # save the result
    #cv2.imwrite("output.png", img)
    return img