In [1]:
import cv2
import numpy as np

In [2]:
# Page image to analyse. The file name without its extension is reused as the
# prefix of every derived output file written by this notebook.
INPUT_FILE = 'ALA1934_RR.pdf-5_1.png'
INPUT_FILE_BASE = INPUT_FILE[:INPUT_FILE.rindex('.')]

img = cv2.imread(INPUT_FILE)
# cv2.imread() does not raise when the file is missing or cannot be decoded;
# it returns None. Fail here with a clear message instead of an
# AttributeError on `.shape` below.
if img is None:
    raise IOError('could not read image file: %s' % INPUT_FILE)

# NOTE: img.shape is (rows, columns, channels), so `img_w` actually receives
# the number of rows (image height) and `img_h` the number of columns (image
# width). The names are kept unchanged because other cells use them.
img_w, img_h = img.shape[:2]
img_w, img_h

(3505, 2480)

In [3]:
# Convert the BGR image to grayscale, then run the Canny edge detector on it
# (hysteresis thresholds 50 / 150, Sobel aperture 3).
gray = cv2.cvtColor(src=img, code=cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(image=gray, threshold1=50, threshold2=150, apertureSize=3)
# Persist the edge map next to the input; imwrite's return value is the cell output.
cv2.imwrite(''.join((INPUT_FILE_BASE, '-edges.png')), edges)

True

In [4]:
#![edges](ALA1934_RR.pdf-5_1-edges.png)

In [4]:
# Single-channel canvas that receives only the detected lines. `img_w` and
# `img_h` are unpacked from img.shape[:2], so this tuple is already in NumPy's
# (rows, columns) order and the canvas matches `img`.
lines_only = np.zeros((img_w, img_h, 1), np.uint8)

# Draw the coloured overlay on a copy. Drawing directly onto `img` would feed
# the green lines back into the grayscale / Canny cell whenever it is re-run.
img_hough = img.copy()

max_img_dim = max(img_w, img_h)
# Half-length of every drawn segment. (x0, y0) below is the point of the line
# closest to the image origin; the visible part of the line can be up to one
# image diagonal away from it, so max(img_w, img_h) is too short for strongly
# slanted lines.
line_half_len = int(np.ceil(np.hypot(img_w, img_h)))

res_rho = 1                  # accumulator distance resolution (pixels)
res_theta = np.pi / 500      # accumulator angle resolution (radians)
votes_thresh = round(img_w / 5)
print('votes_thresh', votes_thresh)
lines = cv2.HoughLines(edges, res_rho, res_theta, votes_thresh)

# cv2.HoughLines() returns None (not an empty array) when no line reaches the
# vote threshold. Normalise to an empty (0, 1, 2) array so len() and the
# loops over `lines` keep working.
if lines is None:
    lines = np.empty((0, 1, 2), np.float32)

print('len(lines)', len(lines))

for hough_line in lines:
    # Each entry is [[rho, theta]]: distance of the line from the origin and
    # angle of its normal.
    rho, theta = hough_line[0]
    a = np.cos(theta)
    b = np.sin(theta)
    # Point on the line closest to the origin.
    x0 = a * rho
    y0 = b * rho
    # Walk along the line direction (-b, a) in both directions from (x0, y0).
    x1 = int(x0 + line_half_len * (-b))
    y1 = int(y0 + line_half_len * (a))
    x2 = int(x0 - line_half_len * (-b))
    y2 = int(y0 - line_half_len * (a))

    cv2.line(lines_only, (x1, y1), (x2, y2), 255, 1)
    cv2.line(img_hough, (x1, y1), (x2, y2), (0, 255, 0), 1)

cv2.imwrite(INPUT_FILE_BASE + '-linesonly.png', lines_only)
cv2.imwrite(INPUT_FILE_BASE + '-hough.png', img_hough)

votes_thresh 701
len(lines) 42


True

In [6]:
### Lines
#![lines](ALA1934_RR.pdf-5_1-linesonly.png)

### Lines on orig. Image
#![hough](ALA1934_RR.pdf-5_1-hough.png)

In [5]:
from math import degrees, radians

from pdftabextract.geom import pt, vecangle

# Unit vectors for the four axis directions, in the order up, right, down, left.
UNITY_VECS = tuple(
    pt(x, y)
    for x, y in (
        (0, 1),    # up
        (1, 0),    # right
        (0, -1),   # down
        (-1, 0),   # left
    )
)


# find rotation / skew
# Angle constants used below: half of pi and a quarter of pi.
pihlf, pi4th = np.pi / 2, np.pi / 4

# lines = (
#     ((1, pihlf), ),  # up
#     ((1, 0), ),          # right
#     ((1, np.pi + pihlf), ),  # down
#     ((1, np.pi), ),          # left
#     ((1, -pihlf), ),  # down
#     ((1, -0), ),          # right
#     ((1, -(np.pi + pihlf)), ),  # up
#     ((1, -np.pi), ),          # right
#     ((1, pihlf + 0.1), ),  # up
#     ((1, 0.1), ),          # right
#     ((1, np.pi + pihlf + 0.1), ),  # down
#     ((1, np.pi + 0.1), ),          # left
#     ((1, pihlf - 0.1), ),  # up
#     ((1, -0.1), ),          # right
#     ((1, np.pi + pihlf - 0.1), ),  # down
#     ((1, np.pi - 0.1), ),          # left
#     ((1, -(pihlf + 0.1)), ),  # down
#     ((1, -0.1), ),          # right
#     ((1, -(np.pi + pihlf + 0.1)), ),  # up
#     ((1, -(np.pi + 0.1)), ),          # left
# )

# Signed angular deviations (radians) of the detected lines, split by whether
# a line is closer to the horizontal or to the vertical direction.
hori_deviations = []
vert_deviations = []

for hough_line in lines:
    theta = hough_line[0][1]

    # Fold theta into [0, pi); inputs in [-2*pi, 2*pi) are handled.
    if theta >= np.pi:
        theta_norm = theta - np.pi
    elif theta >= 0:
        theta_norm = theta
    elif theta >= -np.pi:
        theta_norm = theta + np.pi
    else:
        theta_norm = theta + 2 * np.pi

    assert 0 <= theta_norm < np.pi

    # Offset of the normalised angle from pi/2.
    offset_from_hori = pihlf - theta_norm

    if not abs(offset_from_hori) > pi4th:
        # Within 45 degrees of pi/2: counted as a horizontal line.
        hori_deviations.append(-offset_from_hori)
        continue

    # Otherwise the line is counted as vertical: shift by pi/2 and wrap so
    # that the value stays near zero.
    offset_from_vert = offset_from_hori - pihlf
    if offset_from_vert < -pihlf:
        offset_from_vert = offset_from_vert + np.pi
    vert_deviations.append(-offset_from_vert)

# Median deviation per direction; fall back to 0 (with a warning) when no
# line of that direction was collected.
if not hori_deviations:
    print('warning: no horizontal lines found!')
    median_hori_dev = 0
else:
    median_hori_dev = np.median(hori_deviations)

if not vert_deviations:
    median_vert_dev = 0
    print('warning: no vertical lines found!')
else:
    median_vert_dev = np.median(vert_deviations)

# Show both medians in degrees as the cell output.
degrees(median_hori_dev), degrees(median_vert_dev)

(-0.7199950566895569, 0.0)

In [9]:
# Deviations up to this magnitude are treated as "no rotation".
ROT_THRESH = radians(0.5)
# Horizontal and vertical deviations closer together than this are treated as
# one common page rotation.
ROT_SAME_DIR_THRESH = radians(1)

hori_rot_above_tresh = abs(median_hori_dev) > ROT_THRESH
vert_rot_above_tresh = abs(median_vert_dev) > ROT_THRESH

# Exactly one of these is set when a correction is needed; all stay None when
# neither deviation exceeds ROT_THRESH or when the two deviations disagree.
rotation = None
vert_skew = None
hori_skew = None
if hori_rot_above_tresh and vert_rot_above_tresh:
    if abs(median_hori_dev - median_vert_dev) < ROT_SAME_DIR_THRESH:
        # Both directions deviate by about the same angle: use their mean.
        rotation = (median_hori_dev + median_vert_dev) / 2
    else:
        # math.degrees() takes a single argument, so each value is converted
        # separately to fill the two %f placeholders.
        print('warning: horizontal / vertical rotation not in same direction (%f / %f)'
              % (degrees(median_hori_dev), degrees(median_vert_dev)))
elif hori_rot_above_tresh:
    hori_skew = median_hori_dev
elif vert_rot_above_tresh:
    vert_skew = median_vert_dev
#else:  # no rotation / skew

rotation, vert_skew, hori_skew

(None, None, -0.012566284337315992)