In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:

src = "output_images2/200290_0010_page_08.png"
dst = "./out.png"

# -----------------------
# 1. Deskew (make the page's long ruled lines horizontal)
# -----------------------
gray = cv2.imread(src, cv2.IMREAD_GRAYSCALE)
if gray is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with a clear message instead of inside Canny.
    raise FileNotFoundError(f"could not read image: {src}")

edges = cv2.Canny(gray, 50, 150, apertureSize=3)
linesP = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=200,
                         minLineLength=400, maxLineGap=20)

# The skew estimate is the angle of the longest segment lying within
# 10 degrees of horizontal. With no such segment the angle stays 0.
best_len, best_angle = 0, 0
if linesP is not None:
    for x1, y1, x2, y2 in linesP[:, 0]:
        dx, dy = x2 - x1, y2 - y1
        if dx < 0:
            # Endpoint order is not guaranteed; flip right-to-left segments so
            # a near-horizontal line is measured near 0 deg, not near +/-180.
            dx, dy = -dx, -dy
        length = np.hypot(dx, dy)
        angle = np.degrees(np.arctan2(dy, dx))
        if abs(angle) < 10 and length > best_len:
            best_len, best_angle = length, angle

# Rotate about the image centre by the estimated angle; areas exposed by the
# rotation are filled with white (255) so they binarize as background.
h, w = gray.shape
M = cv2.getRotationMatrix2D((w/2, h/2), best_angle, 1.0)
deskew = cv2.warpAffine(gray, M, (w, h), flags=cv2.INTER_LINEAR, borderValue=255)

# -----------------------
# 2. binarize (ink=1)
# -----------------------
# Otsu picks the threshold automatically (the explicit 0 is ignored) and the
# inverted mode maps dark pixels to 1 and light pixels to 0.
# cv2.threshold returns (threshold_used, image); only the image is kept.
bw = cv2.threshold(deskew, 0, 1, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

# -----------------------
# 3. dash/solid column classification (k-means on per-column ink counts)
# -----------------------
# One sample per image column: the number of ink pixels in that column,
# shaped (n_columns, 1) as float32 for cv2.kmeans.
col_counts = bw.sum(axis=0).astype(np.float32)
samples = col_counts.reshape(-1, 1)

K = 10
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.1)
_, labels, centers = cv2.kmeans(
    samples, K, None, criteria, 4, cv2.KMEANS_PP_CENTERS
)

# Rank the clusters by their centre (mean ink count), densest first.
sort_idx = np.argsort(-centers[:, 0])
solid_idx = sort_idx[0]      # densest cluster: treated as solid-line columns
dashed_idx = sort_idx[1:5]   # next four clusters: treated as dashed-line columns

# Per-column flag and the matching column indices.
is_dashed = np.isin(labels[:, 0], dashed_idx)
dashed_cols = np.flatnonzero(is_dashed)

# create column mask for dashed boundary zones: every flagged column plus its
# immediate left and right neighbours (clipped at the image edges).
widened = is_dashed.copy()
widened[1:] |= is_dashed[:-1]    # right neighbour of each flagged column
widened[:-1] |= is_dashed[1:]    # left neighbour of each flagged column
col_mask = np.zeros_like(bw, dtype=np.uint8)
col_mask[:, widened] = 1

# -----------------------
# 4. isolate short vertical runs (likely dash segments)
#    A vertical opening keeps only ink runs at least L_OPEN pixels tall,
#    so original - opened leaves exactly the runs shorter than that.
# -----------------------
L_OPEN = 25  # height of the vertical kernel; should exceed a dash segment's height
kernel_vert = cv2.getStructuringElement(cv2.MORPH_RECT, (1, L_OPEN))  # 1 wide, L_OPEN tall

# Opening = erosion followed by dilation with the same kernel.
eroded = cv2.erode(bw, kernel_vert)
opened = cv2.dilate(eroded, kernel_vert)

# Saturating subtraction: pixels present in bw but removed by the opening.
diff = cv2.subtract(bw, opened)  # potential dash segments

# -----------------------
# 5. keep only diff pixels within dashed column mask
# -----------------------
# Pixels outside col_mask are zeroed; inside it diff passes through unchanged.
dash_only = cv2.bitwise_and(diff, diff, mask=col_mask)

# -----------------------
# 6. remove those pixels (set to background)
# -----------------------
bw_clean = cv2.subtract(bw, dash_only)
# bw_clean holds 0/1 with ink=1; invert and scale so ink is 0 and paper is 255.
clean = (1 - bw_clean)*255  # back to normal polarity

# cv2.imwrite returns False when the file could not be written; surface that
# instead of continuing and later reporting the file as saved.
if not cv2.imwrite(dst, clean):
    raise OSError(f"could not write image: {dst}")

# viz: show the four pipeline stages side by side, all in grayscale, no axes
fig, ax = plt.subplots(1, 4, figsize=(20, 5))
panels = [
    (gray, "original"),
    (deskew, "deskewed"),
    (dash_only*255, "detected dash pix"),  # scale the 0/1 mask up to 0/255
    (clean, "final"),
]
for axis, (image, title) in zip(ax, panels):
    axis.imshow(image, cmap='gray')
    axis.set_title(title)
    axis.axis("off")
plt.tight_layout()
plt.show()

print("saved to", dst)
