**Initialization**

In [119]:
import google.generativeai as genai
import json
import os
from dotenv import load_dotenv
import easyocr
import cv2
from rapidfuzz import fuzz

load_dotenv()

True

**Gemini Model**

In [120]:
def generate(prompt, image_path) -> list | dict:
    api_key = os.getenv("GEMINI_API_KEY")
    genai.configure(api_key=api_key)

    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction="You are a helpful assistant that extracts newspaper fields from images.",
        generation_config={"response_mime_type": "application/json"}
    )

    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()

    response = model.generate_content([
        {"text": prompt},
        {"mime_type": "image/png", "data": image_bytes}
    ])

    raw_json = response.text
    data = json.loads(raw_json)

    return data

**Call to generate headline from Gemini**

In [121]:
headline_schema = {
	"type": "array",
	"items": {"type": "string"}
}

headline_prompt = (
	"You are given a newspaper image. "
	"Extract only the article all possible headlines — ignore advertisements, captions, subheadlines, and any other text. "
	"Return the result strictly matching this JSON schema:\n\n"
	f"{json.dumps(headline_schema, indent=2)}"
)

target_image_path = "page_58.png"

headlines = generate(headline_prompt, target_image_path)

for i, headline in enumerate(headlines):
    headlines[i] = headline
print(headlines)
# Dev log
print(json.dumps(headlines, indent=2, ensure_ascii=False))

['MVP Group mobilizes relief ops across Luzon', 'Middle-class Filipinos remain vulnerable to poverty—experts', 'GSIS offers emergency loan in calamity areas', 'German-PH businesses cautiously optimistic amid uncertainties', 'Bans ineffective in eliminating vices, says liquor chain executive', 'DTI steps up price monitoring', 'Filinvest Group gets 4 Stevie awards', 'BSP term deposit yields edge down']
[
  "MVP Group mobilizes relief ops across Luzon",
  "Middle-class Filipinos remain vulnerable to poverty—experts",
  "GSIS offers emergency loan in calamity areas",
  "German-PH businesses cautiously optimistic amid uncertainties",
  "Bans ineffective in eliminating vices, says liquor chain executive",
  "DTI steps up price monitoring",
  "Filinvest Group gets 4 Stevie awards",
  "BSP term deposit yields edge down"
]


**EasyOCR reads**

In [122]:
reader = easyocr.Reader(['en'])
image = cv2.imread(target_image_path)
results = reader.readtext(image)

image_raw = image.copy()
for (top_left, top_right, bottom_right, bottom_left), text, confidence in results:
    tl = (int(top_left[0]), int(top_left[1]))
    br = (int(bottom_right[0]), int(bottom_right[1]))
    cv2.rectangle(image_raw, tl, br, (0, 0, 255), 2)
    coord_label = f"{tl} {br}"
    cv2.putText(image_raw, coord_label, (tl[0], tl[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

output_path_raw = target_image_path.replace(".png", "_ocr_boxes.png")
cv2.imwrite(output_path_raw, image_raw)

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


True

**Filter out text that matches with the list of headlines from Gemini**

In [123]:
bounding_box_text = []
bounding_box_coordinates = []

for coordinates, text, _ in results:
    
    for headline in headlines:
        score = fuzz.partial_ratio(headline, text)
        if score > 80:
            print(text, coordinates)
            top_left, _, bottom_right, _ = coordinates
            
            current_box = ((int(top_left[0]), int(top_left[1])),
                           (int(bottom_right[0]), int(bottom_right[1])))
            bounding_box_text.append(text)
            bounding_box_coordinates.append(current_box)

MVP [[np.int32(2993), np.int32(578)], [np.int32(3218), np.int32(578)], [np.int32(3218), np.int32(723)], [np.int32(2993), np.int32(723)]]
mobilizes [[np.int32(2993), np.int32(726)], [np.int32(3429), np.int32(726)], [np.int32(3429), np.int32(871)], [np.int32(2993), np.int32(871)]]
relief ops [[np.int32(2982), np.int32(864)], [np.int32(3415), np.int32(864)], [np.int32(3415), np.int32(1037)], [np.int32(2982), np.int32(1037)]]
across Luzon [[np.int32(2990), np.int32(1034)], [np.int32(3548), np.int32(1034)], [np.int32(3548), np.int32(1159)], [np.int32(2990), np.int32(1159)]]
of [[np.int32(3266), np.int32(1473)], [np.int32(3314), np.int32(1473)], [np.int32(3314), np.int32(1516)], [np.int32(3266), np.int32(1516)]]
ly [[np.int32(3002), np.int32(1795)], [np.int32(3040), np.int32(1795)], [np.int32(3040), np.int32(1838)], [np.int32(3002), np.int32(1838)]]
Middle-class Filipinos remain [[np.int32(107), np.int32(1930)], [np.int32(2617), np.int32(1930)], [np.int32(2617), np.int32(2211)], [np.int32(10

**Merging Bounding boxes that are close to each other**

In [124]:
def is_close(coordinate1, coordinate2, gap_x=30, gap_y=20):
    (x1_min, y1_min), (x1_max, y1_max) = coordinate1
    (x2_min, y2_min), (x2_max, y2_max) = coordinate2

    # Overlaps
    overlap_x = min(x1_max, x2_max) - max(x1_min, x2_min)
    overlap_y = min(y1_max, y2_max) - max(y1_min, y2_min)

    # Edge alignment (corner-based)
    align_x = (abs(x1_min - x2_min) <= gap_x) or (abs(x1_max - x2_max) <= gap_x)
    align_y = (abs(y1_min - y2_min) <= gap_y) or (abs(y1_max - y2_max) <= gap_y)

    # If boxes intersect at all, they’re close
    if overlap_x > 0 and overlap_y > 0:
        return True

    # Side-by-side: small gap on X AND top/bottom edges align (corner closeness)
    side_by_side = (
        (0 <= x2_min - x1_max <= gap_x) or
        (0 <= x1_min - x2_max <= gap_x)
    ) and align_y

    # Stacked: small gap on Y AND left/right edges align (corner closeness)
    stacked = (
        (0 <= y2_min - y1_max <= gap_y) or
        (0 <= y1_min - y2_max <= gap_y)
    ) and align_x

    return side_by_side or stacked


new = {}
print("Headlines:", headlines)
for headline in headlines:
    new[headline] = []

for i, text in enumerate(bounding_box_text):
    box = bounding_box_coordinates[i]
    for headline in new:

        print(f"{headline=}")
        if not new[headline]:
            new[headline].append({"text": text, "box": box})
            print("new")
            print(f"{text=}, {box=}")
        else:
            for i, currentBox in enumerate(new[headline]):
                score = fuzz.ratio(new[headline][i]["text"], headline)
                print(f"{text=}, {box=}, {currentBox=}, {score=}")
                if score >= 90:
                    break
                if is_close(currentBox["box"], box):
                    
                    (ex_tl_x, ex_tl_y), (ex_br_x, ex_br_y) = currentBox["box"]
                    (tl_x, tl_y), (br_x, br_y) = box

                    new_tl = (min(ex_tl_x, tl_x), min(ex_tl_y, tl_y))
                    new_br = (max(ex_br_x, br_x), max(ex_br_y, br_y))

                    new[headline][i]["box"] = (new_tl, new_br)
                    new[headline][i]["text"] += " " + text
                    break
            else:
                new[headline].append({"text": text, "box": box})

print(json.dumps(new, indent=2, ensure_ascii=False))

Headlines: ['MVP Group mobilizes relief ops across Luzon', 'Middle-class Filipinos remain vulnerable to poverty—experts', 'GSIS offers emergency loan in calamity areas', 'German-PH businesses cautiously optimistic amid uncertainties', 'Bans ineffective in eliminating vices, says liquor chain executive', 'DTI steps up price monitoring', 'Filinvest Group gets 4 Stevie awards', 'BSP term deposit yields edge down']
headline='MVP Group mobilizes relief ops across Luzon'
new
text='MVP', box=((2993, 578), (3218, 723))
headline='Middle-class Filipinos remain vulnerable to poverty—experts'
new
text='MVP', box=((2993, 578), (3218, 723))
headline='GSIS offers emergency loan in calamity areas'
new
text='MVP', box=((2993, 578), (3218, 723))
headline='German-PH businesses cautiously optimistic amid uncertainties'
new
text='MVP', box=((2993, 578), (3218, 723))
headline='Bans ineffective in eliminating vices, says liquor chain executive'
new
text='MVP', box=((2993, 578), (3218, 723))
headline='DTI ste

In [125]:
for key in new:
    print(new[key])
    for i in range(len(new[key])):
        for j in range(i + 1, len(new[key])):
            if is_close(new[key][i]["box"], new[key][j]["box"]):
                print(f"Merging {new[key][i]} and {new[key][j]}")
                (ex_tl_x, ex_tl_y), (ex_br_x, ex_br_y) = new[key][i]["box"]
                (tl_x, tl_y), (br_x, br_y) = new[key][j]["box"]

                new_tl = (min(ex_tl_x, tl_x), min(ex_tl_y, tl_y))
                new_br = (max(ex_br_x, br_x), max(ex_br_y, br_y))

                new[key][i]["box"] = (new_tl, new_br)
                new[key][i]["text"] += " " + new[key][j]["text"]
                del new[key][j]
                break
print(new)

[{'text': 'MVP mobilizes relief ops across Luzon', 'box': ((2982, 578), (3548, 1159))}]
[{'text': 'MVP mobilizes relief ops across Luzon Group Group', 'box': ((2982, 570), (3548, 1159))}, {'text': 'of', 'box': ((3266, 1473), (3314, 1516))}, {'text': 'ly', 'box': ((3002, 1795), (3040, 1838))}, {'text': 'Middle-class Filipinos remain vulnerable to poverty_experts', 'box': ((96, 1930), (2897, 2466))}]
[{'text': 'MVP mobilizes relief ops across Luzon Group Group', 'box': ((2982, 570), (3548, 1159))}, {'text': 'of', 'box': ((3266, 1473), (3314, 1516))}, {'text': 'ly', 'box': ((3002, 1795), (3040, 1838))}, {'text': 'Middle-class Filipinos remain vulnerable to poverty_experts', 'box': ((96, 1930), (2897, 2466))}, {'text': 'M M', 'box': ((108, 2523), (329, 2739))}, {'text': 'to to', 'box': ((2407, 2645), (2448, 2670))}, {'text': 'to to', 'box': ((1357, 2913), (1400, 2950))}, {'text': 'DTI steps up price monitoring', 'box': ((3095, 3085), (3508, 3234))}, {'text': 'poverty.', 'box': ((115, 3170)

In [126]:
from fuzzywuzzy import process

image_merged = image.copy()

for key in new:
    query = key
    choices = [i["text"] for i in new[key]]


    if not choices:
        continue  # skip if no choices

    # map choice → index
    choices_dict = {c: i for i, c in enumerate(choices)}

    # get best match
    best_match = process.extractOne(query, list(choices_dict.keys()))
    if best_match:
        text, score = best_match
        index = choices_dict[text]
        tl, br = new[key][index]["box"]
        cv2.rectangle(image_merged, tl, br, (0, 255, 0), 5)

# save output
output_path_merged = f"{os.path.splitext(target_image_path)[0]}_result{os.path.splitext(target_image_path)[1]}"
cv2.imwrite(output_path_merged, image_merged)



True

**Generate Byline from Gemini**

In [127]:
byline_schema = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "headline": {"type": "string"},
            "byline": {"type": "string"},
        },
        "required": ["headline", "byline"]
    }
}

byline_prompt = (
	"You are given a newspaper image. "
	"Extract only the article's byline or author from the given headlines — ignore advertisements, captions, subheadlines, and any other text. "
    "use the following headlines to find the bylines: "
    f"{json.dumps(headlines, indent=2)}. "
	"Return the result strictly matching this JSON schema:\n\n"
	f"{json.dumps(byline_schema, indent=2)}"
)

bylines = generate(byline_prompt, target_image_path)

print(json.dumps(bylines, indent=2, ensure_ascii=False))

[
  {
    "headline": "MVP Group mobilizes relief ops across Luzon",
    "byline": ""
  },
  {
    "headline": "Middle-class Filipinos remain vulnerable to poverty—experts",
    "byline": "Thony Rose Lesaca"
  },
  {
    "headline": "GSIS offers emergency loan in calamity areas",
    "byline": ""
  },
  {
    "headline": "German-PH businesses cautiously optimistic amid uncertainties",
    "byline": "Othel V. Campos"
  },
  {
    "headline": "Bans ineffective in eliminating vices, says liquor chain executive",
    "byline": ""
  },
  {
    "headline": "DTI steps up price monitoring",
    "byline": ""
  },
  {
    "headline": "Filinvest Group gets 4 Stevie awards",
    "byline": ""
  },
  {
    "headline": "BSP term deposit yields edge down",
    "byline": "Thony Rose Lesaca"
  }
]


In [128]:
def is_close(coordinate1, coordinate2, gap_x=5, gap_y=20):
    (x1_min, y1_min), (x1_max, y1_max) = coordinate1
    (x2_min, y2_min), (x2_max, y2_max) = coordinate2

    # Overlaps
    overlap_x = min(x1_max, x2_max) - max(x1_min, x2_min)
    overlap_y = min(y1_max, y2_max) - max(y1_min, y2_min)

    # Edge alignment (corner-based)
    align_x = (abs(x1_min - x2_min) <= gap_x) or (abs(x1_max - x2_max) <= gap_x)
    align_y = (abs(y1_min - y2_min) <= gap_y) or (abs(y1_max - y2_max) <= gap_y)

    # If boxes intersect at all, they’re close
    if overlap_x > 0 and overlap_y > 0:
        return True

    # Side-by-side: small gap on X AND top/bottom edges align (corner closeness)
    side_by_side = (
        (0 <= x2_min - x1_max <= gap_x) or
        (0 <= x1_min - x2_max <= gap_x)
    ) and align_y

    # Stacked: small gap on Y AND left/right edges align (corner closeness)
    stacked = (
        (0 <= y2_min - y1_max <= gap_y) or
        (0 <= y1_min - y2_max <= gap_y)
    ) and align_x

    return side_by_side or stacked



print(is_close(((2990, 3507), (3506, 3632)), ((2983, 3623), (3578, 3776)), gap_x=5, gap_y=20))

True


In [129]:
def is_close(coordinate1, coordinate2, gap_x=5, gap_y=20):
    (x1_min, y1_min), (x1_max, y1_max) = coordinate1
    (x2_min, y2_min), (x2_max, y2_max) = coordinate2

    # Overlaps
    overlap_x = min(x1_max, x2_max) - max(x1_min, x2_min)
    overlap_y = min(y1_max, y2_max) - max(y1_min, y2_min)

    # Edge alignment (corner-based)
    align_x = (abs(x1_min - x2_min) <= gap_x) or (abs(x1_max - x2_max) <= gap_x)
    align_y = (abs(y1_min - y2_min) <= gap_y) or (abs(y1_max - y2_max) <= gap_y)

    # If boxes intersect at all, they’re close
    if overlap_x > 0 and overlap_y > 0:
        return True

    # Side-by-side: small gap on X AND top/bottom edges align (corner closeness)
    side_by_side = (
        (0 <= x2_min - x1_max <= gap_x) or
        (0 <= x1_min - x2_max <= gap_x)
    ) and align_y

    # Stacked: small gap on Y AND left/right edges align (corner closeness)
    stacked = (
        (0 <= y2_min - y1_max <= gap_y) or
        (0 <= y1_min - y2_max <= gap_y)
    ) and align_x

    return side_by_side or stacked


# Test with your example
result = is_close(((1712, 5767), (3423, 5912)), ((2371, 5912), (2835, 5952)), gap_x=5, gap_y=20)
print(f"Result: {result}")


Result: False
