**Initialization**

In [9]:
import google.generativeai as genai
import json
import os
from dotenv import load_dotenv
import easyocr
import cv2
from rapidfuzz import fuzz

load_dotenv()

True

**Gemini Model**

In [10]:
def generate(prompt, image_path) -> list | dict:
    api_key = os.getenv("GEMINI_API_KEY")
    genai.configure(api_key=api_key)

    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        system_instruction="You are a helpful assistant that extracts newspaper fields from images.",
        generation_config={"response_mime_type": "application/json"}
    )

    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()

    response = model.generate_content([
        {"text": prompt},
        {"mime_type": "image/png", "data": image_bytes}
    ])

    raw_json = response.text
    data = json.loads(raw_json)

    return data

**Call to generate from Gemini**

In [11]:
headline_schema = {
	"type": "array",
	"items": {"type": "string"}
}

headline_prompt = (
	"You are given a newspaper image. "
	"Extract only the article all possible headlines — ignore advertisements, captions, subheadlines, and any other text. "
	"Return the result strictly matching this JSON schema:\n\n"
	f"{json.dumps(headline_schema, indent=2)}"
)

target_image_path = "page_49.png"

headlines = generate(headline_prompt, target_image_path)

for i, headline in enumerate(headlines):
    headlines[i] = headline
print(headlines)
# Dev log
print(json.dumps(headlines, indent=2, ensure_ascii=False))

['Makati City distributes goods in evac areas', 'Drainage repairs start in Commonwealth Ave.', 'Taguig City cops foil drug session, arrest 4', 'DMW, US group to aid seafarers', 'PH, India to hold joint patrols in WPS in August', 'Phivolcs issues lahar advisory for Mt. Mayon areas', "'Probe P2.5b Las Piñas flood project'"]
[
  "Makati City distributes goods in evac areas",
  "Drainage repairs start in Commonwealth Ave.",
  "Taguig City cops foil drug session, arrest 4",
  "DMW, US group to aid seafarers",
  "PH, India to hold joint patrols in WPS in August",
  "Phivolcs issues lahar advisory for Mt. Mayon areas",
  "'Probe P2.5b Las Piñas flood project'"
]


**EasyOCR reads**

In [12]:
reader = easyocr.Reader(['en'])
image = cv2.imread(target_image_path)
results = reader.readtext(image, width_ths=2, link_threshold=0.3)

image_raw = image.copy()
for (top_left, top_right, bottom_right, bottom_left), text, confidence in results:
    tl = (int(top_left[0]), int(top_left[1]))
    br = (int(bottom_right[0]), int(bottom_right[1]))
    cv2.rectangle(image_raw, tl, br, (0, 0, 255), 2)
    coord_label = f"{tl} {br}"
    cv2.putText(image_raw, coord_label, (tl[0], tl[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

output_path_raw = target_image_path.replace(".png", "_ocr_boxes.png")
cv2.imwrite(output_path_raw, image_raw)

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


True

**Finds the coordinates of the headings**

In [13]:
bounding_box_text = []
bounding_box_coordinates = []

for coordinates, text, _ in results:
    print(text)
    for headline in headlines:
        
        score = fuzz.partial_ratio(headline, text)

        if score > 80 and len(text) > 3:

            top_left, _, bottom_right, _ = coordinates
            
            current_box = ((int(top_left[0]), int(top_left[1])),
                           (int(bottom_right[0]), int(bottom_right[1])))
            bounding_box_text.append(text)
            bounding_box_coordinates.append(current_box)

print(bounding_box_text)
print(bounding_box_coordinates)
print(len(bounding_box_text), len(bounding_box_coordinates))

for i in range(len(bounding_box_text)):
    
    print(bounding_box_text[i])
    print(bounding_box_coordinates[i])


NEWS
FRIDAY, JULY =
A3
'25,2025
mst daydesk@gmail.com
Ioo
IN BRIEF
Standara
Makati City distributes
goods in evac areas
THE Makati
government has
distributed relief goods to 103 fami-
E2"~Il
lies and 306 other individuals stay-
ing at four evacuation centers due
to  ongoing rains  and  flooding
Metro Manila. Mayor Nancy Binay
personally visited the Pio del Pilar
Main Elementary  School to check
on evacuees and assess their needs
the   visit, Binay  assured
the displaced residents that the local
government is monitoring the situ-
ation and is ready to provide addi-
tional support as needed. She also
supervised the distribution of food,
clean water; and essential supplies.
Evacuation centers   were  opened
at   San   Antonio   Covered   Court;
Bangkal   Covered   Court;   Palanan
Covered Court, and the Pio del Pilar
Main Elementary School. Families
were
provided with modular tents,
Disaster Assistance Family Access
Cards   (DAFAC);
ready-to-eat
meals. Joel E. Zurbano
Drainage repairs sta

In [27]:
def is_close(coordinate1, coordinate2, gap_x=5, gap_y=5):
    (x1_min, y1_min), (x1_max, y1_max) = coordinate1
    (x2_min, y2_min), (x2_max, y2_max) = coordinate2

    # Expand both boxes by the gap
    x1_min, y1_min, x1_max, y1_max = x1_min - gap_x, y1_min - gap_y, x1_max + gap_x, y1_max + gap_y
    x2_min, y2_min, x2_max, y2_max = x2_min - gap_x, y2_min - gap_y, x2_max + gap_x, y2_max + gap_y

    # Overlap condition (after expansion)
    horizontal_overlap = not (x1_max < x2_min or x2_max < x1_min)
    vertical_overlap = not (y1_max < y2_min or y2_max < y1_min)

    return horizontal_overlap and vertical_overlap


new = {}
print("Headlines:", headlines)
for headline in headlines:
    new[headline] = []

for i, text in enumerate(bounding_box_text):
    box = bounding_box_coordinates[i]
    for headline in new:
        score = fuzz.partial_ratio(headline, text)
        if score > 80 and len(text) > 3:
            if not new[headline]:
                new[headline].append({"text": text, "box": box})
                print("new")
                print(f"{text=}, {box=}")
            else:
                for i, currentBox in enumerate(new[headline]):
                    score = fuzz.ratio(new[headline][i]["text"], headline)
                    print(f"{text=}, {box=}, {currentBox=}, {score=}")
                    if score >= 99:
                        break
                    if is_close(currentBox["box"], box):
                        
                        
                        (ex_tl_x, ex_tl_y), (ex_br_x, ex_br_y) = currentBox["box"]
                        (tl_x, tl_y), (br_x, br_y) = box

                        new_tl = (min(ex_tl_x, tl_x), min(ex_tl_y, tl_y))
                        new_br = (max(ex_br_x, br_x), max(ex_br_y, br_y))

                        new[headline][i]["box"] = (new_tl, new_br)
                        new[headline][i]["text"] += " " + text
                        break
                else:
                    new[headline].append({"text": text, "box": box})

print(json.dumps(new, indent=2, ensure_ascii=False))

Headlines: ['Makati City distributes goods in evac areas', 'Drainage repairs start in Commonwealth Ave.', 'Taguig City cops foil drug session, arrest 4', 'DMW, US group to aid seafarers', 'PH, India to hold joint patrols in WPS in August', 'Phivolcs issues lahar advisory for Mt. Mayon areas', "'Probe P2.5b Las Piñas flood project'"]
new
text='Makati City distributes', box=((157, 748), (760, 823))
text='goods in evac areas', box=((189, 805), (718, 890)), currentBox={'text': 'Makati City distributes', 'box': ((157, 748), (760, 823))}, score=69.6969696969697
new
text='Drainage repairs start', box=((159, 2165), (750, 2250))
text='in Commonwealth Ave.', box=((147, 2230), (759, 2293)), currentBox={'text': 'Drainage repairs start', 'box': ((159, 2165), (750, 2250))}, score=67.6923076923077
new
text='DMW; US group to aid seafarers', box=((821, 2403), (3425, 2697))
new
text='PH; India tohold', box=((2994, 3516), (3628, 3654))
new
text='Taguig City cops foil', box=((182, 3602), (724, 3687))
text

In [28]:
for key in new:
    print(new[key])
    for i in range(len(new[key])):
        for j in range(i + 1, len(new[key])):
            if is_close(new[key][i]["box"], new[key][j]["box"]):
                print(f"Merging {new[key][i]} and {new[key][j]}")
                (ex_tl_x, ex_tl_y), (ex_br_x, ex_br_y) = new[key][i]["box"]
                (tl_x, tl_y), (br_x, br_y) = new[key][j]["box"]

                new_tl = (min(ex_tl_x, tl_x), min(ex_tl_y, tl_y))
                new_br = (max(ex_br_x, br_x), max(ex_br_y, br_y))

                new[key][i]["box"] = (new_tl, new_br)
                new[key][i]["text"] += " " + new[key][j]["text"]
                del new[key][j]
                break
print(new)

[{'text': 'Makati City distributes goods in evac areas', 'box': ((157, 748), (760, 890))}]
[{'text': 'Drainage repairs start in Commonwealth Ave.', 'box': ((147, 2165), (759, 2293))}]
[{'text': 'Taguig City cops foil session, arrest 4 drug', 'box': ((178, 3602), (739, 3749))}, {'text': 'City City', 'box': ((391, 890), (466, 965))}, {'text': 'City City', 'box': ((332, 3762), (404, 3820))}]
[{'text': 'DMW; US group to aid seafarers', 'box': ((821, 2403), (3425, 2697))}]
[{'text': 'PH; India tohold jointpatrolsin WPSin August', 'box': ((2981, 3516), (3628, 3967))}, {'text': 'just', 'box': ((658, 2865), (737, 2898))}]
[{'text': 'Phivolcs issues lahar advisory for Mt Mayon areas', 'box': ((104, 4869), (2226, 5078))}]
[{'text': "'Probe P2Sb Las Pifas flood project'", 'box': ((1893, 5594), (3469, 5783))}]
{'Makati City distributes goods in evac areas': [{'text': 'Makati City distributes goods in evac areas', 'box': ((157, 748), (760, 890))}], 'Drainage repairs start in Commonwealth Ave.': [{'

In [29]:
from fuzzywuzzy import process

image_merged = image.copy()

for key in new:
    query = key
    choices = [i["text"] for i in new[key]]

    if not choices:
        continue  # skip if no choices

    # map choice → index
    choices_dict = {c: i for i, c in enumerate(choices)}

    # get best match
    best_match = process.extractOne(query, list(choices_dict.keys()))
    if best_match:
        text, score = best_match
        index = choices_dict[text]

        tl, br = new[key][index]["box"]
        print(text, tl, br)
        cv2.rectangle(image_merged, tl, br, (0, 255, 0), 5)

# save output
output_path_merged = f"{os.path.splitext(target_image_path)[0]}_result{os.path.splitext(target_image_path)[1]}"
cv2.imwrite(output_path_merged, image_merged)


Makati City distributes goods in evac areas (157, 748) (760, 890)
Drainage repairs start in Commonwealth Ave. (147, 2165) (759, 2293)
Taguig City cops foil session, arrest 4 drug (178, 3602) (739, 3749)
DMW; US group to aid seafarers (821, 2403) (3425, 2697)
PH; India tohold jointpatrolsin WPSin August (2981, 3516) (3628, 3967)
Phivolcs issues lahar advisory for Mt Mayon areas (104, 4869) (2226, 5078)
'Probe P2Sb Las Pifas flood project' (1893, 5594) (3469, 5783)


True