# Task 3 - Data Enrichment with Object Detection (YOLO)

In [1]:
# import libraries
import sys
import os
import pandas as pd
from glob import glob

In [2]:
from pathlib import Path
from importlib import reload
# add the project root to the path
project_root = Path("..").resolve()
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

### Database connection

In [3]:
#import the module
from src.database.Connection import Database
import src.database.Connection
# reload the module to ensure we have the latest version
reload(src.database.Connection)

<module 'src.database.Connection' from '/home/chalasimon/Documents/10academy/week 7/challenge/Telegram-Medical-Insights/src/database/Connection.py'>

In [4]:
host = "localhost"
port = 5432
# get the database credentials from environment variables or use defaults
# you can set these in a .env file or directly in your environment
# for example, using dotenv package to load from .env file
from dotenv import load_dotenv
load_dotenv()

host=os.getenv("POSTGRES_HOST")
dbname=os.getenv("POSTGRES_DB")
user=os.getenv("POSTGRES_USER")
password=os.getenv("POSTGRES_PASSWORD")
port=os.getenv("POSTGRES_PORT")

# create a database connection
db = Database(host=host,database=dbname, user=user, password=password, port=port)
# connect to the database
con = db.connect()

Connection to the database established successfully.


### Data Enrichment with YOLO

In [6]:
# import libraries
import os, time, logging
from dotenv import load_dotenv
from src.enrichment.yolo_detect import select_images_to_process, run_yolo_on_image, insert_detections
from ultralytics import YOLO

In [None]:
def main(limit=None, model_name=None, rerun=False):
    load_dotenv()
    model_name = model_name or os.getenv("YOLO_MODEL", "yolov8n.pt")
    logging.info(f"Loading YOLO model: {model_name}")
    model = YOLO(model_name)

    targets = select_images_to_process(con, limit=limit, rerun=rerun)
    logging.info(f"Images to process: {len(targets)}")

    inserted = 0
    t0 = time.time()

    for idx, (message_id, image_path) in enumerate(targets, 1):
        dets = run_yolo_on_image(model, image_path)
        batch = [
            (
                message_id,
                image_path,
                d["detected_class"],
                d["confidence"],
                d["bbox_xmin"],
                d["bbox_ymin"],
                d["bbox_xmax"],
                d["bbox_ymax"],
                model_name,
            )
            for d in dets
        ]
        inserted += insert_detections(con, batch)
        if idx % 50 == 0:
            logging.info(f"Processed {idx}/{len(targets)} images...")

    logging.info(f"Done. Inserted {inserted} detection rows in {time.time()-t0:.1f}s.")
