In [1]:
import pandas as pd
from pathlib import Path
from doclayout_yolo import YOLOv10

# Column order of the detection table. Passing it explicitly to the DataFrame
# constructor keeps the schema stable even when no object is detected.
_DETECTION_COLUMNS = (
    "filename",
    "class",
    "confidence",
    "x1",
    "y1",
    "x2",
    "y2",
    "page_height",
    "page_width",
)


def process_image(image_path, model_path="../models/doclayout_yolo_docstructbench_imgsz1024.pt",
                  imgsz=1024, conf=0.2, device="cpu", output_dir="../results"):
    """
    Run a YOLOv10 layout model on one image and save the detections.

    The detections of the first prediction result are flattened into one row
    per bounding box and written to ``<output_dir>/<image stem>_detected.parquet``
    and ``<output_dir>/<image stem>_detected.json``.

    Args:
        image_path (str): Path to the input image
        model_path (str): Path to the YOLOv10 model
        imgsz (int): Prediction image size
        conf (float): Confidence threshold
        device (str): Device to use ('cuda:0' or 'cpu')
        output_dir (str): Directory to save results (created if missing)

    Returns:
        pd.DataFrame | None: One row per detection with the columns listed in
        ``_DETECTION_COLUMNS`` (the columns are present even when nothing was
        detected). ``None`` if post-processing or saving the result failed;
        in that case the error and traceback are printed.

    Raises:
        Exceptions raised while loading the model or running the prediction
        are not caught here and propagate to the caller.
    """
    # Load the pre-trained model
    model = YOLOv10(model_path)

    # Perform prediction (deliberately outside the try block below)
    det_res = model.predict(
        image_path,
        imgsz=imgsz,
        conf=conf,
        device=device
    )

    # Process the detection result
    try:
        # Convert image_path to Path object if it's not already
        image_path = Path(image_path)
        filename = image_path.name

        # Only the first result is used (a single image was passed in)
        result = det_res[0]

        # orig_shape is unpacked as (height, width)
        img_height, img_width = result.orig_shape

        # Extract bounding box information, one record per box
        all_detections = []
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            # Named `score` so the `conf` threshold parameter is not shadowed
            score = box.conf[0].item()
            cls_name = result.names[int(box.cls[0].item())]

            all_detections.append({
                "filename": filename,
                "class": cls_name,
                "confidence": score,
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2,
                "page_height": img_height,
                "page_width": img_width,
            })

        # Create output filenames based on input filename
        base_filename = image_path.stem
        output_dir = Path(output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)

        output_parquet = output_dir / f"{base_filename}_detected.parquet"
        output_json = output_dir / f"{base_filename}_detected.json"

        # Save detection results to Parquet and JSON. The explicit column list
        # gives an empty result the same schema as a non-empty one.
        df = pd.DataFrame(all_detections, columns=list(_DETECTION_COLUMNS))
        df.to_parquet(output_parquet, index=False)
        df.to_json(output_json, orient="records", indent=2)

        print(f"Results saved to {output_parquet} and {output_json}")
        print(f"Detected {len(all_detections)} objects")

        # Display a sample of the data
        if len(df) > 0:
            print("\nSample of detected objects:")
            print(df.head(3))

        return df

    except Exception as e:
        # Best-effort: report the failure and signal it with None
        import traceback
        print(f"Error processing detection result: {str(e)}")
        print(traceback.format_exc())
        return None

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Run layout detection on one sample page; the returned DataFrame is the cell output.
process_image(
    image_path="../data/bib13991099_18650102_0_1_0001_2218x3150.jpg",
)




FileNotFoundError: data/bib13991099_18650102_0_1_0001_2218x3150.jpg does not exist