In [3]:
pip install flask werkzeug ultralytics opencv-python-headless PyMuPDF cv2 ultralytics pytesseract

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement cv2 (from versions: none)
ERROR: No matching distribution found for cv2


In [None]:
import os
import cv2 # OpenCV for image manipulation
from ultralytics import YOLO

# --- Part 1: Training ---

def train_model():
    """
    Trains a new YOLOv8 model.
    This function is run separately *before* you start the web app.
    It looks for your dataset in 'dataset/data.yaml'.
    """
    print("Starting YOLOv8 model training...")
    
    # Load a pre-trained YOLOv8 'small' model
    model = YOLO('yolov8s.pt') 
    
    # Train the model
    # data: Path to your data.yaml file
    # epochs: How many times to go over the data. Start with 100.
    # imgsz: Resize images to this size for training. 640 is standard.
    results = model.train(data='dataset/data.yaml', epochs=100, imgsz=640)
    
    print("Training complete!")
    print(f"Model saved to: {results.save_dir}")
    print(f"Your 'best.pt' model is ready. Update MODEL_PATH in app.py if needed.")
    
    # After training, it's good practice to validate
    model.val()


# --- Part 2: Inference ---

def run_inference(model_path, source_image_path, face_output_dir):
    """
    Runs inference using your trained model on a single image.
    
    Args:
        model_path (str): Path to your 'best.pt' trained model.
        source_image_path (str): Path to the new image to process.
        face_output_dir (str): Directory to save the cropped face image.

    Returns:
        tuple: (dict: extracted_data, str: face_image_path)
               - extracted_data: {'name': '...', 'dob': '...', 'id_number': '...'}
               - face_image_path: Relative path to the saved face crop (e.g., 'static/faces/face_1.png')
    """
    
    # Load your custom-trained model
    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model from {model_path}. Did you train it?")
        raise e

    # Run inference
    results = model(source_image_path)

    # --- Process Results ---
    
    # `results` is a list of Results objects. We take the first one.
    result = results[0]
    
    # Original image for cropping
    img = cv2.imread(source_image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read source image: {source_image_path}")

    # Get class names from the model
    class_names = result.names
    
    extracted_data = {}
    face_image_path = None
    
    # We will use OCR on the cropped boxes.
    # For a real production app, you'd use a robust OCR library
    # like Tesseract (pytesseract) or an API (Google Vision).
    # For this example, we'll just placeholder the text extraction.
    
    for box in result.boxes:
        # Get class ID and confidence
        class_id = int(box.cls[0])
        class_name = class_names[class_id]
        confidence = float(box.conf[0])
        
        # Get bounding box coordinates (xyxy format)
        x1, y1, x2, y2 = [int(coord) for coord in box.xyxy[0]]
        
        # Crop the detected region from the original image
        cropped_img = img[y1:y2, x1:x2]
        
        # --- Handle Face vs. Text Fields ---
        
        if class_name == 'face':
            if confidence > 0.5: # Only save high-confidence faces
                # Create a unique filename for the face
                base_filename = os.path.splitext(os.path.basename(source_image_path))[0]
                face_filename = f"face_{base_filename}_{x1}.png"
                
                # Full path to save the face
                full_save_path = os.path.join(face_output_dir, face_filename)
                
                # Save the cropped face image
                cv2.imwrite(full_save_path, cropped_img)
                
                # Store the *relative* path for use in HTML
                face_image_path = os.path.join(face_output_dir, face_filename).replace('\\', '/')
                print(f"Saved face to: {face_image_path}")
                
        else:
            # --- Placeholder for OCR ---
            # In a real app, you would pass `cropped_img` to an OCR function.
            # text = run_ocr(cropped_img) 
            # For now, we'll use a placeholder.
            # ---------------------------
            text = f"[OCR result for {class_name}]"
            
            print(f"Detected: {class_name} (Conf: {confidence:.2f})")
            
            # Store the extracted text (or placeholder)
            # This handles finding the *best* detection if there are duplicates
            if class_name not in extracted_data or confidence > extracted_data.get(f"{class_name}_conf", 0):
                extracted_data[class_name] = text
                extracted_data[f"{class_name}_conf"] = confidence

    
    # Clean up the data dict to only contain text values
    final_data = {key: val for key, val in extracted_data.items() if not key.endswith('_conf')}
    
    return final_data, face_image_path

# --- Simple OCR Stub (Example) ---
#
# To make this real, install `pytesseract` and `tesseract-ocr`:
# pip install pytesseract
# (You also need to install the Tesseract binary: https://github.com/tesseract-ocr/tesseract)
#
# import pytesseract
#
# def run_ocr(image):
#     """Runs OCR on a CV2 image."""
#     try:
#         # Pre-process image for better OCR (e.g., grayscale, threshold)
#         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#         _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
#         
#         # Run Tesseract
#         text = pytesseract.image_to_string(thresh, config='--psm 6')
#         return text.strip()
#     except Exception as e:
#         print(f"OCR Error: {e}")
#         return ""
