In [None]:
import os
import shutil
from pathlib import Path
import zipfile
from PIL import Image, ImageDraw, ImageFont
import base64
import io
import time
from ultralytics import YOLO
import gradio as gr
from openai import OpenAI

# --- Configuration ---
# Credentials are looked up by environment-variable NAME. Never paste the
# secret values themselves here: a key that has been committed to source must
# be treated as leaked and rotated.
KAGGLE_USERNAME = os.getenv("KAGGLE_USERNAME")
KAGGLE_KEY = os.getenv("KAGGLE_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # Required for GPT-4o brand identification

DATASET_NAME = "sushovansaha9/flickr-logos-27-dataset"
DATA_DIR = "data"
MODEL_PATH = "yolov8_logo_detector.pt"  # Path to save/load the trained YOLOv8 model

# Logo classes; the list index is the YOLO class id.
# NOTE(review): "Unisef" is presumably meant to be "Unicef" -- confirm against
# the spelling used in the dataset's annotation files before renaming.
CLASSES = [
    "Adidas", "Apple", "BMW", "Citroen", "Coca Cola", "DHL", "Fedex", "Ferrari",
    "Ford", "Google", "Heineken", "HP", "McDonalds", "Mini", "Nbc", "Nike",
    "Pepsi", "Porsche", "Puma", "Red Bull", "Sprite", "Starbucks", "Intel",
    "Texaco", "Unisef", "Vodafone", "Yahoo"
]
CLASS_TO_ID = {name: i for i, name in enumerate(CLASSES)}
ID_TO_CLASS = {i: name for i, name in enumerate(CLASSES)}

# --- Helper functions for dataset processing ---
def _normalize_class_name(name):
    """Return a case- and space-insensitive key for matching class names."""
    return name.replace(" ", "").casefold()


def _copy_image_and_read_size(src_img_path, dest_img_path):
    """Copy one image into the YOLO tree and return its (width, height).

    Returns None (after printing the reason) when the source image is missing,
    cannot be copied, or cannot be opened.
    """
    if not src_img_path.exists():
        print(f"Image {src_img_path} not found. Skipping annotation for this image.")
        return None

    try:
        shutil.copy(src_img_path, dest_img_path)
    except shutil.SameFileError:
        pass  # Source and destination are already the same file
    except OSError as e:
        print(f"Error copying image {src_img_path}: {e}. Skipping.")
        return None

    try:
        with Image.open(dest_img_path) as img:
            return img.size
    except Exception as e:
        print(f"Could not open image {dest_img_path} for dimensions: {e}. Skipping annotation.")
        return None


def process_annotation_file(annotation_file_path, image_source_dir, image_dest_dir, label_dest_dir):
    """
    Convert one annotations file into YOLO-format images and label files.

    Each usable line is read as ``<image> <class> <field> <x1> <y1> <x2> <y2>``
    (whitespace separated; the third field is ignored). The referenced image is
    copied into ``image_dest_dir`` and one normalized
    ``class_id x_center y_center width height`` row is appended to the matching
    ``.txt`` file in ``label_dest_dir``.

    Existing ``*.txt`` files in ``label_dest_dir`` are deleted first so that
    re-running does not duplicate rows.

    Args:
        annotation_file_path (Path): annotations.txt file.
        image_source_dir (Path): directory containing the original images.
        image_dest_dir (Path): directory for images in YOLO format.
        label_dest_dir (Path): directory for label .txt files in YOLO format.
    """
    print(f"Processing annotations from {annotation_file_path}...")

    image_dest_dir.mkdir(parents=True, exist_ok=True)
    label_dest_dir.mkdir(parents=True, exist_ok=True)

    # Labels are appended below, so stale files from a previous run must go.
    for stale_label in label_dest_dir.glob("*.txt"):
        stale_label.unlink()

    # Lines are split on whitespace, so a class token can never contain a
    # space; match names such as "Coca Cola" against "Cocacola"/"CocaCola".
    class_lookup = {
        _normalize_class_name(name): class_id for name, class_id in CLASS_TO_ID.items()
    }
    # NOTE(review): the class list spells "Unisef" while annotation files
    # presumably use "Unicef" -- accept either until the spelling is confirmed.
    for spelling, variant in (("unicef", "unisef"), ("unisef", "unicef")):
        if spelling in class_lookup:
            class_lookup.setdefault(variant, class_lookup[spelling])

    # filename -> (width, height), or None when the image is unusable. An image
    # usually has several annotation lines; copy and open it only once.
    image_sizes = {}

    with open(annotation_file_path, 'r') as annotation_file:
        for line in annotation_file:
            parts = line.strip().split()
            if len(parts) < 7:
                continue

            img_filename = parts[0]
            class_name = parts[1]
            try:
                x1, y1, x2, y2 = map(int, parts[3:7])
            except ValueError:
                print(f"Skipping malformed coordinates in line: {line.strip()}")
                continue

            class_id = class_lookup.get(_normalize_class_name(class_name))
            if class_id is None:
                continue  # Skip if class is not one of the known logos

            if img_filename not in image_sizes:
                image_sizes[img_filename] = _copy_image_and_read_size(
                    image_source_dir / img_filename,
                    image_dest_dir / img_filename,
                )
            size = image_sizes[img_filename]
            if size is None:
                continue
            img_w, img_h = size

            # Order the corners and clamp them to the image so the normalized
            # values stay inside [0, 1].
            left = max(0, min(x1, x2))
            right = min(img_w, max(x1, x2))
            top = max(0, min(y1, y2))
            bottom = min(img_h, max(y1, y2))
            if right <= left or bottom <= top:
                print(f"Skipping degenerate box in line: {line.strip()}")
                continue

            # Convert to YOLO format (normalized x_center, y_center, width, height)
            box_x_center = ((left + right) / 2) / img_w
            box_y_center = ((top + bottom) / 2) / img_h
            box_width = (right - left) / img_w
            box_height = (bottom - top) / img_h

            label_filename = Path(img_filename).stem + ".txt"
            # 'a' to append: one image may carry several annotated objects
            with open(label_dest_dir / label_filename, 'a') as label_file:
                label_file.write(
                    f"{class_id} {box_x_center:.6f} {box_y_center:.6f} {box_width:.6f} {box_height:.6f}\n"
                )
    print(f"Finished processing {annotation_file_path}.")


# Dataset Download and Preparation Logic & YOLOv8 Training
def _write_data_yaml(data_yaml_path, dataset_root, train_dir, val_dir):
    """Write the YOLOv8 dataset descriptor (paths, class count, class names)."""
    data_yaml_path.parent.mkdir(parents=True, exist_ok=True)
    content = (
        f"path: {dataset_root}\n"
        f"train: {train_dir}\n"
        f"val: {val_dir}\n"
        f"nc: {len(CLASSES)}\n"
        f"names: {CLASSES}\n"
    )
    with open(data_yaml_path, "w") as yaml_file:
        yaml_file.write(content)


def setup_dataset_and_train_model():
    """
    Prepare the dataset and train the YOLOv8 model.

    Steps, each skipped when its result already exists:
      1. Return immediately if both MODEL_PATH and data.yaml are present.
      2. Without Kaggle credentials in the environment, write a placeholder
         data.yaml (if none exists) and return without training.
      3. Download and unzip the Kaggle dataset into DATA_DIR.
      4. Convert the training and query annotations to YOLO format.
      5. Train from ``yolov8n.pt`` and copy the best weights to MODEL_PATH.

    Training errors are printed, not raised, so the caller can still start the
    app with a fallback model.

    Returns:
        str: path to data.yaml (possibly the placeholder one).
    """
    yolo_data_root = Path(DATA_DIR) / "yolov8_data"
    data_yaml_path = yolo_data_root / "data.yaml"

    # Create necessary directories if they don't exist
    yolo_data_root.mkdir(parents=True, exist_ok=True)

    # A trained model plus data.yaml implies a previous setup was successful
    if os.path.exists(MODEL_PATH) and data_yaml_path.exists():
        print(f"Model ({MODEL_PATH}) and data.yaml ({data_yaml_path}) found. Skipping dataset prep and training.")
        return str(data_yaml_path)

    # --- Kaggle Download Section (only if model/data.yaml not found) ---
    kaggle_username = os.getenv("KAGGLE_USERNAME")
    kaggle_key = os.getenv("KAGGLE_KEY")

    if not kaggle_username or not kaggle_key:
        print("Kaggle credentials (KAGGLE_USERNAME, KAGGLE_KEY) not found as environment variables.")
        print("To train the model from scratch, set these environment variables.")
        print("For Hugging Face Spaces, it's better to upload the trained model directly to the repo.")
        if not data_yaml_path.exists():
            print("Creating a dummy data.yaml for fallback.")
            _write_data_yaml(data_yaml_path, ".", ".", ".")
        return str(data_yaml_path)  # Returned even though it may be a placeholder

    print("Authenticating Kaggle API...")
    # Imported lazily so the app can start without the kaggle package when no
    # download is needed. The credentials are already in os.environ.
    from kaggle.api.kaggle_api_extended import KaggleApi
    api = KaggleApi()
    api.authenticate()
    print("Kaggle API authenticated.")

    print(f"Downloading dataset '{DATASET_NAME}' to '{DATA_DIR}'...")
    os.makedirs(DATA_DIR, exist_ok=True)

    dataset_zip_path = Path(DATA_DIR) / f"{DATASET_NAME.split('/')[-1]}.zip"
    if not dataset_zip_path.exists():
        api.dataset_download_files(DATASET_NAME, path=DATA_DIR, unzip=False)
        print("Dataset ZIP downloaded.")
    else:
        print("Dataset ZIP already exists, skipping download.")

    print(f"Unzipping dataset to {DATA_DIR}...")
    unzipped_dir = Path(DATA_DIR) / "Flickr_Logos_27_dataset"
    if not unzipped_dir.exists():
        with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
            zip_ref.extractall(DATA_DIR)
        print("Dataset unzipped.")
    else:
        print("Dataset already unzipped, skipping unzipping.")

    # Prepare directories for YOLOv8
    print("Creating YOLOv8 data structure...")
    train_images_dest = yolo_data_root / "images" / "train"
    val_images_dest = yolo_data_root / "images" / "val"
    train_labels_dest = yolo_data_root / "labels" / "train"
    val_labels_dest = yolo_data_root / "labels" / "val"
    for directory in (train_images_dest, val_images_dest, train_labels_dest, val_labels_dest):
        directory.mkdir(parents=True, exist_ok=True)
    print("YOLOv8 data directories created.")

    training_root = unzipped_dir / "FlickrLogos-27_training"
    query_root = unzipped_dir / "FlickrLogos-27_query"

    process_annotation_file(
        training_root / "annotations.txt", training_root / "images",
        train_images_dest, train_labels_dest,
    )
    # The query split is used as the validation set.
    process_annotation_file(
        query_root / "annotations.txt", query_root / "images",
        val_images_dest, val_labels_dest,
    )

    # Create data.yaml for YOLOv8
    _write_data_yaml(data_yaml_path, yolo_data_root.resolve(), "images/train", "images/val")
    print("data.yaml created for YOLOv8 training.")

    # --- Train YOLOv8 Model ---
    print("Starting YOLOv8 model training...")
    try:
        import torch  # already a dependency of ultralytics

        # Fall back to CPU instead of failing when no CUDA device is present.
        train_device = 0 if torch.cuda.is_available() else "cpu"

        model = YOLO('yolov8n.pt')  # Pretrained nano model: small and fast

        model.train(
            data=str(data_yaml_path),
            epochs=100,
            imgsz=640,  # Image size for training
            batch=16,  # Batch size (adjust based on GPU memory)
            optimizer='AdamW',
            lr0=0.01,  # Initial learning rate
            lrf=0.01,  # Final learning rate as a fraction of lr0
            momentum=0.937,  # Momentum (beta1 for Adam-family optimizers)
            weight_decay=0.0005,  # L2 regularization
            warmup_epochs=3.0,  # Warmup for initial learning rate
            patience=50,  # Early stopping: stop if no improvement for 50 epochs
            augment=True,
            cos_lr=True,  # Use cosine learning rate scheduler
            val=True,  # Validate during training
            workers=8,  # Number of Dataloader workers (adjust based on CPU cores)
            device=train_device,
        )

        # Ultralytics increments the run directory (train, train2, ...), so ask
        # the trainer where it actually saved instead of assuming "train".
        run_dir = getattr(getattr(model, "trainer", None), "save_dir", None)
        if run_dir is not None:
            trained_model_source_path = Path(run_dir) / "weights" / "best.pt"
        else:
            trained_model_source_path = Path("runs/detect/train/weights/best.pt")

        if trained_model_source_path.exists():
            shutil.copy(trained_model_source_path, MODEL_PATH)
            print(f"Trained model saved to {MODEL_PATH}")
        else:
            print("Error: Trained model not found after training.")
    except Exception as e:
        # Deliberately broad: a failed training run must not stop the app.
        print(f"Error during YOLOv8 training: {e}")
        print("Proceeding to inference. If model was not saved, it will fall back to default YOLOv8n.")

    return str(data_yaml_path)  # Return path to data.yaml

# Module-level singletons, filled in by initialize_models() so that each
# resource is created at most once per process. Each stays None until that
# function has run, or if loading that resource failed / was skipped.
yolo_model_instance = None  # YOLO detector used for inference
openai_client_instance = None  # OpenAI client used for brand identification
default_font = None # To store a loaded font for drawing text

# Initialize models (YOLO and OpenAI)
def initialize_models():
    """Populate the global YOLO model, OpenAI client and label font.

    Each global is only initialized while it is still None, so calling this
    more than once is harmless. Failures are printed rather than raised:

    * YOLO: loads MODEL_PATH when it exists, otherwise the generic
      ``yolov8n.pt``; left as None if loading raises.
    * OpenAI: created only when OPENAI_API_KEY is set; left as None otherwise
      or if construction raises.
    * Font: ``arial.ttf`` at size 24, falling back to PIL's built-in font.
    """
    global yolo_model_instance, openai_client_instance, default_font

    if yolo_model_instance is None:
        try:
            if os.path.exists(MODEL_PATH):
                print(f"Loading YOLO model from {MODEL_PATH}...")
                yolo_model_instance = YOLO(MODEL_PATH)
                print("YOLO model loaded.")
            else:
                print(f"Trained YOLO model not found at {MODEL_PATH}. loading default yolov8n.pt for inference (less accurate).")
                yolo_model_instance = YOLO('yolov8n.pt')  # Fallback to a pre-trained general model
        except Exception as e:
            print(f"Error loading YOLO model: {e}")
            yolo_model_instance = None  # Inference code checks for None

    if not OPENAI_API_KEY:
        print("OPENAI_API_KEY not set. GPT-4o will not be used for brand identification.")
    elif openai_client_instance is None:
        try:
            # The client's constructor arguments are keyword-only; passing the
            # key positionally raises TypeError and leaves the client unset.
            openai_client_instance = OpenAI(api_key=OPENAI_API_KEY)
            print("OpenAI client initialized.")
        except Exception as e:
            print(f"Error initializing OpenAI client: {e}")
            openai_client_instance = None

    if default_font is None:
        try:
            default_font = ImageFont.truetype("arial.ttf", 24)
        except OSError:
            # Font file not available on this system
            default_font = ImageFont.load_default()
        print("Font for drawing labels loaded.")


# GPT-4o Vision API Call
def get_brand_name_from_gpt4o(cropped_image_pil):
    """
    Send a cropped image to the GPT-4o Vision API to identify the brand.

    The crop is PNG-encoded in memory and embedded as a base64 data URL.

    Args:
        cropped_image_pil (PIL.Image.Image): The cropped image of the logo.
    Returns:
        str: The brand name reported by the model; ``"Unknown"`` if the reply
        is empty; ``"N/A (API Key Missing / Client Error)"`` when no OpenAI
        client is available; ``"Error (API Call Failed)"`` if encoding or the
        request raises.
    """
    if not openai_client_instance:
        return "N/A (API Key Missing / Client Error)"

    try:
        buffered = io.BytesIO()
        cropped_image_pil.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        response = openai_client_instance.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What brand logo is clearly visible in this image? Respond with only the brand name. If no clear brand logo is present or I am unsure, respond with 'Unknown'."},
                        # "low" detail for faster processing, lower cost
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}", "detail": "low"}},
                    ],
                }
            ],
            max_tokens=20,  # Keep max_tokens low for concise, direct responses
            temperature=0.0,  # Low temperature for factual, deterministic output
        )
        # The message content can be None; treat that like an empty reply.
        brand_name = (response.choices[0].message.content or "").strip()
        return brand_name or "Unknown"
    except Exception as e:
        # Deliberately broad: one failed crop must not abort the whole request.
        print(f"Error calling GPT-4o: {e}")
        return "Error (API Call Failed)"

# Gradio Inference Function
def process_image_with_detection(image_pil):
    """
    Run YOLOv8 detection on an uploaded image and label each detection via GPT-4o.

    Every detected box is clamped to the image, cropped, and sent to
    get_brand_name_from_gpt4o(); the calls are made one after another.

    Args:
        image_pil (PIL.Image.Image | None): The input image from Gradio; None
            when the user submits without uploading anything.
    Returns:
        tuple | None: ``(image, annotations)`` in the shape expected by
        ``gr.AnnotatedImage``, where ``annotations`` is a list of
        ``((x1, y1, x2, y2), "brand (confidence)")``. The image is a copy of
        the input; nothing is drawn on it. Returns None when no image was
        provided, and ``(image_pil, [])`` when the YOLO model is not loaded.
    """
    if image_pil is None:
        gr.Warning("No image provided. Please upload an image first.")
        return None

    if yolo_model_instance is None:
        gr.Warning("YOLO model not loaded. Please check logs for errors or ensure model is trained")
        return image_pil, []  # Return original image if model not loaded

    print("Running YOLOv8 inference...")
    start_time_yolo = time.time()
    results = yolo_model_instance(image_pil, conf=0.3, iou=0.5, verbose=False)
    end_time_yolo = time.time()
    print(f"YOLOv8 inference took {end_time_yolo - start_time_yolo:.2f} seconds.")

    annotated_image_pil = image_pil.copy()
    annotations = []  # List for Gradio AnnotatedImage component

    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = float(box.conf[0])

            # Ensure coordinates are within image bounds to prevent cropping errors
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(image_pil.width, x2)
            y2 = min(image_pil.height, y2)

            if x2 <= x1 or y2 <= y1:  # Skip invalid or zero-area boxes
                continue

            cropped_image = image_pil.crop((x1, y1, x2, y2))

            print(f"Calling GPT-4o for brand identification on cropped region {x1, y1, x2, y2}...")
            start_time_gpt = time.time()
            brand_name = get_brand_name_from_gpt4o(cropped_image)
            end_time_gpt = time.time()
            print(f"GPT-4o call took {end_time_gpt - start_time_gpt:.2f} seconds. Identified: {brand_name}")

            # Add annotation for Gradio's AnnotatedImage
            annotations.append(((x1, y1, x2, y2), f"{brand_name} ({confidence:.2f})"))

    return annotated_image_pil, annotations



In [None]:
# Script entry point: prepare data/model, load the models, then serve the UI.
if __name__ == "__main__":
    print("Application starting...")

    # Dataset preparation and training; returns early when artifacts exist.
    print("Running setup_dataset_and_train_model (skips if model/data.yaml found)...")
    setup_dataset_and_train_model()

    # Models are loaded only after setup so a freshly trained model is used.
    print("Initializing models (YOLO and OpenAI client)")
    initialize_models()

    print("Creating Gradio interface")
    upload_component = gr.Image(type="pil", label="Upload Image")
    # color_map may carry fixed per-brand colours, e.g. "Adidas": "#000000".
    result_component = gr.AnnotatedImage(
        label="Detected Logos and Brands",
        show_legend=True,
        color_map={},
    )
    page_description = (
        "<p>Upload an image to detect logos using a fine-tuned YOLOv8 model. "
        "For each detected logo, the cropped region is sent to OpenAI's GPT-4o "
        "to identify the brand name.</p>"
    )
    example_images = []  # Paths to example images can be listed here

    iface = gr.Interface(
        fn=process_image_with_detection,
        inputs=upload_component,
        outputs=result_component,
        title="End-to-End Logo Recognition System",
        description=page_description,
        allow_flagging="never",  # Flagging feature stays disabled
        examples=example_images,
    )

    print("Launching Gradio application. Check the public URL for access.")
    iface.launch(debug=True, share=True)

Application starting...
Running setup_dataset_and_train_model (skips if model/data.yaml found)...
Kaggle credentials (KAGGLE_USERNAME, KAGGLE_KEY) not found as environment variables.
To train the model from scratch, set these environment variables.
For Hugging Face Spaces, it's better to upload the trained model directly to the repo.
Initializing models (YOLO and OpenAI client)
OPENAI_API_KEY not set. GPT-4o will not be used for brand identification.
Creating Gradio interface




Launching Gradio application. Check the public URL for access.
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://acc24cfe2072740cc0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Running YOLOv8 inference...
YOLOv8 inference took 0.32 seconds.
Calling GPT-4o for brand identification on cropped region (1420, 979, 1599, 1094)...
GPT-4o call took 0.00 seconds. Identified: N/A (API Key Missing / Client Error)
