In [1]:
from dotenv import load_dotenv
import os
import shutil
import requests
import json
from pathlib import Path
from uuid import uuid4

# Load settings from a local .env file (python-dotenv) into the process environment.
load_dotenv()

# Folder the photos are read from, and folder that receives the sorted copies
# and metadata.json.
source_folder = 'Images_Input'
destination_folder = 'Images_Output'

# Azure Vision connection settings. Each value is None when its environment
# variable is not set. azure_region is not used by the cells shown here.
azure_key = os.environ.get('API_KEY')
azure_region = os.environ.get('REGION')
azure_endpoint = os.environ.get('URL')




In [2]:
def analyze_image(image_path, endpoint, key, timeout=60):
    """
    Sends an image to Azure Vision's analyze API (v3.2) and returns the JSON response.

    A single call requests the Description, Objects, Tags, Categories, Faces,
    Adult, Brands and Color visual features.

    Args:
        image_path (str): Path to the image file to analyze.
        endpoint (str): Azure Vision endpoint base URL, e.g.
            'https://<region>.api.cognitive.microsoft.com'. A trailing slash is tolerated.
        key (str): Azure Vision API subscription key.
        timeout (float | tuple): Seconds to wait for the service, passed straight to
            ``requests.post``. Defaults to 60 so a stalled connection cannot hang
            the notebook forever.

    Returns:
        dict: JSON response from Azure Vision containing analysis results.

    Raises:
        ValueError: If ``endpoint`` or ``key`` is empty or None (typically an unset
            environment variable).
        OSError: If the image file cannot be opened.
        requests.HTTPError: If the service answers with a 4xx/5xx status.
    """
    if not endpoint or not key:
        raise ValueError(
            "Azure Vision endpoint and key must both be set "
            "(check the URL and API_KEY environment variables)."
        )

    # Endpoints are often written with a trailing '/', which would otherwise
    # produce '...//vision/v3.2/analyze'.
    base_url = endpoint.rstrip('/')
    analyze_url = f"{base_url}/vision/v3.2/analyze?visualFeatures=Description,Objects,Tags,Categories,Faces,Adult,Brands,Color"
    headers = {
        'Ocp-Apim-Subscription-Key': key,
        'Content-Type': 'application/octet-stream'
    }
    # Stream the raw bytes; the file handle is closed as soon as the upload finishes.
    with open(image_path, 'rb') as image_data:
        response = requests.post(analyze_url, headers=headers, data=image_data, timeout=timeout)
    response.raise_for_status()
    return response.json()


In [3]:
def process_images(source_folder, destination_folder, endpoint, key, confidence_threshold=0.7):
    """
    Processes all images in the source_folder using Azure AI Vision,
    extracts and organizes metadata, and saves both categorized image copies
    and a detailed metadata.json summary in the destination_folder.

    For each photo:
      - Keeps objects, tags, categories and brands at or above the confidence threshold
      - Records the caption, face info, adult-content flags, dominant and accent colors,
        file size, pixel dimensions and a random unique ID
      - Copies the image into one folder per retained object/tag/category label
        (or 'Uncategorized' when nothing passes the threshold)
      - Appends a record to metadata.json, written once at the destination root

    Files are visited in sorted name order so metadata.json is reproducible.
    Only regular files with a .jpg/.jpeg/.png/.bmp suffix are processed.

    Args:
        source_folder (str): Path to folder with input images.
        destination_folder (str): Path to output folder for results and metadata.json.
            Created (including missing parents) if it does not exist.
        endpoint (str): Azure Vision endpoint URL.
        key (str): Azure Vision API subscription key.
        confidence_threshold (float): Minimum confidence to consider a label valid.

    Raises:
        Whatever ``analyze_image`` raises for a failed request; the error aborts
        the run before metadata.json is written.
    """
    # Pillow is only needed here (to read pixel dimensions), so it stays a local import.
    from PIL import Image

    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp'}

    metadata = []
    dest = Path(destination_folder)
    dest.mkdir(parents=True, exist_ok=True)

    for img_file in sorted(os.listdir(source_folder)):
        img_path = os.path.join(source_folder, img_file)
        # Only process regular files with a standard image extension. Comparing the
        # real suffix avoids matching names that merely end in "jpg" without a dot.
        if Path(img_file).suffix.lower() not in image_suffixes or not os.path.isfile(img_path):
            continue

        # Get predictions and analysis from Azure Vision
        analysis = analyze_image(img_path, endpoint, key)

        # Short caption generated by Azure Vision. The list can be present but
        # empty, so fall back to a blank caption instead of indexing into [].
        captions = analysis.get('description', {}).get('captions') or [{}]
        description = captions[0].get('text', '')

        # Objects with names, bounding box, confidence - only above threshold
        objects = [
            {
                'name': obj['object'],
                'confidence': obj.get('confidence', None),
                'rectangle': obj.get('rectangle', None)
            }
            for obj in analysis.get('objects', [])
            if 'confidence' in obj and obj['confidence'] >= confidence_threshold
        ]

        # Tags with names and confidences - only above threshold. A tag without a
        # confidence is kept (treated as 1.0 by the filter) and recorded as None.
        tags = [
            {'name': tag['name'], 'confidence': tag.get('confidence')}
            for tag in analysis.get('tags', [])
            if isinstance(tag, dict)
            and 'name' in tag
            and tag.get('confidence', 1.0) >= confidence_threshold
        ]
        tag_names = [tag['name'] for tag in tags]

        # Categories - often scene types, above threshold
        categories = [
            {'name': cat['name'], 'score': cat['score']}
            for cat in analysis.get('categories', [])
            if 'score' in cat and cat['score'] >= confidence_threshold and 'name' in cat
        ]
        category_names = [cat['name'] for cat in categories]

        # Detected faces with extra info if present
        faces = [
            {
                'gender': face.get('gender'),
                'age': face.get('age'),
                'rectangle': face.get('faceRectangle')
            }
            for face in analysis.get('faces', [])
        ]

        # Adult-content block from the Vision API, stored as returned
        adult_content = analysis.get('adult', {})

        # Detected brands/logos above threshold
        brands = [
            {'name': b['name'], 'confidence': b.get('confidence')}
            for b in analysis.get('brands', [])
            if b.get('confidence', 0.0) >= confidence_threshold
        ]

        # Extract prominent colors
        color_data = analysis.get('color', {})
        dominant_foreground_color = color_data.get('dominantColorForeground', '')
        dominant_background_color = color_data.get('dominantColorBackground', '')
        accent_color = color_data.get('accentColor', '')
        dominant_colors = color_data.get('dominantColors', [])

        # Top-level 'text' field of the response, if any.
        # NOTE(review): the features requested by analyze_image do not include OCR,
        # so this is presumably always empty - confirm against the API response.
        ocr_text = analysis.get('text', "")

        # Image width/height (pixel dimensions); best effort, None when unreadable
        try:
            with Image.open(img_path) as img:
                width, height = img.size
        except Exception:
            width, height = None, None

        random_id = str(uuid4())  # Each file gets a unique id
        img_size = os.path.getsize(img_path)

        # Gather all into a metadata record per photo
        meta = {
            'filename': img_file,
            'filepath': os.path.abspath(img_path),
            'uuid': random_id,
            'description': description,
            'objects': objects,
            'tags': tags,
            'categories': categories,
            'faces': faces,
            'adult_content': adult_content,
            'brands': brands,
            'colors': {
                'dominant_foreground_color': dominant_foreground_color,
                'dominant_background_color': dominant_background_color,
                'accent_color': accent_color,
                'dominant_colors': dominant_colors
            },
            'ocr_text': ocr_text,
            'image_size_bytes': img_size,
            'image_dimensions': {'width': width, 'height': height}
        }
        metadata.append(meta)

        # Multi-label: each relevant object, tag, and category above threshold creates a folder for copying the image
        labels = set([obj['name'] for obj in objects] + tag_names + category_names)
        if not labels:
            labels = {'Uncategorized'}  # Fallback if nothing passes threshold
        for label in sorted(labels):
            # Both separators are replaced so a label never turns into a nested
            # path, on POSIX or on Windows.
            safe_label = str(label).replace('/', '_').replace('\\', '_')
            label_dir = dest / safe_label
            label_dir.mkdir(exist_ok=True)
            shutil.copy2(img_path, label_dir / img_file)

    # Final output: write full metadata for all processed images as a pretty JSON
    with open(dest / 'metadata.json', 'w', encoding='utf-8') as f:
        json.dump(metadata, f, ensure_ascii=False, indent=4)


In [4]:
# Helper for starting fresh: call it to wipe the output folder before a new run.
def delete_output_folder(destination_folder):
    """
    Recursively removes destination_folder and everything inside it.

    Does nothing when the path does not exist.

    Args:
        destination_folder (str): Path of the output folder to delete.
    """
    if not os.path.exists(destination_folder):
        return
    shutil.rmtree(destination_folder)

In [5]:
# Start from a clean output folder, then analyze and sort every input image.
# The 0.9 threshold here is stricter than process_images' 0.7 default.
delete_output_folder(destination_folder)
process_images(
    source_folder=source_folder,
    destination_folder=destination_folder,
    endpoint=azure_endpoint,
    key=azure_key,
    confidence_threshold=0.9,
)