In [1]:
# this project was developed by Carolina Neves, student number 20231647 at NOVA IMS

import os
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Retrieve Azure Vision credentials from environment variables.
# os.getenv returns None when a variable is not set, so either value may be None here.
key = os.getenv("AZURE_VISION_KEY") 
endpoint = os.getenv("AZURE_VISION_ENDPOINT") 

# Folders used by the notebook: photos are listed from Source_Folder and the
# categorized copies are written under Destination_Folder.
Source_Folder = "input_photos"
Destination_Folder = "output_photos"

# Create the destination folder if it doesn't exist
os.makedirs(Destination_Folder, exist_ok=True)

# Path of the JSON file that receives the metadata of all processed photos.
# Despite its name, Metadata_Folder is a file path, not a directory.
Metadata_Folder = os.path.join(Destination_Folder, "metadata.json")

In [2]:
# authenticate the client

def authenticate_client():
    """Build the Azure Image Analysis client from the configured credentials.

    Reads the module-level ``endpoint`` and ``key`` values and fails fast with a
    message naming the missing setting(s), instead of letting an unset value
    reach the SDK.

    Returns:
        The ``ImageAnalysisClient`` created with an ``AzureKeyCredential``.

    Raises:
        ValueError: if ``endpoint`` or ``key`` is ``None`` or empty.
    """
    missing = [
        name
        for name, value in (("AZURE_VISION_ENDPOINT", endpoint), ("AZURE_VISION_KEY", key))
        if not value
    ]
    if missing:
        raise ValueError(
            "Missing Azure Vision configuration: " + ", ".join(missing)
            + ". Set it in the environment or in the .env file."
        )

    return ImageAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# module-level client instance; analyze_photo() calls client.analyze() on it
client = authenticate_client()

In [3]:
import shutil
import json

# extensions (lower-case, with the leading dot) that count as image files
image_extensions = [
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".gif",
    ".tiff",
]


def is_image_file(filename):
    """Tell whether ``filename`` ends with one of ``image_extensions``, ignoring case."""
    lowered = filename.lower()
    return any(lowered.endswith(extension) for extension in image_extensions)

# copy the file to the destination folder
def copy_file(source_path, destination_path):
    """Copy ``source_path`` to ``destination_path``, creating missing parent folders.

    The copy is done with ``shutil.copy2``.

    Args:
        source_path: path of the existing file to copy.
        destination_path: path of the copy; may be a bare filename, in which
            case the file is written to the current working directory.
    """
    parent_dir = os.path.dirname(destination_path)

    # dirname is "" for a bare filename and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    shutil.copy2(source_path, destination_path)

# saves metadata to a json file
def save_metadata(metadata, json_path):
    """Write ``metadata`` to ``json_path`` as UTF-8 JSON indented by four spaces.

    Non-ASCII characters are written as-is rather than escaped, and an existing
    file at ``json_path`` is overwritten.
    """
    with open(json_path, mode="w", encoding="utf-8") as handle:
        json.dump(metadata, fp=handle, indent=4, ensure_ascii=False)


In [4]:
from pathlib import Path

# Path object for the input folder.
# NOTE(review): not referenced in any of the visible cells - confirm whether it is still needed.
vision_source = Path(Source_Folder)

# analyze the photo and return tags and objects, confidence level is adjustable but default is 0.8
def _section_values(section):
    """Return the ``values`` list of a tags/objects result section, or ``[]`` if absent."""
    if section is None:
        return []
    return section.as_dict().get('values') or []


def _confident_tags(tags, confidence):
    """Yield ``(name, score)`` for each tag dict whose confidence is at least ``confidence``."""
    for tag in tags or []:
        # a tag without a 'confidence' key counts as 0 instead of raising KeyError
        score = tag.get('confidence', 0)
        if score >= confidence:
            yield tag['name'], score


def analyze_photo(photo_path: str, confidence: float = 0.8) -> str:
    """Analyze one photo with the module-level ``client`` and summarize the result.

    The image bytes are sent to ``client.analyze`` requesting the TAGS, OBJECTS
    and CAPTION visual features with ``language="en"``.

    Args:
        photo_path: path of the image file to read.
        confidence: minimum confidence a tag (or an object's tag) needs to be
            kept. The caption is not filtered by this threshold.

    Returns:
        A JSON string with three keys:
        ``"tags"``    - list of ``{"name", "confidence"}`` dicts;
        ``"objects"`` - list of ``{"name", "confidence", "bounding_box"}`` dicts,
                        one per qualifying tag of each detected object;
        ``"caption"`` - ``{"text", "confidence"}``, or ``{}`` when there is no caption.

    Errors raised by ``open`` or by ``client.analyze`` are not caught here.
    """
    with open(photo_path, "rb") as f:
        image_data = f.read()

    # The file is already closed here, so no handle is held open during the
    # network round trip.
    result = client.analyze(
        image_data=image_data,
        visual_features=[
            VisualFeatures.TAGS,
            VisualFeatures.OBJECTS,
            VisualFeatures.CAPTION],
        language="en")

    # Process TAGS (a missing section yields an empty list, like the caption guard below)
    tags_list = [
        {'name': name, 'confidence': score}
        for name, score in _confident_tags(
            _section_values(getattr(result, 'tags', None)), confidence)
    ]

    # Process OBJECTS: every object can carry several tags; each qualifying tag
    # becomes its own entry sharing the object's bounding box
    objects_list = []
    for obj in _section_values(getattr(result, 'objects', None)):
        for name, score in _confident_tags(obj.get('tags'), confidence):
            objects_list.append({
                'name': name,
                'confidence': score,
                'bounding_box': obj.get('boundingBox', {})
            })

    # Process CAPTION
    main_caption = {}
    caption = getattr(result, 'caption', None)
    if caption:
        caption_dict = caption.as_dict()
        main_caption = {
            'text': caption_dict.get('text', ''),
            'confidence': caption_dict.get('confidence', 0)
        }

    # return tags, objects and caption as a JSON string
    return json.dumps({
        "tags": tags_list,
        "objects": objects_list,
        "caption": main_caption
    })

In [5]:
from datetime import datetime

# Distribute photos into category folders based on the detected objects and tags,
# then save one metadata entry per photo.
metadata_list = []

# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# metadata entries stable between runs.
for image_file in sorted(os.listdir(Source_Folder)):
    image_path = os.path.join(Source_Folder, image_file)

    # skip anything that is not a regular image file (e.g. a sub-folder whose
    # name happens to end in ".jpg" would otherwise fail when opened)
    if not (is_image_file(image_file) and os.path.isfile(image_path)):
        continue

    # analyze_photo returns a JSON string, so convert it to a dictionary
    analysis_json = analyze_photo(image_path, confidence=0.8)
    analysis = json.loads(analysis_json)

    # categories are the lower-cased names of every detected object and tag
    categories = {
        item['name'].lower()
        for item in [*analysis["objects"], *analysis["tags"]]
    }
    if not categories:
        categories = {"uncategorized"}

    # Iteration order of a set of strings can differ between interpreter runs,
    # so work from a sorted list to keep folders/paths in metadata reproducible.
    category_names = sorted(categories)

    # copy the image to all relevant category folders
    destination_paths = []
    for category in category_names:
        # create the category subfolder if it doesn't exist
        destination_subfolder = os.path.join(Destination_Folder, category)
        os.makedirs(destination_subfolder, exist_ok=True)

        # copy the image into the subfolder and remember where it went
        destination_path = os.path.join(destination_subfolder, image_file)
        copy_file(image_path, destination_path)
        destination_paths.append(destination_path)

    # prepare metadata entry
    metadata_entry = {
        "filename": image_file,
        "original_path": image_path,
        "new_paths": destination_paths,
        "caption": analysis["caption"],
        "categories": category_names,
        "tags": analysis["tags"],
        "objects": analysis["objects"],
        "analysis_date": datetime.now().isoformat()
    }
    metadata_list.append(metadata_entry)

# save all metadata to a json file
save_metadata(metadata_list, Metadata_Folder)

print(f"{len(metadata_list)} images processed")
print(f"Metadata saved in: {Metadata_Folder}")

6 images processed
Metadata saved in: output_photos/metadata.json
