# Google Vision API (Google Drive dataset)

In [1]:
import os
import math
from collections import Counter
from google.cloud import vision
import re
import pandas as pd
import requests
from dotenv import load_dotenv
from tqdm import tqdm  # Import tqdm for the progress bar
import io

# Google API authentication: load the .env file, then read the credentials
# path from the environment.
load_dotenv()
google_credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
if google_credentials_path:
    # Re-export the path through the process environment.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_credentials_path
else:
    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS environment variable is not set in the .env file.")

# Pre-compiled pattern that matches runs of word characters.
WORD = re.compile(r"\w+")

# Function to load image from the local folder and return it to Google Vision API
def load_image_from_file(image_path):
    """Read a local image file and wrap its bytes in a ``vision.Image``.

    Args:
        image_path: Path of the image file to read in binary mode.

    Returns:
        A ``vision.Image`` built from the file contents, or ``None`` when
        anything goes wrong (for example, the file cannot be opened).
    """
    try:
        with open(image_path, mode='rb') as handle:
            raw_bytes = handle.read()
        return vision.Image(content=raw_bytes)
    except Exception:
        # Best effort: callers treat None as "image unavailable".
        return None

# Function to generate labels using Google Vision API
def generate_labels_text(image_path):
    """Run label detection on an image and summarise the top labels.

    Args:
        image_path: Path of the local image file to annotate.

    Returns:
        The descriptions of at most the first five label annotations joined
        with ``', '``, or ``None`` if the image could not be loaded or any
        step raised an exception.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        img = load_image_from_file(image_path)
        if img is None:
            return None

        annotations = annotator.label_detection(image=img).label_annotations
        # Only the first five labels are kept.
        top_descriptions = [annotation.description for annotation in annotations[:5]]
        return ', '.join(top_descriptions)
    except Exception:
        return None

# Function to detect text in the image using Google Vision API
def detect_text(image_path):
    """Run document text detection (OCR) on an image.

    The complete OCR result is read from ``full_text_annotation.text``.
    Joining every entry of ``text_annotations`` would repeat the text,
    because that list starts with the whole text and then lists each
    detected word again.

    Args:
        image_path: Path of the local image file to analyse.

    Returns:
        The detected text on a single line (newlines replaced by spaces,
        surrounding whitespace stripped; empty string when nothing was
        detected), ``None`` if the image could not be loaded, or a string
        starting with ``"Error detecting text: "`` if the request failed.
    """
    try:
        client = vision.ImageAnnotatorClient()
        image = load_image_from_file(image_path)

        if image is None:
            return None

        response = client.document_text_detection(image=image)

        # Check the API-level error before touching the annotations so a
        # failed request is never mistaken for "no text found".
        if response.error.message:
            raise RuntimeError(response.error.message)

        full_text = response.full_text_annotation.text
        return full_text.replace("\n", " ").strip()
    except Exception as e:
        return f"Error detecting text: {str(e)}"

# Function to perform web detection using Google Vision API
def detect_web_entities(image_path):
    """Run web detection on an image and list the entities it returns.

    Args:
        image_path: Path of the local image file to analyse.

    Returns:
        The non-empty web entity descriptions joined with ``', '``, or
        ``None`` if the image could not be loaded or any step raised.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        img = load_image_from_file(image_path)
        if img is None:
            return None

        entities = annotator.web_detection(image=img).web_detection.web_entities
        # Entities without a description are skipped.
        described = [entity.description for entity in entities if entity.description]
        return ', '.join(described)
    except Exception:
        return None

# Function to perform landmark detection using Google Vision API
def detect_landmarks(image_path):
    """Run landmark detection on an image and list the landmarks found.

    Args:
        image_path: Path of the local image file to analyse.

    Returns:
        The landmark descriptions joined with ``', '`` (empty string when
        there are none), or ``None`` if the image could not be loaded or
        any step raised.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        img = load_image_from_file(image_path)
        if img is None:
            return None

        result = annotator.landmark_detection(image=img)
        names = [annotation.description for annotation in result.landmark_annotations]
        return ', '.join(names)
    except Exception:
        return None

# Function to perform object localization using Google Vision API
def localize_objects(image_path):
    """Run object localization on an image and list the object names.

    Args:
        image_path: Path of the local image file to analyse.

    Returns:
        The names of the localized objects joined with ``', '`` (empty
        string when there are none), or ``None`` if the image could not be
        loaded or any step raised.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        img = load_image_from_file(image_path)
        if img is None:
            return None

        result = annotator.object_localization(image=img)
        found_names = [item.name for item in result.localized_object_annotations]
        return ', '.join(found_names)
    except Exception:
        return None

# Function to log errors to a file
def log_error(image_path):
    """Append an image path to ``error.log``, one path per line.

    The file is opened as UTF-8 so that paths containing non-ASCII
    characters are written the same way on every platform, instead of
    depending on the locale's default encoding.

    Args:
        image_path: Path to record in the log.
    """
    with open('error.log', 'a', encoding='utf-8') as file:
        file.write(f"{image_path}\n")

# Load the CSV file with the relevant columns
df = pd.read_csv('NML_metadata.csv')

# One accumulator per output column. Every loop iteration appends exactly one
# entry to each list so they stay aligned with the DataFrame rows.
image_paths = []
labels_texts = []
detected_texts = []
web_entities_texts = []
landmarks_texts = []
objects_texts = []

# Process each row with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing Images", unit="row"):
    # A missing 'filespec' (NaN) becomes an empty filename.
    image_filename = str(row['filespec']) if pd.notna(row['filespec']) else ''
    image_path = os.path.join('NML_Google_images', image_filename)

    # isfile() rather than exists(): an empty filename resolves to the image
    # directory itself, which exists but is not an image and must take the
    # "not found" branch.
    if os.path.isfile(image_path):
        image_paths.append(image_path)

        # Generate labels text
        labels_text = generate_labels_text(image_path)
        labels_texts.append(labels_text or "Labels text not generated")

        # Detect text in the image; use the same placeholder as the
        # "not found" branch when nothing comes back.
        detected_text = detect_text(image_path)
        detected_texts.append(detected_text or "No text detected")

        # Detect web entities
        web_entities_text = detect_web_entities(image_path)
        web_entities_texts.append(web_entities_text or "No web entities detected")

        # Detect landmarks
        landmarks_text = detect_landmarks(image_path)
        landmarks_texts.append(landmarks_text or "No landmarks detected")

        # Detect objects
        objects_text = localize_objects(image_path)
        objects_texts.append(objects_text or "No objects detected")
    else:
        # No usable image: fill every column with a placeholder and record
        # the path that was tried.
        image_paths.append(None)
        labels_texts.append("No image found")
        detected_texts.append("No text detected")
        web_entities_texts.append("No web entities detected")
        landmarks_texts.append("No landmarks detected")
        objects_texts.append("No objects detected")
        log_error(image_path)


# Add the extracted data to the DataFrame
df['Extracted Image Path'] = image_paths
df['Generated Labels Text'] = labels_texts
df['Detected Text'] = detected_texts
df['Web Entities'] = web_entities_texts
df['Landmarks'] = landmarks_texts
df['Objects'] = objects_texts

# Save the updated DataFrame to a new CSV
df.to_csv('updated_with_all_features_imagesFromHD4.csv', index=False)


Processing Images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 943/943 [47:59<00:00,  3.05s/row]


# Google Vision informed GPT (Google Drive dataset)

In [2]:
import openai
import pandas as pd
from tqdm import tqdm
import os
from dotenv import load_dotenv
import unicodedata

# Load the .env file
load_dotenv()

# Fail fast when the key is missing instead of letting every row's API call
# fail later and fill the output column with error messages.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set in the .env file.")
openai.api_key = openai_api_key

# Function to generate alt text using OpenAI
def generate_alt_text_with_context(image_path, descriptions):
    """Ask the OpenAI chat model for alt text describing an image.

    The prompt template is read from the ``GPT_PROMPT`` environment variable
    and filled in with ``image_url`` and ``descriptions``.

    Args:
        image_path: Value substituted for ``{image_url}`` in the template.
        descriptions: Value substituted for ``{descriptions}`` in the template.

    Returns:
        The model's reply, stripped and NFC-normalised, or a string starting
        with ``"Error generating alt text: "`` if the API call failed.

    Raises:
        ValueError: If ``GPT_PROMPT`` is not set (or is empty).
    """
    template = os.getenv("GPT_PROMPT")
    if not template:
        raise ValueError("GPT_PROMPT environment variable is not set in the .env file.")

    user_message = template.format(image_url=image_path, descriptions=descriptions)
    conversation = [
        {"role": "system", "content": "You are a helpful assistant for generating alt text for images."},
        {"role": "user", "content": user_message},
    ]

    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            max_tokens=200,
            temperature=0.3,
        )
        raw_reply = completion['choices'][0]['message']['content']
        return unicodedata.normalize('NFC', raw_reply.strip())
    except Exception as err:
        return f"Error generating alt text: {str(err)}"

# Load the CSV file
df = pd.read_csv('updated_with_all_features_imagesFromHD4.csv', encoding='utf-8')

# Initialize a list for the alt text (one entry per DataFrame row)
alt_texts = []

# Process each row and generate alt text with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Alt Text", unit="row"):
    try:
        image_path = row['Extracted Image Path']

        if pd.isna(image_path) or not str(image_path).strip():
            alt_texts.append("")  # Empty field if no image URL
            continue

        # Context for the prompt: the title plus every Vision API column.
        descriptions = (
            f"Title: {row['title/en']}, "
            f"Labels: {row['Generated Labels Text']}, "
            f"Detected Text: {row['Detected Text']}, "
            f"Web Detection: {row['Web Entities']}, "
            f"Landmark Detection: {row['Landmarks']}, "
            f"Object Localization: {row['Objects']}"
        )

        alt_text = generate_alt_text_with_context(image_path, descriptions)

        # Prefix genuine alt text only. Error messages returned by the helper
        # stay unprefixed, matching what the except branch below stores.
        if not alt_text.startswith(("Alt text:", "Error generating alt text:")):
            alt_text = f"Alt text: {alt_text}"
        alt_texts.append(alt_text)
    except Exception as e:
        # Keep the list aligned with the rows even when a row fails.
        alt_texts.append(f"Error generating alt text: {str(e)}")
        print(f"Error processing row {index}: {str(e)}")

# Add the generated alt text to the DataFrame
df['GPT alt text'] = alt_texts

# Save the updated DataFrame to a new CSV
output_file = 'updated_with_alt_text_GPT_imagesFromHD4.csv'
df.to_csv(output_file, encoding='utf-8', index=False)
print(f"Alt text generation completed. Updated file saved as '{output_file}'.")

Generating Alt Text: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 943/943 [08:24<00:00,  1.87row/s]

Alt text generation completed. Updated file saved as 'updated_with_alt_text_GPT_imagesFromHD4.csv'.





# GPT only (Google Drive dataset)

In [3]:
import openai
import pandas as pd
from tqdm import tqdm
import os
from dotenv import load_dotenv
import unicodedata

# Load the .env file
load_dotenv()

# Fail fast when the key is missing instead of letting every row's API call
# fail later and fill the output column with error messages.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set in the .env file.")
openai.api_key = openai_api_key

# Function to generate alt text using OpenAI
def generate_alt_text_with_context(image_path, descriptions):
    """Generate alt text for an image through the OpenAI chat API.

    The user prompt comes from the ``GPT_PROMPT`` environment variable, with
    its ``{image_url}`` and ``{descriptions}`` placeholders filled in.

    Args:
        image_path: Value inserted for ``{image_url}``.
        descriptions: Value inserted for ``{descriptions}``.

    Returns:
        The stripped, NFC-normalised reply text, or a string starting with
        ``"Error generating alt text: "`` when the API call raised.

    Raises:
        ValueError: If ``GPT_PROMPT`` is unset or empty.
    """
    prompt_template = os.getenv("GPT_PROMPT")
    if not prompt_template:
        raise ValueError("GPT_PROMPT environment variable is not set in the .env file.")

    filled_prompt = prompt_template.format(image_url=image_path, descriptions=descriptions)
    system_turn = {"role": "system", "content": "You are a helpful assistant for generating alt text for images."}
    user_turn = {"role": "user", "content": filled_prompt}

    try:
        reply = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[system_turn, user_turn],
            max_tokens=200,
            temperature=0.3,
        )
        content = reply['choices'][0]['message']['content'].strip()
        return unicodedata.normalize('NFC', content)
    except Exception as failure:
        return f"Error generating alt text: {str(failure)}"

# Load the CSV file
df = pd.read_csv('updated_with_all_features_imagesFromHD4.csv', encoding='utf-8')

# Initialize a list for the alt text (one entry per DataFrame row)
alt_texts = []

# Process each row and generate alt text with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Alt Text", unit="row"):
    try:
        image_path = row['Extracted Image Path']

        if pd.isna(image_path) or not str(image_path).strip():
            alt_texts.append("")  # Empty field if no image URL
            continue

        # GPT-only variant: the title is the only context given to the model.
        descriptions = (
            f"Title: {row['title/en']}"
        )

        alt_text = generate_alt_text_with_context(image_path, descriptions)

        # Prefix genuine alt text only. Error messages returned by the helper
        # stay unprefixed, matching what the except branch below stores.
        if not alt_text.startswith(("Alt text:", "Error generating alt text:")):
            alt_text = f"Alt text: {alt_text}"
        alt_texts.append(alt_text)
    except Exception as e:
        # Keep the list aligned with the rows even when a row fails.
        alt_texts.append(f"Error generating alt text: {str(e)}")
        print(f"Error processing row {index}: {str(e)}")

# Add the generated alt text to the DataFrame
df['GPT alt text'] = alt_texts

# Save the updated DataFrame to a new CSV
output_file = 'updated_with_alt_text_GPTonly_imagesFromHD4.csv'
df.to_csv(output_file, encoding='utf-8', index=False)
print(f"Alt text generation completed. Updated file saved as '{output_file}'.")

Generating Alt Text: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 943/943 [08:42<00:00,  1.80row/s]

Alt text generation completed. Updated file saved as 'updated_with_alt_text_GPTonly_imagesFromHD4.csv'.



