# Use Google Vision to build features to inform GPT

In [None]:
import os
import math
from collections import Counter
from google.cloud import vision
import re
import pandas as pd
import requests
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time
from tqdm import tqdm  # Import tqdm

# Google API authentication
# Pull the settings stored in the .env file into the process environment
load_dotenv()

# Read the path to the Google credentials file and stop early when it is absent
google_credentials_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if not google_credentials_path:
    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS environment variable is not set in the .env file.")

# Write the value back so it is explicitly present in this process's environment
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = google_credentials_path

# Compiled pattern matching runs of word characters
# NOTE(review): WORD is not referenced anywhere in the visible part of this notebook
WORD = re.compile(r"\w+")

# Function to extract image URL from a webpage using Selenium
def extract_image_url_selenium(page_url):
    """Open ``page_url`` in headless Chrome and return the exhibition image URL.

    Args:
        page_url: Address of the web page to load.

    Returns:
        The ``src`` attribute of the first element matching
        ``img.exhibition__image.image--has-caption``, or ``None`` when the
        browser cannot be started, the page cannot be loaded, or no matching
        element is found.
    """
    # Bind the name before the try block so cleanup stays safe even when
    # Chrome never starts (previously the except branch hit an unbound name).
    driver = None
    try:
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run in headless mode
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")

        service = Service()  # Replace with ChromeDriver path if needed
        driver = webdriver.Chrome(service=service, options=chrome_options)

        driver.get(page_url)
        time.sleep(2)  # Wait for the page to load

        img_tag = driver.find_element(By.CSS_SELECTOR, 'img.exhibition__image.image--has-caption')
        return img_tag.get_attribute("src")
    except Exception:
        # Best-effort lookup: the caller treats None as "no image found"
        return None
    finally:
        # Always release the browser process, on success and on failure alike
        if driver is not None:
            driver.quit()

# Build a short comma-separated label summary with the Google Vision API
def generate_labels_text(image_url):
    """Return up to five Vision label descriptions for an image.

    Args:
        image_url: URI of the image; it is passed to the API as the image
            source rather than downloaded here.

    Returns:
        The descriptions of the first five label annotations joined with
        ", " (an empty string when there are none), or ``None`` if any step
        raises.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        remote_image = vision.Image()
        remote_image.source.image_uri = image_url

        annotations = annotator.label_detection(image=remote_image).label_annotations

        # Only the first five labels are kept
        top_descriptions = [annotation.description for annotation in annotations[:5]]
        return ', '.join(top_descriptions)
    except Exception:
        return None

# Function to detect text in the image using Google Vision API
def detect_text(image_url):
    """Run dense-text OCR on an image and return the text on a single line.

    The image bytes are downloaded here and sent to the API as content.

    Args:
        image_url: URL the image is downloaded from.

    Returns:
        The detected text with line breaks replaced by spaces (an empty
        string when nothing was detected). On any failure the string
        "Error detecting text: <reason>" is returned instead of raising.
    """
    try:
        client = vision.ImageAnnotatorClient()

        # Fetch image data from URL. The timeout keeps one stalled server from
        # hanging the whole batch, and raise_for_status stops an HTTP error
        # page from being submitted as if it were image data.
        http_response = requests.get(image_url, timeout=30)
        http_response.raise_for_status()
        image = vision.Image(content=http_response.content)

        # Use document_text_detection for dense text
        response = client.document_text_detection(image=image)

        # Check for an API-level error before consuming the result
        if response.error.message:
            raise Exception(
                "{}\nFor more info on error messages, check: "
                "https://cloud.google.com/apis/design/errors".format(response.error.message)
            )

        texts = response.text_annotations
        if not texts:
            return ""

        # The first annotation already holds the complete detected text; the
        # remaining ones repeat it word by word, so joining all of them would
        # duplicate the content.
        return texts[0].description.replace("\n", " ").strip()
    except Exception as e:
        return f"Error detecting text: {str(e)}"

# List the web entities Google Vision associates with an image
def detect_web_entities(image_url):
    """Return the web-entity descriptions found for an image.

    Args:
        image_url: URI of the image; it is passed to the API as the image
            source.

    Returns:
        The non-empty entity descriptions joined with ", " (an empty string
        when there are none), or ``None`` if any step raises.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        remote_image = vision.Image()
        remote_image.source.image_uri = image_url

        detection = annotator.web_detection(image=remote_image).web_detection

        # Entities with an empty description are left out
        named_entities = [
            entity.description
            for entity in detection.web_entities
            if entity.description
        ]
        return ', '.join(named_entities)
    except Exception:
        return None

# List the landmarks Google Vision recognises in an image
def detect_landmarks(image_url):
    """Return the landmark descriptions found for an image.

    Args:
        image_url: URI of the image; it is passed to the API as the image
            source.

    Returns:
        The landmark descriptions joined with ", " (an empty string when
        there are none), or ``None`` if any step raises.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        remote_image = vision.Image()
        remote_image.source.image_uri = image_url

        annotations = annotator.landmark_detection(image=remote_image).landmark_annotations
        descriptions = [annotation.description for annotation in annotations]
        return ', '.join(descriptions)
    except Exception:
        return None

# List the objects Google Vision localizes in an image
def localize_objects(image_url):
    """Return the names of the objects localized in an image.

    Args:
        image_url: URI of the image; it is passed to the API as the image
            source.

    Returns:
        The object names joined with ", " (an empty string when there are
        none), or ``None`` if any step raises.
    """
    try:
        annotator = vision.ImageAnnotatorClient()
        remote_image = vision.Image()
        remote_image.source.image_uri = image_url

        annotations = annotator.object_localization(image=remote_image).localized_object_annotations
        object_names = [annotation.name for annotation in annotations]
        return ', '.join(object_names)
    except Exception:
        return None

# Record a page URL that could not be processed
def log_error(page_url):
    """Append ``page_url`` as one line to ``error.log`` in the working directory.

    Args:
        page_url: Value to record; it is formatted into the line as-is.
    """
    entry = f"{page_url}\n"
    with open('error.log', 'a') as log_file:
        log_file.write(entry)

# Read the input table; its 'Image link' column holds the page URLs to visit
df = pd.read_csv('test_data.csv')

# One accumulator per output column; each gets exactly one entry per input row
image_urls = []
labels_texts = []
detected_texts = []
web_entities_texts = []
landmarks_texts = []
objects_texts = []

# Walk through the page URLs, showing a progress bar
for page_url in tqdm(df['Image link'], desc="Processing Image URLs", unit="URL"):
    image_url = extract_image_url_selenium(page_url)

    if not image_url:
        # No image on this page: fill every column with a placeholder and
        # record the page URL in the error log
        image_urls.append(None)
        labels_texts.append("No image found")
        detected_texts.append("No text detected")
        web_entities_texts.append("No web entities detected")
        landmarks_texts.append("No landmarks detected")
        objects_texts.append("No objects detected")
        log_error(page_url)
        continue

    image_urls.append(image_url)

    # Each detector result falls back to a placeholder when it is empty or None;
    # the text detector's result is stored exactly as returned
    labels_texts.append(generate_labels_text(image_url) or "Labels text not generated")
    detected_texts.append(detect_text(image_url))
    web_entities_texts.append(detect_web_entities(image_url) or "No web entities detected")
    landmarks_texts.append(detect_landmarks(image_url) or "No landmarks detected")
    objects_texts.append(localize_objects(image_url) or "No objects detected")

# Attach the collected values as new columns, in this order
for column_name, column_values in (
    ('Extracted Image URL', image_urls),
    ('Generated Labels Text', labels_texts),
    ('Detected Text', detected_texts),
    ('Web Entities', web_entities_texts),
    ('Landmarks', landmarks_texts),
    ('Objects', objects_texts),
):
    df[column_name] = column_values

# Write the enriched table to a new CSV without the index column
df.to_csv('updated_with_all_features.csv', index=False)


# Use descriptive features to help GPT generate alt text

In [None]:
# -*- coding: utf-8 -*-
from dotenv import load_dotenv
import openai
import pandas as pd
import os
from tqdm import tqdm  # Import tqdm for the progress bar
import unicodedata

# Pull the settings stored in the .env file into the process environment
load_dotenv()

# Hand the OpenAI library the key from OPENAI_API_KEY (None when it is unset)
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Ask the OpenAI chat model for alt text based on the collected descriptions
def generate_alt_text_with_context(image_url, descriptions):
    """Generate alt text for an image from its URL and descriptive features.

    The prompt template is read from the GPT_PROMPT environment variable and
    filled in through ``str.format`` with the ``image_url`` and
    ``descriptions`` fields.

    Args:
        image_url: Value substituted for ``{image_url}`` in the template.
        descriptions: Value substituted for ``{descriptions}`` in the template.

    Returns:
        The model's reply, stripped and NFC-normalized, or the string
        "Error generating alt text: <reason>" when the API call or the
        response handling raises.

    Raises:
        ValueError: If GPT_PROMPT is unset or empty.
    """
    template = os.getenv("GPT_PROMPT")
    if not template:
        raise ValueError("GPT_PROMPT environment variable is not set in the .env file.")

    # Formatting happens outside the try block, so template errors propagate
    user_prompt = template.format(image_url=image_url, descriptions=descriptions)
    conversation = [
        {"role": "system", "content": "You are a helpful assistant for generating alt text for images."},
        {"role": "user", "content": user_prompt},
    ]

    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            max_tokens=200,
            temperature=0.3,
        )
        reply = completion['choices'][0]['message']['content']
        return unicodedata.normalize('NFC', reply.strip())
    except Exception as exc:
        return f"Error generating alt text: {str(exc)}"

# Load the CSV file written by the feature-extraction step; stop if it cannot be read
try:
    df = pd.read_csv('updated_with_all_features.csv', encoding='utf-8')
except Exception as e:
    print(f"Error loading CSV file: {str(e)}")
    exit()

# Initialize a list for the alt text (one entry per row, in row order)
alt_texts = []

# Process each row and generate alt text with a progress bar
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Alt Text", unit="row"):
    try:
        image_url = row['Extracted Image URL']
        # Concatenate the relevant fields, excluding Category/discipline
        descriptions = (
            f"Title: {row['Title']}, "
            f"Labels: {row['Generated Labels Text']}, "
            f"Detected Text: {row['Detected Text']}, "
            f"Web Detection: {row['Web Entities']}, "
            f"Landmark Detection: {row['Landmarks']}, "
            f"Object Localization: {row['Objects']}"
        )

        # Generate alt text
        alt_text = generate_alt_text_with_context(image_url, descriptions)

        # Ensure consistent prefix. A failed API call comes back as an
        # "Error generating alt text: ..." string; it is stored unprefixed so
        # a failure is never labelled as alt text and matches the except
        # branch below.
        if not alt_text.startswith(("Alt text:", "Error generating alt text:")):
            alt_text = f"Alt text: {alt_text}"
        alt_texts.append(alt_text)
    except Exception as e:
        # Keep the row count aligned with the DataFrame even when a row fails
        alt_texts.append(f"Error generating alt text: {str(e)}")
        print(f"Error processing row {index}: {str(e)}")

# Add the generated alt text to the DataFrame
df['GPT alt text'] = alt_texts

# Save the updated DataFrame to a new CSV
output_file = 'updated_with_alt_text_GPT_ImpP5.csv'
try:
    df.to_csv(output_file, encoding='utf-8', index=False)
    print(f"Alt text generation completed. Updated file saved as '{output_file}'.")
except Exception as e:
    print(f"Error saving updated CSV file: {str(e)}")
