## Setup

In [1]:
import os
import json
import getpass
import googlemaps

from utils.helpers import call_openai, prep_images, get_top_n_results
from utils.eval import calculate_distance
from utils.constants import *

# API credentials: prompted for interactively so that no key is stored in the notebook.
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")
os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

# Initialize the Google Maps client with your API key
gmaps = googlemaps.Client(key=os.environ["GOOGLE_API_KEY"])


# Identifier of the sample to analyse; it is passed to prep_images and used as
# the key into the target-location table below.
# NOTE(review): `id` shadows the builtin of the same name; the name is kept
# because it is module-level state other cells may rely on.
id = "106"
image_inputs = prep_images(id)
results = {}

# Define model
model = "gpt-4o"

# Load target location. The encoding is explicit so the JSON is decoded the
# same way regardless of the platform's default locale.
with open('./data/master.json', 'r', encoding='utf-8') as f:
    target_locations = json.load(f)
target = target_locations[id]

## System Message

In [3]:
# System message sent with the call_openai requests in this notebook.
# Built with implicit string concatenation so that no source indentation
# leaks into the prompt text.
SYS_MESSAGE_PROMPT = (
    "You are an expert at the game GeoGuessr. "
    "You are trying to determine the location of an image in world coordinates."
)
SYS_MESSAGE = {"role": "system", "content": SYS_MESSAGE_PROMPT}

# Output-format instruction that the tool prompts interpolate as {json_prompt}.
JSON_PROMPT = (
    "Only return a valid json string (RFC 8259). Do not provide any other commentary. "
    "Do not wrap the JSON in markdown such as ```json. Only use the data from the provided content."
)


## Tool 1: Describe image OCR

In [8]:
# Prompt for the OCR-style pass: asks the model to list location-relevant text
# visible in the images. Doubled braces are literal braces after .format-style
# substitution; {json_prompt} is filled from prompt_inputs.
DESCRIBE_TEXT_PROMPT = """USER: Your task is to describe this image in detail. Here are some instructions:
- Focus entirely on the text found on the image that might provide location clues.
- Prioritize street signs, stores, landmarks, and other specific features that provide location clues.
- Pay attention to any public transportation, which may provide great local information.
- Ignore advertisements unless they contain local information.
- Ignore the text related to the GeoGuessr game.
- Sort the items based on how useful the information might be for someone trying to determine where in the world the image was taken.
- Return a json following the below examples. {json_prompt}
  - output={{"item1_name": "item1_description", "item2_name": "item2_description", ...}}
  - output={{"restaurant": "There is a restaurant displaying 'Stevens Pizza of Oakmont'", "street_sign": "One street sign says Orange Ave"}}

AGENT: output="""

# Describe image
prompt_inputs = {"json_prompt": JSON_PROMPT}
text_results = call_openai(model, SYS_MESSAGE, DESCRIBE_TEXT_PROMPT, prompt_inputs, image_inputs)


## Tool 2: Describe image scenery

In [9]:
# Prompt for the scenery pass: asks the model to describe architecture, nature
# and people rather than text. {json_prompt} is filled from prompt_inputs.
DESCRIBE_SCENE_PROMPT = """USER: Your task is to describe this image in detail. Here are some instructions:
- Focus entirely on the scenery of the image, whether architecture, nature, or people.
- Ignore the text related to the GeoGuessr game.
- Sort the items based on how useful the information might be for someone trying to determine where in the world the image was taken.
- Return a json following the below examples. {json_prompt}
  - output={{"item1_name": "item1_description", "item2_name": "item2_description", ...}}
  - output={{"mountain": "There is a large mountain range in the background of the image with white snowcaps", "trees": "There are Sequoia trees on the mountain", "architecture": "There is a Victorian building in the image", "people": "There are people in the image wearing American clothing"}}

AGENT: output="""

# Describe image scenery
prompt_inputs = {"json_prompt": JSON_PROMPT}
scene_results = call_openai(model, SYS_MESSAGE, DESCRIBE_SCENE_PROMPT, prompt_inputs, image_inputs)

## Tool 3: Interpret details

In [13]:
# Prompt that turns the extracted text and scenery features into city guesses.
# The example confidence uses the same 0-100 scale the instructions ask for,
# so the few-shot example and the instruction no longer contradict each other.
CANDIDATES_PROMPT = """USER: A previous agent has extracted features from a set of four images facing North, South, East, and West.
Your task is to interpret these features and attempt to guess which city the image is from. Here is some helpful information and instruction:
*Information*
- text_results: {text_results}
- scene_results: {scene_results}

*Instructions*
- Provide an educated guess on which city the image is from.
- Return your top few guesses and their confidence levels from 0-100.
- Return a json following the below examples. {json_prompt}
  - output={{"Tokyo": {{"confidence": 70, "reasoning": "The image features a singular large mountain in the background, which could be Mt. Fuji but it is hard to determine with confidence."}}}}

AGENT: output="""

# Come up with potential city candidates
prompt_inputs = {
    "json_prompt": JSON_PROMPT,
    "text_results": text_results,
    "scene_results": scene_results,
}
candidates = call_openai(model, SYS_MESSAGE, CANDIDATES_PROMPT, prompt_inputs, image_inputs)

## Tool 4: Google Search Queries

In [14]:
# Prompt that asks the model for concrete search queries derived from the
# extracted features and the candidate cities.
SEARCH_PROMPT = """USER: A previous agent has extracted features from an image.
Your task is to come up with a set of Google searches based on the features extracted from the image. Here is some helpful information and instruction:
*Information*
- text_results: {text_results}
- scene_results: {scene_results}
- city_candidates: {candidates}

*Instructions*
- Return Google search queries that are likely to identify a specific address.
- Ensure that search queries are specific and not too vague.
- Focus primarily on landmarks and locations with names. Avoid queries that describe the scenery or generic objects like a FedEx truck.
- Return a json following the below examples. {json_prompt}
  - output={{"search1": "WalMart Steele Street", "search2": "F37 bus route in Germany", ...}}

AGENT: output="""

# Come up with Google searches
prompt_inputs = {
    "json_prompt": JSON_PROMPT,
    "text_results": text_results,
    "scene_results": scene_results,
    "candidates": candidates,
}
queries = call_openai(model, SYS_MESSAGE, SEARCH_PROMPT, prompt_inputs, image_inputs)

## Tool 5: Run Google Queries

In [15]:
# Run every generated query through get_top_n_results and collect the outcome
# under the same key the query had in `queries`.
description = "Find the location associated with this landmark"
search_results = {
    key: get_top_n_results(description, query)
    for key, query in queries.items()
}

  warn_deprecated(


## Tool X: Guess Location

In [16]:
# Final prompt: combines every earlier intermediate result and asks the model
# for a city plus latitude/longitude.
GEO_GUESSER_PROMPT = """USER: A previous agent has extracted features from an image. 
Your task is to determine the coordinates from which the picture was taken. It can be anywhere in the world. Here is some helpful information and instruction:
*Information*
- text_results: {text_results}
- scene_results: {scene_results}
- city_candidates: {candidates}
- search_results: {search_results}

*Instructions*
Return json with the city and coordinates following the below example. {json_prompt}
output={{"city": "Orland Park, IL, 60467, USA", "latitude": "42.0099", "longitude": "-87.62317"}}

AGENT: output="""

# Gather the values substituted into the prompt placeholders, then predict.
prompt_inputs = dict(
    json_prompt=JSON_PROMPT,
    text_results=text_results,
    scene_results=scene_results,
    candidates=candidates,
    search_results=search_results,
)
pred = call_openai(model, SYS_MESSAGE, GEO_GUESSER_PROMPT, prompt_inputs, image_inputs)

## Eval

In [17]:
# Score the prediction against the known target and report the result.
distance = calculate_distance(pred, target)
print(f"Distance: {distance!s} km")

Distance: 267.8 km


## Location Search

## Try MapBox
- No location bias, but missing Woods Mart

In [None]:
import requests
from urllib.parse import quote

# Mapbox forward-geocoding experiment for a single hard-coded query.
#
# SECURITY: the access token is read from the environment (or prompted for)
# rather than written into the notebook. A secret token used to be hard-coded
# in this cell; treat that token as compromised and revoke it.
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN") or getpass.getpass(
    "Enter your Mapbox access token: "
)
mapbox_search_url = "https://api.mapbox.com/geocoding/v5/mapbox.places"

# Search query
query = "The Shakespeare pub, Phoenix St"

# Parameters for the request. The search text travels in the URL path, so it
# is not repeated here; `limit` is capped at the API maximum of 10 and the
# boolean is sent in lowercase instead of Python's "True".
params = {
    "access_token": mapbox_access_token,
    "autocomplete": "true",
    "types": "poi",
    "limit": 10,
}

# Percent-encode the search text so characters such as "/" or "?" cannot
# change the structure of the request path.
full_url = f"{mapbox_search_url}/{quote(query, safe='')}.json"

# Make the request to Mapbox API; the timeout keeps the cell from hanging.
response = requests.get(full_url, params=params, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    search_results = response.json()
    print(f"Mapbox search results for '{query}':", search_results)
else:
    print("Failed to retrieve Mapbox search results, status code:", response.status_code)




## Try HERE API

In [None]:
import requests

# HERE geocoding experiment for a single hard-coded query.
#
# SECURITY: the API key is read from the environment (or prompted for) rather
# than written into the notebook. A key used to be hard-coded in this cell;
# treat that key as compromised and revoke it.
here_api_key = os.environ.get("HERE_API_KEY") or getpass.getpass("Enter your HERE API key: ")
here_search_url = "https://geocode.search.hereapi.com/v1/geocode"

# Search query
query = "The Shakespeare pub, Phoenix St"

# Parameters for the request
params = {
    "apikey": here_api_key,
    "q": query
}

# Make the request to HERE API; the timeout keeps the cell from hanging.
response = requests.get(here_search_url, params=params, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    search_results = response.json()
    print(f"HERE search results for '{query}':", search_results)
else:
    print("Failed to retrieve HERE search results, status code:", response.status_code)


## Tool 5: Google Maps API

In [None]:
# Places text search for a hard-coded query; the bare name on the last line
# makes the notebook display the response.
search_result = gmaps.places(
    query="The Shakespeare pub, Phoenix St, UK",  # <----------------- TEMP CHEATING
    location=(0, 0),
    radius=20_000_000,
)
search_result

In [None]:
# Run a Places autocomplete lookup per generated search query and keep at most
# `n` suggestion descriptions for each.
#
# The loop iterates `queries` (search key -> query string). Iterating
# `candidates` here would fail: its values are expected to be dicts (see the
# example in CANDIDATES_PROMPT), and a dict cannot be used as a key of
# `search_results`.
n = 10  # maximum number of suggestions kept per query
search_results = {}
for key, query in queries.items():
    # TEMP CHEATING: the input text is hard-coded instead of using `query`,
    # so every iteration currently issues the same request.
    search_result = gmaps.places_autocomplete_query(
        input_text="The Shakespeare pub, Phoenix St, UK", #<----------------- TEMP CHEATING
        location=(0,0),
        radius=20000000
        )

    # Keep exactly the first n descriptions (the earlier `i > n` check kept n + 2).
    top_results = [result["description"] for result in search_result[:n]]

    # Track all query results
    search_results[query] = top_results

print(search_results)

## Tool 6: GeoGuessr

In [None]:
# Ask the chat-completions endpoint directly for Google Maps search queries
# built from the OCR and scenery descriptions produced earlier in the notebook.
# Define prompt
prompt_template = """USER: A previous agent has extracted features from an image. Your task is to come up with a set of Google Maps searches based on key landmarks and locations found in the image. Here is some helpful information and instruction:
*Information*
- OCR: Here are the following text items found in the image: {img_text}
- Scenery: Here are the following scenery items found in the image: {img_scenery}
- Google Maps: 

*Instructions*
- Return a set of Google Maps searches based on the text and scenery items found in the image.
- Return a json with the following format:
  - output={{"search1": "search1_query", "search2": "search2_query", ...}}

AGENT: output="""
# The placeholders are filled from text_results / scene_results. The names
# img_text, img_scenery, sys_message and headers used previously are not
# defined anywhere in the visible notebook, so the cell could not run on a
# fresh kernel.
# NOTE(review): confirm these are the intended inputs.
prompt = prompt_template.format(img_text=text_results, img_scenery=scene_results)

# Bearer-token authentication using the key collected in the setup cell.
headers = {"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"}

# Define payload
payload = {
  "model": "gpt-4o",
  "messages": [
    SYS_MESSAGE,
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": prompt
        }
      ]
    }
  ],
  "max_tokens": 300
}

response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers=headers,
    json=payload,
    timeout=60,
)
# Surface HTTP errors directly instead of failing later on a missing 'choices' key.
response.raise_for_status()
# NOTE(review): this rebinds `candidates` to the raw response text, replacing
# the value produced in the Tool 3 cell.
candidates = response.json()['choices'][0]['message']['content']
print(candidates)