https://platform.openai.com/docs/guides/vision

https://docs.anthropic.com/en/docs/vision

https://replicate.com/collections/vision-models (API docs are on each model's page)

https://ai.google.dev/gemini-api/docs/api-overview

In [2]:
#@title import, functions
# set up
import math
import csv
import os
import base64
import requests
import json
import re

def extract_coordinates_response(text):
    """Pull the guessed coordinates out of a model response.

    The first ``Coordinates: <lat>, <lon>`` occurrence is used. Asterisks and
    backticks are removed beforehand so that emphasis or inline-code
    decoration around the label or the numbers does not prevent a match.
    Both integer (``48``) and decimal (``48.85``) values are accepted.

    Args:
        text: Raw response text. ``None`` or an empty string yields ``None``.

    Returns:
        The string ``"<lat>, <lon>"`` with both values rendered as floats,
        or ``None`` when no coordinates are found.
    """
    if not text:
        return None

    # Strip markdown decoration the model may wrap around the answer.
    cleaned = text.replace('*', '').replace('`', '')
    number = r"-?\d+(?:\.\d+)?"
    pattern = re.compile(rf"Coordinates:\s*({number})\s*,\s*({number})")
    match = pattern.search(cleaned)
    if match is None:
        return None

    # Round-trip through float so the output format is normalized.
    latitude = float(match.group(1))
    longitude = float(match.group(2))
    return f"{latitude}, {longitude}"


def encode_image(image_path):
  """Read the file at ``image_path`` and return its bytes as base64 text."""
  with open(image_path, "rb") as handle:
    raw_bytes = handle.read()
  encoded = base64.b64encode(raw_bytes)
  return encoded.decode('utf-8')

def degrees2radians(degrees):
    """Convert an angle expressed in degrees to radians."""
    radians_per_degree = math.pi / 180
    return degrees * radians_per_degree

def distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometres as a float.
    """
    earth_radius_km = 6371

    d_lat = math.radians(lat2 - lat1)
    d_lon = math.radians(lon2 - lon1)
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)

    sin_half_dlat = math.sin(d_lat / 2)
    sin_half_dlon = math.sin(d_lon / 2)
    a = (sin_half_dlat * sin_half_dlat
         + sin_half_dlon * sin_half_dlon * math.cos(phi1) * math.cos(phi2))
    # Floating-point rounding can push `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return earth_radius_km * c

def extract_coordinates_filename(filename):
    """Turn a ``<lat>_<lon>.<ext>`` file name into the string ``"<lat>, <lon>"``.

    The extension is dropped and the remainder must consist of exactly two
    parts separated by a single underscore; otherwise the unpacking raises
    ``ValueError``.
    """
    stem = os.path.splitext(filename)[0]
    latitude, longitude = stem.split('_')
    return ", ".join((latitude, longitude))

def response_openai(api_key, prompt, filename, model="gpt-4o", max_tokens=1000, timeout=120):
    """Send a text prompt plus one image to the OpenAI chat completions API.

    Args:
        api_key: OpenAI API key, sent as a bearer token.
        prompt: Text instruction that accompanies the image.
        filename: Path of the image file; it is base64-encoded and embedded
            in the request as a ``data:image/jpeg`` URL.
        model: Model identifier to query.
        max_tokens: Upper bound on the number of generated tokens.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Without it a stalled connection would
            block the whole run indefinitely.

    Returns:
        The decoded JSON body of the HTTP response. No status check is made,
        so an API error payload is returned as-is for the caller to inspect.
    """
    base64_image = encode_image(filename)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # One user message carrying both the text part and the inline image part.
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"{prompt}"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": max_tokens
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()

def grading(prompt, response, model="claude-3-5-sonnet-20240620", max_tokens=200, timeout=120):
    """Ask an Anthropic model to grade a response and return the raw reply.

    The grader instruction and the response text are concatenated into a
    single user message and posted to the Anthropic Messages API.

    Args:
        prompt: Grader instruction; the response text is appended directly
            after it.
        response: The model response to be graded.
        model: Anthropic model identifier to query.
        max_tokens: Upper bound on the number of generated tokens.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        KeyError: If the ``ANTHROPIC_API_KEY`` environment variable is unset.
    """
    # The Anthropic API authenticates with `x-api-key` (not a bearer token)
    # and requires an explicit `anthropic-version` header.
    headers = {
        "content-type": "application/json",
        "x-api-key": os.environ['ANTHROPIC_API_KEY'],
        "anthropic-version": "2023-06-01",
    }

    payload = {
        "model": model,
        "max_tokens": max_tokens,
        "messages": [
            {
                "role": "user",
                "content": f"{prompt}{response}",
            }
        ],
    }

    reply = requests.post(
        "https://api.anthropic.com/v1/messages",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return reply.json()

In [None]:
# Download the dataset archive (replace the placeholder URL) to /content/dataset.zip.
!wget link/to/dataset -O /content/dataset.zip
# Extract the archive into /content/dataset/.
# NOTE(review): confirm that `-FF` is an accepted flag for the installed unzip.
!unzip -FF /content/dataset.zip -d /content/dataset/
# Recompress every .jpg in the dataset directory in place with jpegoptim.
# NOTE(review): presumably done to keep uploads under an API image-size limit; confirm.
!cd /content/dataset && jpegoptim --size=19000K --strip-all --max=80 *.jpg

In [None]:
# Build an empty results template: one row per regular file in the image
# directory, pre-filled with the file name and the coordinates parsed from it.
image_directory = 'dataset'

output_csv_path = 'template.csv'

file_list = list(filter(
    lambda name: os.path.isfile(os.path.join(image_directory, name)),
    os.listdir(image_directory),
))

with open(output_csv_path, mode='w', newline='') as file:
    csv_writer = csv.writer(file)
    # Header row; the eight columns after 'actual_coordinates' start out blank.
    csv_writer.writerow([
        'source', 'actual_coordinates', 'model', 'response', 'guess',
        'distance', 'climate', 'architecture', 'street_element', 'language',
    ])
    for filename in file_list:
        coordinate = extract_coordinates_filename(filename)
        csv_writer.writerow([filename, coordinate] + [''] * 8)
        print(f'Added {filename} with coordinate {coordinate}')


In [None]:
# Query the vision model for every image listed in the template CSV, record
# its answer, the extracted coordinate guess and the distance (km) to the
# ground truth, then write the table back to the same file.
csv_file_path = 'template.csv'
api_key = os.environ["OPENAI_API_KEY"]
prompt = "You are a world class GeoGuessr player, and can guess the location of images very well based on the images alone. Here is an image, try your best to guess the location. Make a chain of thought to infer clues from details in the image. Then, using the information, give a best guess to the coordinates of the image. Be very specific, don't give a general coordinates of the city or country, but infer what area the image is from. The coordinates must follow `Coordinates:` (on the same line) and be surrounded by asterisks like `lat, long`. Use plain text, no markdown."
grader_prompt = "You are a professional AI response grader and can accurately tag the reponses with 'climate','architecture','street-element', 'language'. Answer whether the given response infers the location from these information, give them 0 for no or 1 for yes. Your response should be like the following ```climate: 1, architecture: 0, street-element: 1, langaage: 1``` The response: "

with open(csv_file_path, mode='r', newline='') as file:
    data = list(csv.reader(file))

try:
    # data[0] is the header row.
    for row in data[1:]:
        row[2] = 'gpt-4o'
        print(row[0])
        path_to_image = row[0]
        response = response_openai(api_key, prompt, "/content/dataset/" + path_to_image)
        print(response)

        # An API error payload carries no 'choices'; leave the row blank
        # instead of aborting the whole run.
        choices = response.get('choices')
        if not choices:
            print(f'No completion returned for {row[0]}; skipping')
            continue

        row[3] = choices[0]['message'].get('content') or ''
        guess = extract_coordinates_response(row[3])
        if guess is None:
            # The model answered without parsable coordinates.
            print(f'No coordinates found in response for {row[0]}; skipping')
            continue
        row[4] = guess

        lat0, lon0 = (float(part) for part in row[4].split(','))
        lat1, lon1 = (float(part) for part in row[1].split(','))
        row[5] = distance(lat0, lon0, lat1, lon1)
        # TODO: grade the response, e.g. grading(grader_prompt, row[3]),
        # and fill the remaining tag columns.
finally:
    # Always persist what has been collected so far, so a failure part-way
    # through does not discard the answers already obtained.
    with open(csv_file_path, mode='w', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerows(data)
