In [None]:
import os
from openai import OpenAI

# Create the API client; the key is read from the OPENAI_KEY environment variable.
client = OpenAI(api_key=os.getenv('OPENAI_KEY'))


# Three photos that share one coordinate prefix in their file names.
# NOTE(review): the fwd / l / r suffixes presumably mean forward, left and right views - confirm.
url_fwd = "https://github.com/hhe1ibeb/xinyi_geosearch/blob/dev/data/photos/25.01722_121.57986_fwd.jpeg?raw=true"
url_l = "https://github.com/hhe1ibeb/xinyi_geosearch/blob/dev/data/photos/25.01722_121.57986_l.jpeg?raw=true"
url_r = "https://github.com/hhe1ibeb/xinyi_geosearch/blob/dev/data/photos/25.01722_121.57986_r.jpeg?raw=true"


# Single chat request: a system persona, then one user message holding the
# text instruction followed by the three images in fwd, l, r order.
response = client.chat.completions.create(
    model="gpt-4o",
    max_tokens=300,
    messages=[
        {
            "role": "system",
            "content": "You are a real estate agent whose job is to describe surroundings to a potential buyer. ",
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "According to these three pictures, please describe this place. The description should be in summarized bullet points, including traffic, attractions, and overall neighborhood atmosphere. ",
                },
            ] + [
                {"type": "image_url", "image_url": {"url": str(photo_url)}}
                for photo_url in (url_fwd, url_l, url_r)
            ],
        },
    ],
)

In [66]:
# Show the message text of the first choice in the chat completion stored in `response`.
response.choices[0].message.content

'- **Traffic:**\n  - The area features a tunnel, indicating it is a part of a major road or highway network.\n  - Traffic flow seems to be guided and controlled within the tunnel, and there may be moderate to high traffic volume depending on the time of day.\n\n- **Attractions:**\n  - The tunnel has some artwork or murals on the walls, which could be a point of interest for passersby.\n  - The surroundings suggest that this could be part of an urban area, possibly with more attractions outside the tunnel.\n\n- **Overall Neighborhood Atmosphere:**\n  - The tunnel provides a functional and somewhat utilitarian atmosphere.\n  - The presence of lighting and artwork contributes to a sense of safety and a touch of cultural expression within an otherwise ordinary infrastructure.\n  - The surroundings might be urban and heavily trafficked, likely supporting a busy, active neighborhood.'

In [4]:
import os
import pandas as pd
from openai import OpenAI
import csv
import base64
from urllib.request import urlopen

# Load the coordinate table. The lat/lon columns are forced to str so pandas
# does not parse them as floats.
table_csv = pd.read_csv(
    'coordinates.csv',
    dtype=dict.fromkeys(('lat', 'lon'), str),
)
df = pd.DataFrame(data=table_csv)

# Build the OpenAI client with the key taken from the OPENAI_KEY environment variable.
client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))

def get_as_base64(url):
    """Fetch ``url`` and return the response body encoded as base64 text.

    Args:
        url: A URL string (or ``urllib.request.Request``) accepted by
            ``urllib.request.urlopen``.

    Returns:
        str: Base64 encoding of the raw response bytes, decoded as UTF-8.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or invalid URL
        (e.g. ``urllib.error.URLError``); nothing is caught here.
    """
    # Open the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, instead of being left open.
    with urlopen(url) as resp:
        payload = resp.read()
    return base64.b64encode(payload).decode('utf-8')

def encode_image(img_path):
    """Read the file at ``img_path`` in binary mode and return it as base64 text.

    Args:
        img_path: Path of the file to read.

    Returns:
        str: Base64 encoding of the file's bytes, decoded as UTF-8.
    """
    with open(img_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Ask the OpenAI API for a description of one coordinate's photos
def get_descriptions(lat, lon):
    """Describe the place shown in the three local photos of one coordinate.

    The files ``./photos/{lat}_{lon}_fwd.jpeg``, ``..._l.jpeg`` and
    ``..._r.jpeg`` are base64-encoded with ``encode_image`` and sent as data
    URLs, together with a fixed prompt, to the ``gpt-4o`` chat model through
    the module-level ``client``.

    Args:
        lat: Latitude value as it appears in the photo file names.
        lon: Longitude value as it appears in the photo file names.

    Returns:
        The message content of the first completion choice, or ``None`` when
        any exception occurs (the error is printed, not re-raised).
    """
    try:
        photo_paths = (
            f"./photos/{lat}_{lon}_fwd.jpeg",
            f"./photos/{lat}_{lon}_l.jpeg",
            f"./photos/{lat}_{lon}_r.jpeg",
        )
        # All three files are read and encoded before the request is issued.
        image_parts = [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(path)}"}}
            for path in photo_paths
        ]
        prompt_part = {
            "type": "text",
            "text": "According to these three pictures, please describe this place. The description should be in summarized bullet points, including traffic, attractions, and overall neighborhood atmosphere.",
        }
        system_message = {
            "role": "system",
            "content": "You are a real estate agent whose job is to describe surroundings to a potential buyer.",
        }
        user_message = {"role": "user", "content": [prompt_part] + image_parts}

        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[system_message, user_message],
            max_tokens=300,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Error fetching description for {lat}, {lon}: {e}")
        return None

# Rows are processed in batches; the CSV is rewritten after every batch so an
# interrupted run keeps the descriptions fetched so far.
batch_size = 10

# No explicit resume offset is needed: rows that already hold a description are
# skipped below, so every run can scan from the first row.
start_index = 0

# Allow a fresh CSV that has no 'description' column yet.
if 'description' not in df.columns:
    df['description'] = None

# Work from the DataFrame that is written back to disk, so the skip decision
# and the saved file always agree. The CSV is no longer re-parsed per batch.
descriptions = df['description'].tolist()
lats = df['lat'].tolist()
lons = df['lon'].tolist()

for start in range(start_index, len(df), batch_size):
    end = min(start + batch_size, len(df))

    for i in range(start, end):
        existing = descriptions[i]
        # Empty CSV cells are loaded by pandas as NaN and failed requests leave
        # None; both count as "still to do". Anything else is kept as is.
        if not pd.isna(existing) and len(str(existing)) != 0:
            continue

        # NOTE(review): float() reformats the coordinate text (e.g. '25.0170'
        # becomes 25.017) before get_descriptions puts it into the photo file
        # names - confirm the photo files follow that formatting.
        lat = float(lats[i])
        lon = float(lons[i])
        print(lat, lon)
        description = get_descriptions(lat, lon)
        print(description)
        descriptions[i] = description

    # Update the DataFrame with descriptions for this batch
    df['description'] = descriptions

    # Save the updated DataFrame to the CSV file after each batch
    df.to_csv('coordinates.csv', index=False)

    print(f"Batch {start // batch_size + 1} saved to CSV.")

print("All batches processed and saved.")

Batch 1 saved to CSV.
Batch 2 saved to CSV.
Batch 3 saved to CSV.
Batch 4 saved to CSV.
Batch 5 saved to CSV.
Batch 6 saved to CSV.
Batch 7 saved to CSV.
Batch 8 saved to CSV.
Batch 9 saved to CSV.
Batch 10 saved to CSV.
Batch 11 saved to CSV.
Batch 12 saved to CSV.
Batch 13 saved to CSV.
Batch 14 saved to CSV.
Batch 15 saved to CSV.
Batch 16 saved to CSV.
Batch 17 saved to CSV.
Batch 18 saved to CSV.
Batch 19 saved to CSV.
Batch 20 saved to CSV.
Batch 21 saved to CSV.
Batch 22 saved to CSV.
Batch 23 saved to CSV.
Batch 24 saved to CSV.
Batch 25 saved to CSV.
Batch 26 saved to CSV.
Batch 27 saved to CSV.
Batch 28 saved to CSV.
Batch 29 saved to CSV.
Batch 30 saved to CSV.
Batch 31 saved to CSV.
Batch 32 saved to CSV.
Batch 33 saved to CSV.
Batch 34 saved to CSV.
Batch 35 saved to CSV.
Batch 36 saved to CSV.
Batch 37 saved to CSV.
Batch 38 saved to CSV.
Batch 39 saved to CSV.
Batch 40 saved to CSV.
Batch 41 saved to CSV.
Batch 42 saved to CSV.
Batch 43 saved to CSV.
Batch 44 saved to CSV.

In [5]:
import pandas as pd
import csv

# Sanity check: re-read the saved CSV and list every row whose description is
# still empty, then print how many such rows there are.
cnt = 0

with open('coordinates.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)

# The rows are fully loaded, so the scan can run after the file is closed.
for row in rows:
    if len(row['description']) != 0:
        continue
    print(row['lat'], row['lon'])
    cnt = cnt + 1

print(cnt)

0
