In [None]:
import os
from openai import OpenAI

# OpenAI client; the API key is read from the OPENAI_KEY environment variable.
client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))

# Three photos of a single coordinate hosted on GitHub (presumably the
# forward / left / right views, judging by the file-name suffixes).
url_fwd = "https://github.com/hhe1ibeb/xinyi_geosearch/blob/dev/data/photos/25.01722_121.57986_fwd.jpeg?raw=true"
url_l = "https://github.com/hhe1ibeb/xinyi_geosearch/blob/dev/data/photos/25.01722_121.57986_l.jpeg?raw=true"
url_r = "https://github.com/hhe1ibeb/xinyi_geosearch/blob/dev/data/photos/25.01722_121.57986_r.jpeg?raw=true"

# One chat completion: a system persona plus a user turn made of the text
# prompt followed by the three images, in fwd / l / r order.
response = client.chat.completions.create(
    model="gpt-4o",
    max_tokens=300,
    messages=[
        {"role": "system", "content": "You are a real estate agent whose job is to describe surroundings to a potential buyer. "},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "According to these three pictures, please describe this place. The description should be in summarized bullet points, including traffic, attractions, and overall neighborhood atmosphere. "},
                *(
                    {"type": "image_url", "image_url": {"url": photo_url}}
                    for photo_url in (url_fwd, url_l, url_r)
                ),
            ],
        },
    ],
)

In [None]:
# Show the text of the first completion choice held in `response`.
response.choices[0].message.content

In [None]:
import os
import pandas as pd
from openai import OpenAI
import csv
import base64
from urllib.request import urlopen

# Load the coordinate table; 'lat' and 'lon' are read as strings rather than
# being parsed into floats.
table_csv = pd.read_csv(
    'coordinates.csv',
    dtype=dict.fromkeys(('lat', 'lon'), str),
)
df = pd.DataFrame(data=table_csv)

# OpenAI client; the API key is read from the OPENAI_KEY environment variable.
client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))

def get_as_base64(url):
    """Fetch the resource at ``url`` and return its bytes base64-encoded.

    Args:
        url: Any URL (or request object) accepted by ``urllib.request.urlopen``.

    Returns:
        The downloaded payload as a base64 string.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or invalid URL.
    """
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, instead of being leaked.
    with urlopen(url) as response:
        payload = response.read()
    return base64.b64encode(payload).decode('utf-8')

def encode_image(img_path):
    """Read the file at ``img_path`` in binary mode and return it as a base64 string."""
    with open(img_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def get_descriptions(lat, lon):
    """Request a GPT-4o description of the place shown in one coordinate's photos.

    The three images ``./photos/{lat}_{lon}_fwd.jpeg``, ``..._l.jpeg`` and
    ``..._r.jpeg`` are base64-encoded and sent inline as JPEG data URLs
    together with a fixed text prompt.

    Args:
        lat: Latitude as it appears in the photo file names.
        lon: Longitude as it appears in the photo file names.

    Returns:
        The text of the first completion choice, or ``None`` if anything
        raised along the way (the error is printed, not re-raised).
    """
    system_turn = {
        "role": "system",
        "content": "You are a real estate agent whose job is to describe surroundings to a potential buyer.",
    }
    prompt_part = {
        "type": "text",
        "text": "According to these three pictures, please describe this place. The description should be in summarized bullet points, including traffic, attractions, and overall neighborhood atmosphere.",
    }

    try:
        # Encode the views in fwd, l, r order; a missing file lands in the
        # except branch below like any other failure.
        image_parts = [
            {
                "type": "image_url",
                "image_url": {
                    "url": "data:image/jpeg;base64,"
                    + encode_image(f"./photos/{lat}_{lon}_{view}.jpeg")
                },
            }
            for view in ("fwd", "l", "r")
        ]

        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                system_turn,
                {"role": "user", "content": [prompt_part, *image_parts]},
            ],
            max_tokens=300,
        )
        return completion.choices[0].message.content
    except Exception as err:
        print(f"Error fetching description for {lat}, {lon}: {err}")
        return None

# Process the coordinates in batches so progress is checkpointed to disk.
batch_size = 10

# Always scan from the first row: rows that already carry a description are
# skipped inside the loop, so re-running this cell resumes unfinished work and
# retries rows whose earlier request failed.
start_index = 0

for start in range(start_index, len(df), batch_size):
    end = min(start + batch_size, len(df))

    # Work on a plain list and assign the whole column back afterwards, so the
    # column never has to accept a string into a non-string dtype cell by cell.
    descriptions = df['description'].tolist()

    for i in range(start, end):
        existing = descriptions[i]
        # Empty CSV cells are loaded by pandas as NaN; a failed request leaves
        # None. Both count as "not described yet".
        if pd.notna(existing) and len(str(existing)) != 0:
            continue

        # Use lat/lon exactly as written in the CSV (the frame was loaded with
        # dtype=str for these columns). Converting to float would drop trailing
        # zeros (e.g. "121.57980" -> 121.5798), and get_descriptions builds the
        # photo file names from these values.
        lat = df['lat'].iloc[i]
        lon = df['lon'].iloc[i]
        print(lat, lon)
        description = get_descriptions(lat, lon)
        print(description)
        descriptions[i] = description

    # Update the DataFrame with descriptions for this batch
    df['description'] = descriptions

    # Save the updated DataFrame to the CSV file after each batch
    df.to_csv('coordinates.csv', index=False)

    print(f"Batch {start // batch_size + 1} saved to CSV.")

print("All batches processed and saved.")

In [None]:
import pandas as pd
import csv

# List and count the rows of coordinates.csv that have no description yet.
cnt = 0

# The file is written by pandas (UTF-8 by default), so read it back as UTF-8
# instead of the platform's default encoding. newline='' is what the csv module
# expects so that quoted multi-line fields are handed to it untouched.
with open('coordinates.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)

    for row in rows:
        # `not` covers both an empty string and the None that DictReader
        # supplies when a row has fewer fields than the header.
        if not row['description']:
            print(row['lat'], row['lon'])
            cnt += 1

print(cnt)

In [None]:
import pandas as pd
import openai
import os

# Load the table to translate; 'lat' and 'lon' are read as strings rather than
# being parsed into floats.
df = pd.read_csv(
    'coordinates_translated.csv',
    dtype=dict.fromkeys(('lat', 'lon'), str),
)

# Configure the module-level OpenAI client with the key from OPENAI_KEY.
openai.api_key = os.environ.get('OPENAI_KEY')

def get_translation(text):
    """Translate ``text`` with a chat-completion request to gpt-3.5-turbo-16k.

    The system prompt asks for Taiwanese Mandarin in Traditional Chinese
    characters; ``text`` is sent unchanged as the user message.

    Args:
        text: The English text to translate.

    Returns:
        The text of the first completion choice, or ``None`` if the request
        or response handling raised (the error is printed, not re-raised).
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a professional translator and you will be given a language and a body of text in English. Your job is to translate the text into Taiwanese Mandarin, using **only** Traditional Chinese Characters.",
        },
        {"role": "user", "content": text},
    ]

    try:
        completion = openai.chat.completions.create(
            model="gpt-3.5-turbo-16k",
            messages=conversation,
            max_tokens=1000,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        print(f"Error fetching translation for text '{text}': {err}")
        return None

# Ensure 'descriptions-mandarin' column exists
if 'descriptions-mandarin' not in df.columns:
    df['descriptions-mandarin'] = None

# A column read back from CSV with no values yet is numeric (all NaN); make it
# object-typed so translated strings can be stored in individual cells.
df['descriptions-mandarin'] = df['descriptions-mandarin'].astype(object)

# Process the coordinates in batches
batch_size = 10

# Resume from the FIRST row that still lacks a translation. Resuming after the
# last translated row would permanently skip any earlier row whose request
# failed; rows that are already translated are skipped inside the loop anyway.
pending = df.index[df['descriptions-mandarin'].isna()]
start_index = int(pending[0]) if len(pending) else len(df)

print(start_index)

# Process the coordinates in batches
for start in range(start_index, len(df), batch_size):
    end = min(start + batch_size, len(df))

    for i in range(start, end):
        if pd.notnull(df.at[i, 'descriptions-mandarin']):
            continue

        description = df.at[i, 'description']
        # Nothing to translate: don't send a NaN/blank value to the API.
        if pd.isnull(description) or not str(description).strip():
            print(f"Skipping row {i}: no description to translate.")
            continue

        translated = get_translation(description)
        # Leave the cell empty on failure so a later run retries this row.
        if translated:
            df.at[i, 'descriptions-mandarin'] = translated
        print(f"Original: {description}")
        print(f"Translated: {translated}")

    # Save the updated DataFrame to the CSV file after each batch
    df.to_csv('coordinates_translated.csv', index=False)
    print(f"Batch {start // batch_size + 1} saved to CSV.")

print("All batches processed and saved.")

In [None]:
import csv

# List and count the rows of coordinates_translated.csv with no translation yet.
cnt = 0

# The file holds Chinese text written by pandas as UTF-8; decode it as UTF-8
# explicitly, because the platform default encoding may differ and would fail
# or garble the text. newline='' is what the csv module expects so that quoted
# multi-line fields are handed to it untouched.
with open('coordinates_translated.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)

    for row in rows:
        # `not` covers both an empty string and the None that DictReader
        # supplies when a row has fewer fields than the header.
        if not row['descriptions-mandarin']:
            print(row['lat'], row['lon'])
            cnt += 1

print(cnt)

In [None]:
import csv

# Count the data rows (header excluded) in coordinates_translated.csv.
cnt = 0

# The file holds Chinese text written by pandas as UTF-8; decode it as UTF-8
# explicitly rather than with the platform default. newline='' lets the csv
# module treat quoted multi-line fields as part of a single row.
with open('coordinates_translated.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    rows = list(reader)
    cnt = len(rows)

print(cnt)