In [6]:
import json
import os
import requests
from collections import defaultdict

# Set Google Maps API Key 
API_KEY = "AIzaSyDza7pLB-ddE2XEHhX3fJkJ_XlaSUqg3Gs"

# Function to convert address to latitude/longitude using Google Maps API
def get_location(address):
    if not address or address == "N/A":  # If address is missing or "N/A"
        return None
    try:
        # Construct Google Maps Geocoding API URL
        url = f"https://maps.googleapis.com/maps/api/geocode/json?address={address}&key={API_KEY}"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            if data['status'] == "OK" and data['results']:
                location = data['results'][0]['geometry']['location']
                return {"latitude": location['lat'], "longitude": location['lng']}
            else:
                print(f"Error geocoding address '{address}': {data['status']}")
        else:
            print(f"HTTP Error for address '{address}': {response.status_code}")
    except Exception as e:
        print(f"Error getting location for {address}: {e}")
    return None

# Filter data and add Location field
def filter_and_add_location(data, address_key):
    filtered_data = []
    for entry in data:
        location = get_location(entry.get(address_key))
        if location:
            entry["Location"] = location
            filtered_data.append(entry)
    return filtered_data

# Load files
with open('output/google_result.json', 'r', encoding='utf-8') as f:
    google_data = json.load(f)

with open('output/restaurant_details.json', 'r', encoding='utf-8') as f:
    details_data = json.load(f)

# Filter and add Location field
google_data = filter_and_add_location(google_data, "address")
details_data = filter_and_add_location(details_data, "Address")

# Rename keys in google_result.json
google_rename_keys = {
    "name": "Name",
    "address": "Address",
    "rating": "Rating",
    "reviews_count": "Reviews_Count",
    "reviews": "Reviews"
}
for entry in google_data:
    for old_key, new_key in google_rename_keys.items():
        entry[new_key] = entry.pop(old_key)
    # Remove google_maps_url
    if "google_maps_url" in entry:
        del entry["google_maps_url"]

# Merge data
merged_data = []
location_map = {}

def merge_ratings(rating1, rating2):
    """Merge ratings: Use available rating if one is 'N/A', otherwise use 'N/A'."""
    if rating1 == "N/A" and rating2 == "N/A":
        return "N/A"
    if rating1 == "N/A":
        return float(rating2)
    if rating2 == "N/A":
        return float(rating1)
    return max(float(rating1), float(rating2))

def merge_reviews(reviews1, reviews2):
    """Merge reviews and remove duplicates."""
    return list(set(reviews1 + reviews2))

for g_entry in google_data:
    loc = tuple(g_entry["Location"].values())
    location_map[loc] = g_entry

for d_entry in details_data:
    loc = tuple(d_entry["Location"].values())
    if loc in location_map:
        # Merge logic
        g_entry = location_map[loc]
        g_entry["Rating"] = merge_ratings(g_entry["Rating"], d_entry["Rating"])
        g_entry["Reviews_Count"] = max(
            int(g_entry["Reviews_Count"]) if g_entry["Reviews_Count"] != "N/A" else 0,
            int(d_entry["Reviews_Count"]) if d_entry["Reviews_Count"] != "N/A" else 0
        )
        g_entry["Category"] = d_entry["Category"]
        g_entry["Reviews"] = merge_reviews(g_entry["Reviews"], d_entry["Reviews"])
    else:
        # Add new data
        location_map[loc] = d_entry
        d_entry["Name"] = d_entry.pop("Name")  # Reorganize keys for alignment
        d_entry["Address"] = d_entry.pop("Address")

merged_data = list(location_map.values())

# Save results
os.makedirs('output', exist_ok=True)
output_path = 'output/merged_result.json'

with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(merged_data, f, ensure_ascii=False, indent=4)

print(f"The merged data has been saved to {output_path}.")



The merged data has been saved to output/merged_result.json.


In [7]:
import json

# Load the JSON file
with open('output/merged_result.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Count the number of entries
entry_count = len(data)

# Print the count
print(f"The number of data entries in the file is: {entry_count}")


The number of data entries in the file is: 211


In [1]:
import json
import openai
import os
import time

# Load API key from environment variable
openai.api_key = os.getenv("OPENAI_API_KEY")

def generate_search_keywords(name, reviews):
    """
    Generate search keywords for a restaurant using GPT.
    """
    prompt = f"""
    You are an expert in generating search keywords for restaurants.
    Based on the following information, create a list of concise and relevant search keywords for this restaurant:
    
    Restaurant Name: {name}
    Reviews:
    {reviews[:1000]}  # Limit reviews to 1000 characters
    
    Provide the list of search keywords as a JSON array, e.g., ["keyword1", "keyword2", "keyword3"].
    """

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant for generating restaurant search keywords."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=150,
            temperature=0.7
        )
        keywords = response['choices'][0]['message']['content'].strip()
        try:
            return json.loads(keywords)  # Parse JSON
        except json.JSONDecodeError:
            print(f"Invalid JSON response for {name}: {keywords}")
            return []
    except Exception as e:
        print(f"Error generating keywords for {name}: {e}")
        return []

# Load merged_result.json
with open('output/merged_result.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Add search keywords to each restaurant
for entry in data:
    name = entry.get("Name", "Unknown")
    reviews = " ".join(entry.get("Reviews", []))
    keywords = generate_search_keywords(name, reviews)
    entry["Search_Keywords"] = keywords

    # To avoid hitting API rate limits
    time.sleep(1)

# Save the updated data
output_path = 'withGPT_result.json'
with open(output_path, 'w', encoding='utf-8') as file:
    json.dump(data, file, ensure_ascii=False, indent=4)

print(f"Keywords added and saved to {output_path}.")


Error generating keywords for Le Potager du Marais: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

Error generating keywords for Jah Jah By Le Tricycle: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.co