Uses KakaoMap API to pull restaurant data.




In [6]:
import requests
import csv
import os

# The Kakao REST API key is read from the environment so the secret is never
# stored in the notebook. Set KAKAO_API_KEY before running this cell.
api_key = os.environ.get('KAKAO_API_KEY')
if not api_key:
    raise RuntimeError('Set the KAKAO_API_KEY environment variable to your Kakao REST API key.')

# Seconds to wait for a single HTTP response before giving up
REQUEST_TIMEOUT = 10

# Define search areas: west, middle, and east of Jeju (x = longitude, y = latitude)
search_areas = [
    {'x': 126.1628, 'y': 33.3946, 'area_name': 'West Jeju'},
    {'x': 126.570667, 'y': 33.450701, 'area_name': 'Middle Jeju'},
    {'x': 126.9748, 'y': 33.5097, 'area_name': 'East Jeju'},
]

# Define the request headers
headers = {
    'Authorization': f'KakaoAK {api_key}'
}

# Define the CSV file name
csv_file = 'jeju_restaurants.csv'

# Create a set to track unique place IDs (the search areas overlap)
unique_place_ids = set()


def _average(values):
    """Return the arithmetic mean of ``values``, or None when there are none."""
    values = list(values)
    return sum(values) / len(values) if values else None


def _parse_price(price_text):
    """Return a menu price such as '12,000원' as an int, or None when it is not numeric."""
    try:
        return int(str(price_text).replace(',', '').replace('원', '').strip())
    except ValueError:
        print(f"Could not convert price '{price_text}' to an integer.")
        return None


def _fetch_rating_and_price(place_id):
    """Fetch the detail data of one place and summarise it.

    Returns:
        ``(average_rating, average_price)`` where either value may be None when
        the place has no usable reviews / menu prices, or None when the detail
        data could not be retrieved at all (an error message is printed).
    """
    try:
        detail_response = requests.get(
            f'https://place.map.kakao.com/main/v/{place_id}',
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as error:
        print(f"Error fetching details for place ID {place_id}: {error}")
        return None

    if detail_response.status_code != 200:
        print(f"Error fetching details for place ID {place_id}: {detail_response.status_code}")
        return None

    try:
        detail_data = detail_response.json()
    except ValueError:
        # A 200 response with a non-JSON body must not abort the whole crawl.
        print(f"Error fetching details for place ID {place_id}: response was not valid JSON")
        return None

    # `or {}` / `or []` also cover keys that are present but explicitly null.
    comments = (detail_data.get('comment') or {}).get('list') or []
    menu_items = (detail_data.get('menuInfo') or {}).get('menuList') or []

    # Only comments that actually carry a numeric score take part in the average;
    # counting a missing score as 0 would drag the rating down.
    scores = [
        comment.get('point') for comment in comments
        if isinstance(comment.get('point'), (int, float))
    ]

    prices = []
    for menu_item in menu_items:
        price_text = menu_item.get('price')
        if price_text:  # Skip menu entries without a price
            price_value = _parse_price(price_text)
            if price_value is not None:
                prices.append(price_value)

    return _average(scores), _average(prices)


# Open the CSV file in write mode: every run starts from a fresh file
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # The price column is named 'Average Price (WON)' because that is the column
    # name the itinerary builder later in this notebook reads.
    writer.writerow(['Place Name', 'Longitude', 'Latitude', 'Phone', 'Average Rating', 'Average Price (WON)'])

    for area in search_areas:
        print(f"Searching in {area['area_name']}...")

        # Define the search parameters for each area
        params = {
            'query': 'restaurant',
            'x': area['x'],  # Longitude
            'y': area['y'],  # Latitude
            'radius': 20000  # Search radius in meters
        }

        # Make the API request to search for places
        try:
            response = requests.get(
                'https://dapi.kakao.com/v2/local/search/keyword.json',
                headers=headers,
                params=params,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as error:
            print(f"Error: {error}")
            continue

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            continue

        for document in response.json().get('documents', []):
            place_id = document.get('id')

            # Skip if the place has already been processed
            if place_id in unique_place_ids:
                continue
            unique_place_ids.add(place_id)

            # Second request: detailed information including reviews and pricing
            details = _fetch_rating_and_price(place_id)
            if details is None:
                continue
            average_rating, average_price = details

            # Write to the CSV file
            writer.writerow([
                document.get('place_name'),
                document.get('x'),
                document.get('y'),
                document.get('phone'),
                average_rating,
                average_price,
            ])

print("Data collection complete. Results have been saved to", csv_file)


Searching in West Jeju...
Could not convert price '변동가격' to an integer.
Searching in Middle Jeju...
Searching in East Jeju...
Data collection complete. Results have been saved to jeju_restaurants.csv


Helper function for finding coordinates using the Google Geocoding API.

In [7]:
import requests
# Function to get latitude and longitude using a geocoding API
def get_coordinates(address, api_key=None):
    """Geocode ``address`` with the Google Geocoding API.

    Args:
        address: Free-text address or place name to look up.
        api_key: Google Maps API key. When omitted, the key is read from the
            GOOGLE_MAPS_API_KEY environment variable so that the secret does
            not have to be stored in the notebook.

    Returns:
        ``(latitude, longitude)`` of the first geocoding result, or
        ``(None, None)`` when the address is empty, the request fails, or the
        API returns no result.

    Raises:
        RuntimeError: If no API key was passed and GOOGLE_MAPS_API_KEY is unset.
    """
    import os  # local import: this cell only imports `requests`

    # Nothing to look up (e.g. a missing title/address scraped as None or NaN).
    if not isinstance(address, str) or not address.strip():
        return None, None

    api_key = api_key or os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise RuntimeError('Set the GOOGLE_MAPS_API_KEY environment variable or pass api_key.')

    base_url = "https://maps.googleapis.com/maps/api/geocode/json"
    params = {"address": address, "key": api_key}
    try:
        # The timeout keeps one stalled request from hanging the whole notebook.
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as error:
        print(f"Geocoding request failed for '{address}': {error}")
        return None, None

    if response.status_code == 200:
        try:
            result = response.json()
        except ValueError:
            return None, None
        # `.get` tolerates payloads without a 'results' key.
        results = result.get('results')
        if results:
            location = results[0]['geometry']['location']
            return location['lat'], location['lng']
    return None, None

In [11]:
!pip install selenium webdriver_manager bs4 webdriver_manager.chrome

[31mERROR: Could not find a version that satisfies the requirement webdriver_manager.chrome (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for webdriver_manager.chrome[0m[31m
[0m

Webscraper for Airbnb

In [12]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

def _airbnb_field(extract):
    """Return ``extract()``, or None when the scraped element is missing or malformed.

    Only the errors a failed lookup produces are caught (``find`` returning None,
    an empty ``split`` result, a missing attribute); anything else propagates.
    """
    try:
        return extract()
    except (AttributeError, IndexError, KeyError, TypeError):
        return None


# Column order of the exported CSV
AIRBNB_COLUMNS = ["property-title", "rating", "price", "price_with_tax", "link"]

airbnb_listings = []

options = webdriver.ChromeOptions()
# Create a service object
service = Service(ChromeDriverManager().install())
# Initialize the Chrome WebDriver with service and options
driver = webdriver.Chrome(service=service, options=options)

adults = 4
checkin = "2024-09-17"
checkout = "2024-09-18"

try:
    driver.get(f"https://www.airbnb.co.in/s/Jeju-Island/homes?adults={adults}&checkin={checkin}&checkout={checkout}")
    time.sleep(5)  # Give the page time to load all elements
    html_content = driver.page_source
finally:
    # Always release the browser, even if loading the page failed.
    driver.quit()

soup = BeautifulSoup(html_content, 'html.parser')

allData = soup.find_all("div", {"itemprop": "itemListElement"})

# NOTE(review): 't1a9j9y7', '_14y1gc', '_i5duul' and '_10d7v0r' look like generated
# class names and are presumably brittle; verify them against the live page.
for card in allData:
    airbnb_listings.append({
        "property-title": _airbnb_field(
            lambda: card.find('div', {'data-testid': 'listing-card-title'}).text.strip()
        ),
        # First whitespace-separated token of the rating element's text
        "rating": _airbnb_field(
            lambda: card.find('div', {'class': 't1a9j9y7'}).text.split()[0]
        ),
        "price": _airbnb_field(
            lambda: card.find('span', {"class": "_14y1gc"}).text.strip().split()[0]
        ),
        # Text before the " total" suffix
        "price_with_tax": _airbnb_field(
            lambda: card.find('div', {'class': '_i5duul'}).find('div', {"class": "_10d7v0r"}).text.strip().split(" total")[0]
        ),
        "link": _airbnb_field(
            lambda: "https://www.airbnb.co.in" + card.find('a', href=True)['href']
        ),
    })

# Passing the columns explicitly keeps the CSV header stable even when no
# listing was found (an empty frame would otherwise be written without columns).
df = pd.DataFrame(airbnb_listings, columns=AIRBNB_COLUMNS)
df.to_csv('airbnb.csv', index=False, encoding='utf-8')
print(airbnb_listings)


WebDriverException: Message: unknown error: cannot find Chrome binary
Stacktrace:
#0 0x5894f5eed4e3 <unknown>
#1 0x5894f5c1cc76 <unknown>
#2 0x5894f5c43757 <unknown>
#3 0x5894f5c42029 <unknown>
#4 0x5894f5c80ccc <unknown>
#5 0x5894f5c8047f <unknown>
#6 0x5894f5c77de3 <unknown>
#7 0x5894f5c4d2dd <unknown>
#8 0x5894f5c4e34e <unknown>
#9 0x5894f5ead3e4 <unknown>
#10 0x5894f5eb13d7 <unknown>
#11 0x5894f5ebbb20 <unknown>
#12 0x5894f5eb2023 <unknown>
#13 0x5894f5e801aa <unknown>
#14 0x5894f5ed66b8 <unknown>
#15 0x5894f5ed6847 <unknown>
#16 0x5894f5ee6243 <unknown>
#17 0x7825bb59fac3 <unknown>


Get the coordinates of each Airbnb listing by geocoding the city named in its title.


In [None]:
import pandas as pd

def extract_city(property_title):
    """Extract the city name from an Airbnb listing title.

    The city is taken to be the last comma-separated segment of the title
    (e.g. ``"Home in Seogwipo, Jeju"`` -> ``"Jeju"``). Titles without a comma
    fall back to their last whitespace-separated word.

    Args:
        property_title: The listing title. May be None/NaN when the scraper
            could not read a title.

    Returns:
        The extracted city as a string, or None when the title is missing,
        not a string, or contains no usable text.
    """
    # Missing titles arrive as None or as float NaN after a CSV round-trip.
    if not isinstance(property_title, str):
        return None

    # Ignore empty segments so a trailing comma does not yield an empty city.
    segments = [segment.strip() for segment in property_title.split(',') if segment.strip()]
    if not segments:
        return None
    if len(segments) > 1:
        return segments[-1]
    # Fallback: split by spaces and take the last word
    return segments[0].split()[-1]

# Read the scraped Airbnb listings
df = pd.read_csv('airbnb.csv')

# Extract city names from the listing titles
df['city'] = df['property-title'].apply(extract_city)

# Geocode each distinct city only once: many listings share a city, and every
# lookup is a billable network request.
city_coordinates = {city: get_coordinates(city) for city in df['city'].dropna().unique()}

# Listings without a usable city get (None, None).
coordinates = [city_coordinates.get(city, (None, None)) for city in df['city']]

# Naming the columns explicitly keeps this working when the frame is empty.
df[['latitude', 'longitude']] = pd.DataFrame(
    coordinates, index=df.index, columns=['latitude', 'longitude']
)

# Save the updated DataFrame to a new CSV file
df.to_csv('airbnb_properties_with_coordinates.csv', index=False)

print("Updated CSV file saved.")


Webscraper for hotels using Booking.com



In [13]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

from urllib.parse import urlencode


def _booking_field(extract):
    """Return ``extract()``, or None when the scraped element is missing or malformed.

    Only the errors a failed lookup produces are caught (``find`` returning None,
    a missing attribute); anything else propagates.
    """
    try:
        return extract()
    except (AttributeError, IndexError, KeyError, TypeError):
        return None


# Column order of the exported CSV
BOOKING_COLUMNS = ["property-title", "rating", "price", "link", "location", "latitude", "longitude"]

booking_listings = []

options = webdriver.ChromeOptions()
# Create a service object
service = Service(ChromeDriverManager().install())
# Initialize the Chrome WebDriver with service and options
driver = webdriver.Chrome(service=service, options=options)

adults = 2
rooms = 1
children = 0
checkin = "2024-09-19"
checkout = "2024-09-20"
destination = "Jeju Island"

# Parse the check-in and check-out dates
checkin_year, checkin_month, checkin_day = checkin.split("-")
checkout_year, checkout_month, checkout_day = checkout.split("-")

# Construct the Booking.com search URL. urlencode escapes every value (spaces
# become '+'), so destinations with special characters cannot break the query.
url = "https://www.booking.com/searchresults.html?" + urlencode({
    "ss": destination,
    "checkin_year": checkin_year,
    "checkin_month": checkin_month,
    "checkin_monthday": checkin_day,
    "checkout_year": checkout_year,
    "checkout_month": checkout_month,
    "checkout_monthday": checkout_day,
    "group_adults": adults,
    "no_rooms": rooms,
    "group_children": children,
    "lang": "en-us",
})

try:
    driver.get(url)
    time.sleep(5)  # Give the page time to load all elements
    html_content = driver.page_source
finally:
    # Always release the browser, even if loading the page failed.
    driver.quit()

soup = BeautifulSoup(html_content, 'html.parser')

# Finding all relevant hotel elements on the page
allData = soup.find_all("div", {"data-testid": "property-card"})

# Addresses repeat across properties; geocode each distinct address only once.
geocode_cache = {}

for card in allData:
    location = _booking_field(lambda: card.find('span', {'data-testid': 'address'}).text.strip())

    latitude, longitude = None, None
    if location:
        if location not in geocode_cache:
            try:
                # Get latitude and longitude using the geocoding API
                geocode_cache[location] = get_coordinates(location)
            except Exception as error:
                # Geocoding is best-effort: keep the scraped row (and its
                # address) even when the lookup itself fails.
                print(f"Could not geocode '{location}': {error}")
                geocode_cache[location] = (None, None)
        latitude, longitude = geocode_cache[location]

    booking_listings.append({
        "property-title": _booking_field(
            lambda: card.find('div', {'data-testid': 'title'}).text.strip()
        ),
        "rating": _booking_field(
            lambda: card.find('div', {'data-testid': 'review-score'}).find('div').text.strip()
        ),
        "price": _booking_field(
            lambda: card.find('span', {"data-testid": "price-and-discounted-price"}).text.strip()
        ),
        "link": _booking_field(lambda: card.find('a', href=True)['href']),
        "location": location,
        "latitude": latitude,
        "longitude": longitude,
    })

# Passing the columns explicitly keeps the CSV header stable even when no
# property card was found.
df = pd.DataFrame(booking_listings, columns=BOOKING_COLUMNS)
df.to_csv('booking_com.csv', index=False, encoding='utf-8')
print(booking_listings)


WebDriverException: Message: unknown error: cannot find Chrome binary
Stacktrace:
#0 0x5c6161b7b4e3 <unknown>
#1 0x5c61618aac76 <unknown>
#2 0x5c61618d1757 <unknown>
#3 0x5c61618d0029 <unknown>
#4 0x5c616190eccc <unknown>
#5 0x5c616190e47f <unknown>
#6 0x5c6161905de3 <unknown>
#7 0x5c61618db2dd <unknown>
#8 0x5c61618dc34e <unknown>
#9 0x5c6161b3b3e4 <unknown>
#10 0x5c6161b3f3d7 <unknown>
#11 0x5c6161b49b20 <unknown>
#12 0x5c6161b40023 <unknown>
#13 0x5c6161b0e1aa <unknown>
#14 0x5c6161b646b8 <unknown>
#15 0x5c6161b64847 <unknown>
#16 0x5c6161b74243 <unknown>
#17 0x7bc3b2993ac3 <unknown>


In [14]:
!pip install geopy



Algorithm that narrows down the candidate locations, restaurants, and hostels based on user preferences.

In [15]:
import pandas as pd
from geopy.distance import geodesic

# Function to calculate distance between two points
def calculate_distance(lat1, long1, lat2, long2):
    """Return the geodesic distance in kilometers between two (lat, long) points."""
    origin = (lat1, long1)
    destination = (lat2, long2)
    separation = geodesic(origin, destination)
    return separation.kilometers

def _parse_price_column(price_text, currency_symbol):
    """Convert a column of price strings (e.g. '₹12,345') to floats; unparsable values become NaN."""
    cleaned = (
        price_text.astype(str)
        # regex=False: '$' must be removed literally. On pandas < 2.0 the default
        # regex=True treats '$' as an end-of-string anchor and removes nothing.
        .str.replace(currency_symbol, '', regex=False)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    return pd.to_numeric(cleaned, errors='coerce')


def _with_valid_coordinates(frame, lat_col, long_col):
    """Return a copy of ``frame`` restricted to rows whose latitude/longitude are in range."""
    in_range = frame[lat_col].between(-90, 90) & frame[long_col].between(-180, 180)
    # .copy() so that columns added later do not write into a slice of the original.
    return frame[in_range].copy()


def _distances_from(origin_lat, origin_long, frame, lat_col, long_col):
    """Return the distance in km from the origin to every row of ``frame``, in row order."""
    return [
        calculate_distance(origin_lat, origin_long, row_lat, row_long)
        for row_lat, row_long in zip(frame[lat_col], frame[long_col])
    ]


def jeju_optimized_itinerary_data(start_long, start_lat, num_of_days, isAirbnb, budget, preferred_locations=None, max_hostels=3):
    """Pick affordable accommodation and collect nearby locations and restaurants.

    Reads the scraped CSV files, keeps up to ``max_hostels`` places to stay whose
    price is within ``budget``, and for each of them gathers the closest
    locations and restaurants. The result is also written to
    'optimized_jeju_itinerary.csv'.

    Args:
        start_long: Longitude of the starting point (currently unused).
        start_lat: Latitude of the starting point (currently unused).
        num_of_days: Length of the stay; ``num_of_days * 9`` locations and the
            same number of restaurants are kept per hostel.
        isAirbnb: True to read 'airbnb_properties_with_coordinates.csv',
            False to read 'booking_com.csv'.
        budget: Upper bound applied to the hostel price, and to the hostel price
            plus the summed average price of the selected restaurants.
        preferred_locations: Optional list of place names. When given, hostels
            are ordered by their distance to the nearest of these places.
        max_hostels: Maximum number of hostels to consider (default 3).

    Returns:
        A DataFrame with the columns 'hostel', 'location', 'price', 'locations'
        and 'restaurants' (one row per itinerary; empty when nothing fits).
    """
    # Load the CSV files
    if isAirbnb:
        hostel_df = pd.read_csv('airbnb_properties_with_coordinates.csv')
        hostel_df['price'] = _parse_price_column(hostel_df['price_with_tax'], '₹')
    else:
        hostel_df = pd.read_csv('booking_com.csv')
        hostel_df['price'] = _parse_price_column(hostel_df['price'], '$')

    jeju_locations_df = pd.read_csv('jeju_locations.csv')
    # The restaurant scraper in this notebook writes the header 'Average Price';
    # normalise it to the name used below (a no-op when the column is already
    # called 'Average Price (WON)').
    jeju_restaurants_df = pd.read_csv('jeju_restaurants.csv').rename(
        columns={'Average Price': 'Average Price (WON)'}
    )

    # Filter out invalid latitude and longitude values
    hostel_df = _with_valid_coordinates(hostel_df, 'latitude', 'longitude')
    jeju_locations_df = _with_valid_coordinates(jeju_locations_df, '위도', '경도')
    jeju_restaurants_df = _with_valid_coordinates(jeju_restaurants_df, 'Latitude', 'Longitude')

    # Order hostels by distance to the nearest preferred location
    if preferred_locations:
        location_coords = [
            coords
            for coords in (get_coordinates(location) for location in preferred_locations)
            if coords[0] is not None and coords[1] is not None
        ]
        # Skip the ordering when no preferred location could be geocoded;
        # min() over an empty sequence would raise.
        if location_coords:
            hostel_df['location_distance'] = [
                min(calculate_distance(lat, long, loc_lat, loc_long) for loc_lat, loc_long in location_coords)
                for lat, long in zip(hostel_df['latitude'], hostel_df['longitude'])
            ]
            # Closest first, then cheapest, then best rated.
            hostel_df = hostel_df.sort_values(
                by=['location_distance', 'price', 'rating'], ascending=[True, True, False]
            )

    # Select the first `max_hostels` hostels within the budget
    selected_hostels = hostel_df[hostel_df['price'] <= budget].head(max_hostels)

    # For each selected hostel, find the nearby locations and restaurants
    itineraries = []
    for _, hostel in selected_hostels.iterrows():
        hostel_lat = hostel['latitude']
        hostel_long = hostel['longitude']

        # Calculate distances to locations and restaurants
        locations_by_distance = jeju_locations_df.assign(
            distance=_distances_from(hostel_lat, hostel_long, jeju_locations_df, '위도', '경도')
        )
        restaurants_by_distance = jeju_restaurants_df.assign(
            distance=_distances_from(hostel_lat, hostel_long, jeju_restaurants_df, 'Latitude', 'Longitude')
        )

        # Locations: closest first. Restaurants: closest, then cheapest, then best rated.
        nearby_locations = locations_by_distance.sort_values(by='distance').head(num_of_days * 9)
        nearby_restaurants = restaurants_by_distance.sort_values(
            by=['distance', 'Average Price (WON)', 'Average Rating'], ascending=[True, True, False]
        ).head(num_of_days * 9)

        # Check budget constraints
        # NOTE(review): hostel prices are parsed from '₹' / '$' strings while the
        # restaurant prices are in WON, so this sum mixes currencies; confirm
        # whether a conversion is needed.
        total_restaurant_cost = nearby_restaurants['Average Price (WON)'].sum()
        if hostel['price'] + total_restaurant_cost <= budget:
            itineraries.append({
                'hostel': hostel['property-title'],
                # Airbnb rows carry 'city'; Booking.com rows carry 'location' instead.
                'location': hostel.get('city', hostel.get('location')),
                'price': hostel['price'],
                'locations': nearby_locations.to_dict(orient='records'),
                'restaurants': nearby_restaurants.to_dict(orient='records')
            })

    # Convert to DataFrame for easy export to CSV; explicit columns keep the
    # header stable even when no itinerary fits the budget.
    itinerary_df = pd.DataFrame(
        itineraries, columns=['hostel', 'location', 'price', 'locations', 'restaurants']
    )
    itinerary_df.to_csv('optimized_jeju_itinerary.csv', index=False)

    return itinerary_df



Test

In [16]:
# Smoke test: 4-day Airbnb stay near Seogwipo with a budget of 2,000,000.
jeju_optimized_itinerary_data(
    start_long=126.5609945,
    start_lat=33.2532177,
    num_of_days=4,
    isAirbnb=True,
    budget=2000000,
    preferred_locations=['Seogwipo'],
)

FileNotFoundError: [Errno 2] No such file or directory: 'airbnb_properties_with_coordinates.csv'

In [None]:
!pip install openai langchain_ollama langchain_community langchain_upstage langchain_text_splitters langchain_chroma langsmith

RAG with Upstage.ai

In [None]:
from openai import OpenAI
from langchain import hub
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_upstage import UpstageEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

import os  # needed for the API key lookup below

# The itinerary CSV written by jeju_optimized_itinerary_data()
file_path = 'optimized_jeju_itinerary.csv'

# The Upstage API key is read from the environment so the secret is never
# stored in the notebook. Set UPSTAGE_API_KEY before running this cell.
upstage_api_key = os.environ.get("UPSTAGE_API_KEY")
if not upstage_api_key:
    raise RuntimeError("Set the UPSTAGE_API_KEY environment variable to your Upstage API key.")

# Load the CSV file. No explicit `fieldnames`: the file starts with a header
# row (hostel, location, price, locations, restaurants), and passing
# `fieldnames` would make the reader treat that header as a data row.
loader = CSVLoader(
    file_path=file_path,
    csv_args={
        "delimiter": ",",
        "quotechar": '"',
    },
    encoding="utf-8"  # Specify the correct encoding here
)

data = loader.load()

# Split the documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(data)

# Generate embeddings
embeddings = UpstageEmbeddings(model="solar-embedding-1-large-query", api_key=upstage_api_key)

# Create a vector store with Chroma
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [None]:
from langchain_upstage import ChatUpstage

import os  # needed for the API key lookups below

# API keys are read from the environment so the secrets are never stored in
# the notebook. Set UPSTAGE_API_KEY (and optionally LANGSMITH_API_KEY) first.
upstage_api_key = os.environ.get("UPSTAGE_API_KEY")
if not upstage_api_key:
    raise RuntimeError("Set the UPSTAGE_API_KEY environment variable to your Upstage API key.")

llm = ChatUpstage(
  api_key=upstage_api_key,
  base_url="https://api.upstage.ai/v1/solar"
)


# Create a retriever from the vector store
retriever = vectorstore.as_retriever()

# Pull the RAG prompt template from the LangChain hub; the LangSmith key is
# passed through only when it is configured.
prompt = hub.pull("rlm/rag-prompt", api_key=os.environ.get("LANGSMITH_API_KEY"))

# Function to format documents
def format_docs(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Set up the RAG (Retrieval-Augmented Generation) chain
# "context" is the retrieved documents flattened to text by format_docs;
# "question" is the caller's input passed through unchanged.
chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# prompt -> LLM -> plain string
rag_chain = chain_inputs | prompt | llm | StrOutputParser()


In [None]:
# Prompt for the RAG chain. The start coordinates are labelled as longitude and
# latitude, matching the (start_long, start_lat) order used by
# jeju_optimized_itinerary_data.
# NOTE(review): the budget here is 20000 WON, while the itinerary CSV was built
# with budget=2000000 earlier in the notebook; confirm which value is intended.
message = """Plan a travel itinerary from the optimized_jeju_itinerary.csv file, which provides a hostel to stay at together with restaurants and locations near that hostel. The budget is 20000 WON and the preferred location is Seogwipo. We start at the coordinates (longitude 126.5609945, latitude 33.2532177) and we are there for 4 days.
Ignore opening and closing hours and assume everything opens at 8 AM and closes at 8 PM. Plan each day from 8 AM to 8 PM, tracking each hour. Include 3 locations and 3 restaurants each day from optimized_jeju_itinerary.csv, and state where the person will be staying. Use an optimal route between the restaurants and locations based on their coordinates; print the name of each location/restaurant, not its coordinates.
Make sure the route does not have a transportation cost that exceeds the budget. A taxi or car has a standard cost of 1,340 KRW per km, a bus costs 150 KRW per km, and walking/biking costs 0. The user's preferred transportation is bus. Print out the hostel the user will be staying at and attach its associated link."""

response = rag_chain.invoke(message)
print(response)