In [31]:
import requests
import csv
import os

# Replace with your Kakao API key
api_key = '5ac23e3f4a305df669580f00637c8459'

# Define search areas: west, middle, and east of Jeju
search_areas = [
    {'x': 126.1628, 'y': 33.3946, 'area_name': 'West Jeju'},   # West Jeju
    {'x': 126.570667, 'y': 33.450701, 'area_name': 'Middle Jeju'},  # Middle Jeju
    {'x': 126.9748, 'y': 33.5097, 'area_name': 'East Jeju'}    # East Jeju
]

# Define the request headers
headers = {
    'Authorization': f'KakaoAK {api_key}'
}

# Define the CSV file name
csv_file = 'jeju_restaurants.csv'
file = open(csv_file, "w+")
file.close()

# Create a set to track unique place IDs
unique_place_ids = set()

# Check if the CSV file already exists
file_exists = os.path.isfile(csv_file)

# Open the CSV file in append mode
with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the header if the file does not exist
    writer.writerow(['Place Name', 'Longitude', 'Latitude', 'Phone', 'Average Rating', 'Average Price'])


    for area in search_areas:
        print(f"Searching in {area['area_name']}...")
        
        # Define the search parameters for each area
        params = {
            'query': 'restaurant',
            'x': area['x'],  # Longitude
            'y': area['y'],  # Latitude
            'radius': 20000  # Search radius in meters
        }

        # Make the API request to search for places
        response = requests.get('https://dapi.kakao.com/v2/local/search/keyword.json', headers=headers, params=params)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.json()
            # Process the data
            for document in data.get('documents', []):
                place_name = document.get('place_name')
                place_id = document.get('id')
                longitude = document.get('x')
                latitude = document.get('y')
                phone = document.get('phone')

                # Skip if the place has already been processed
                if place_id in unique_place_ids:
                    continue
                unique_place_ids.add(place_id)

                # Make another API request to get detailed information including reviews and pricing
                detail_response = requests.get(f'https://place.map.kakao.com/main/v/{place_id}', headers=headers)

                if detail_response.status_code == 200:
                    detail_data = detail_response.json()

                    # Extract reviews and price information (if available)
                    prices = detail_data.get('menuInfo', {}).get('menuList', [])

                    # Extract and calculate the average rating
                    comments = detail_data.get('comment', {}).get('list', [])
                    average_rating = None
                    if comments:
                        total_score = sum(comment.get('point', 0) for comment in comments)
                        review_count = len(comments)
                        average_rating = total_score / review_count

                    # Calculate the average price
                    average_price = None
                    if prices:
                        total_price = 0
                        count = 0
                        for price in prices:
                            price_value_str = price.get('price')
                            if price_value_str:  # Check if the price is not None
                                try:
                                    price_value = int(price_value_str.replace(',', '').replace('원', '').strip())
                                    total_price += price_value
                                    count += 1
                                except ValueError:
                                    print(f"Could not convert price '{price_value_str}' to an integer.")
                        if count > 0:
                            average_price = total_price / count

                    # Write to the CSV file
                    writer.writerow([place_name, longitude, latitude, phone, average_rating, average_price])
                else:
                    print(f"Error fetching details for place ID {place_id}: {detail_response.status_code}")
        else:
            print(f"Error: {response.status_code}")

print("Data collection complete. Results have been saved to", csv_file)


Searching in West Jeju...
Could not convert price '변동가격' to an integer.
Searching in Middle Jeju...
Searching in East Jeju...
Data collection complete. Results have been saved to jeju_restaurants.csv


In [37]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

l = []
o = {}

options = webdriver.ChromeOptions()
# Create a service object
service = Service(ChromeDriverManager().install())
# Initialize the Chrome WebDriver with service and options
driver = webdriver.Chrome(service=service, options=options)

adults = 4
checkin = "2024-09-17"
checkout = "2024-09-18"

driver.get(f"https://www.airbnb.co.in/s/Jeju-Island/homes?adults={adults}&checkin={checkin}&checkout={checkout}")

time.sleep(5)  # Increased sleep time to allow all elements to load
html_content = driver.page_source

driver.quit()

soup = BeautifulSoup(html_content, 'html.parser')

allData = soup.find_all("div", {"itemprop": "itemListElement"})

for i in range(0, len(allData)):
    try:
        o["property-title"] = allData[i].find('div', {'data-testid': 'listing-card-title'}).text.strip()
    except:
        o["property-title"] = None

    try:
        o["rating"] = allData[i].find('div', {'class': 't1a9j9y7'}).text.split()[0]
    except:
        o["rating"] = None

    try:
        o["price"] = allData[i].find('span', {"class": "_14y1gc"}).text.strip().split()[0]
    except:
        o["price"] = None

    try:
        o["price_with_tax"] = allData[i].find('div', {'class': '_i5duul'}).find('div', {"class": "_10d7v0r"}).text.strip().split(" total")[0]
    except:
        o["price_with_tax"] = None

    try:
        o["link"] = "https://www.airbnb.co.in" + allData[i].find('a', href=True)['href']
    except:
        o["link"] = None

    l.append(o)
    o = {}

df = pd.DataFrame(l)
df.to_csv('airbnb.csv', index=False, encoding='utf-8')
print(l)


[{'property-title': 'Boutique hotel in Hahyo-dong, Seogwipo', 'rating': '4.87', 'price': None, 'price_with_tax': '₹2,300', 'link': 'https://www.airbnb.co.in/rooms/52450676?adults=4&search_mode=regular_search&check_in=2024-09-17&check_out=2024-09-18&source_impression_id=p3_1724020899_P3HpeZxqwgiQGUOa&previous_page_section_name=1000&federated_search_id=117b9f6a-1786-4fd2-9f1b-aefd4036140a'}, {'property-title': 'Home in Jeju', 'rating': '4.91', 'price': None, 'price_with_tax': '₹10,746', 'link': 'https://www.airbnb.co.in/rooms/35164636?adults=4&search_mode=regular_search&check_in=2024-09-17&check_out=2024-09-18&source_impression_id=p3_1724020899_P3_p7Oup3zwOalFO&previous_page_section_name=1000&federated_search_id=117b9f6a-1786-4fd2-9f1b-aefd4036140a'}, {'property-title': 'Place to stay in Cheju', 'rating': '4.92', 'price': None, 'price_with_tax': '₹8,620', 'link': 'https://www.airbnb.co.in/rooms/635925161499738222?adults=4&search_mode=regular_search&check_in=2024-09-17&check_out=2024-09-1

In [45]:
# Function to get latitude and longitude using a geocoding API
def get_coordinates(address):
    api_key = 'AIzaSyA_lQsftIpN1Wi4bqovRwjUcRE_4D88H04'  # Replace with your actual API key
    base_url = "https://maps.googleapis.com/maps/api/geocode/json"
    params = {"address": address, "key": api_key}
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        result = response.json()
        if result['results']:
            location = result['results'][0]['geometry']['location']
            return location['lat'], location['lng']
    return None, None

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

l = []
o = {}

options = webdriver.ChromeOptions()
# Create a service object
service = Service(ChromeDriverManager().install())
# Initialize the Chrome WebDriver with service and options
driver = webdriver.Chrome(service=service, options=options)

adults = 2
rooms = 1
children = 0
checkin = "2024-09-19"
checkout = "2024-09-20"
destination = "Jeju Island"

# Parse the check-in and check-out dates
checkin_year, checkin_month, checkin_day = checkin.split("-")
checkout_year, checkout_month, checkout_day = checkout.split("-")

# Construct the correct Booking.com URL
url = (
    f"https://www.booking.com/searchresults.html?"
    f"ss={destination.replace(' ', '+')}&"
    f"checkin_year={checkin_year}&"
    f"checkin_month={checkin_month}&"
    f"checkin_monthday={checkin_day}&"
    f"checkout_year={checkout_year}&"
    f"checkout_month={checkout_month}&"
    f"checkout_monthday={checkout_day}&"
    f"group_adults={adults}&"
    f"no_rooms={rooms}&"
    f"group_children={children}&"
    f"lang=en-us"
)

driver.get(url)

time.sleep(5)  # Increase sleep time to allow all elements to load
html_content = driver.page_source

driver.quit()

soup = BeautifulSoup(html_content, 'html.parser')

# Finding all relevant hotel elements on the page
allData = soup.find_all("div", {"data-testid": "property-card"})

for i in range(0, len(allData)):
    try:
        o["property-title"] = allData[i].find('div', {'data-testid': 'title'}).text.strip()
    except:
        o["property-title"] = None

    try:
        o["rating"] = allData[i].find('div', {'data-testid': 'review-score'}).find('div').text.strip()
    except:
        o["rating"] = None

    try:
        o["price"] = allData[i].find('span', {"data-testid": "price-and-discounted-price"}).text.strip()
    except:
        o["price"] = None

    try:
        o["link"] = allData[i].find('a', href=True)['href']
    except:
        o["link"] = None
        
    try:
        o["location"] = allData[i].find('span', {'data-testid': 'address'}).text.strip()
        # Get latitude and longitude using the geocoding API
        o["latitude"], o["longitude"] = get_coordinates(o["location"])
    except:
        o["location"] = None
        o["latitude"], o["longitude"] = None, None

    l.append(o)
    o = {}

df = pd.DataFrame(l)
df.to_csv('booking_com.csv', index=False, encoding='utf-8')
print(l)


[{'property-title': 'Hotel Yeon', 'rating': 'Scored 9.4 9.4', 'price': '$67', 'link': 'https://www.booking.com/hotel/kr/hotelyeon.html?aid=304142&label=gen173nr-1FCAQoggJCEnNlYXJjaF9qZWp1IGlzbGFuZEgxWARooAKIAQGYATG4ARfIAQzYAQHoAQH4AQOIAgGoAgO4Avj6ibYGwAIB0gIkOWVhZDBiMTgtYzMwNC00MTdiLTkwZTAtYWMyN2Y1NGNjYTRk2AIF4AIB&ucfs=1&arphpl=1&checkin=2024-09-19&checkout=2024-09-20&group_adults=2&req_adults=2&no_rooms=1&group_children=0&req_children=0&hpos=1&hapos=1&sr_order=popularity&srpvid=7f7ca1fc41b00144&srepoch=1724022138&all_sr_blocks=1203338204_392580785_2_2_0_553533&highlighted_blocks=1203338204_392580785_2_2_0_553533&matching_block_id=1203338204_392580785_2_2_0_553533&sr_pri_blocks=1203338204_392580785_2_2_0_553533_9055200&from=searchresults', 'location': 'Seogwipo City, Seogwipo', 'latitude': 33.2532177, 'longitude': 126.5609945}, {'property-title': 'Casaloma Hotel', 'rating': 'Scored 8.4 8.4', 'price': '$86', 'link': 'https://www.booking.com/hotel/kr/casaloma.html?aid=304142&label=gen173

In [49]:
import pandas as pd

# Load the CSV files
airbnb_df = pd.read_csv('airbnb.csv')
booking_com_df = pd.read_csv('booking_com.csv')
jeju_locations_df = pd.read_csv('jeju_locations.csv')
jeju_restaurants_df = pd.read_csv('jeju_restaurants.csv')

# Print the column names of each DataFrame to inspect them
print("airbnb_df columns:", airbnb_df.columns)
print("booking_com_df columns:", booking_com_df.columns)
print("jeju_locations_df columns:", jeju_locations_df.columns)
print("jeju_restaurants_df columns:", jeju_restaurants_df.columns)

# Standardizing the column names across the datasets
if len(airbnb_df.columns) == 6:
    airbnb_df.columns = ["Place Name", "Longitude", "Latitude", "Phone", "Average Rating", "Average Price (WON)"]

if len(booking_com_df.columns) == 7:
    booking_com_df.columns = ["Place Name", "Average Rating", "Average Price (WON)", "Link", "Location", "Latitude", "Longitude"]

if len(jeju_locations_df.columns) == 9:
    jeju_locations_df.columns = ["Place ID", "Place Name", "Address", "Latitude", "Longitude", "Registration Date", "Last Modified", "Jibun Address", "Usage Status"]

if len(jeju_restaurants_df.columns) == 6:
    jeju_restaurants_df.columns = ["Place Name", "Longitude", "Latitude", "Phone", "Average Rating", "Average Price (WON)"]

# Selecting the necessary columns and including 'Link' from booking_com_df
booking_com_df = booking_com_df[["Place Name", "Latitude", "Longitude", "Average Rating", "Average Price (WON)", "Link"]]
jeju_locations_df = jeju_locations_df[["Place Name", "Latitude", "Longitude"]]

# Concatenating all the dataframes
combined_df = pd.concat([airbnb_df, booking_com_df, jeju_locations_df, jeju_restaurants_df], ignore_index=True)
# Save the combined dataframe to a new CSV file
combined_df.to_csv('jeju_itinerary_data.csv', index=False)

print("Combined CSV saved as 'jeju_itinerary_data.csv'")

airbnb_df columns: Index(['property-title', 'rating', 'price', 'price_with_tax', 'link'], dtype='object')
booking_com_df columns: Index(['property-title', 'rating', 'price', 'link', 'location', 'latitude',
       'longitude'],
      dtype='object')
jeju_locations_df columns: Index(['장소ID', '장소명', '도로명주소', '위도', '경도', '등록일시', '수정일시', '지번주소', '사용여부'], dtype='object')
jeju_restaurants_df columns: Index(['Place Name', 'Longitude', 'Latitude', 'Phone', 'Average Rating',
       'Average Price (WON)'],
      dtype='object')
Combined CSV saved as 'jeju_itinerary_data.csv'


In [46]:
from openai import OpenAI # openai==1.2.0
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_upstage import UpstageEmbeddings

file_path = "jeju_itinerary_data.csv"

loader = CSVLoader(
    file_path="jeju_itinerary_data.csv",
    csv_args={
        "delimiter": ",",
        "quotechar": '"',
        "fieldnames": ["property-title","rating","price","price_with_tax","link","Place Name","Latitude","Longitude","Average Rating","Average Price (WON)","Link","Phone"],
    },
)

data = loader.load()

embeddings = UpstageEmbeddings(model="solar-embedding-1-large")



client = OpenAI(
  api_key="up_kGzaFmJXhtbE3hAC3oqTi6uZZSGvb",
  base_url="https://api.upstage.ai/v1/solar"
)



stream = client.chat.completions.create(
  model="solar-1-mini-chat",
  messages=[
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": """Plan a itinerary from the jeju_itinerary_data.csv file that keeps track budget and location. Format it like this
| Time      | Day 1        | Day 2        | Day 3        | Day 4        | Day 5        | Day 6        | Day 7        |
|-----------|--------------|--------------|--------------|--------------|--------------|--------------|--------------|
| 7:00 AM   |              |              |              |              |              |              |              |
| 8:00 AM   |              |              |              |              |              |              |              |
| 9:00 AM   |              |              |              |              |              |              |              |
| 10:00 AM  |              |              |              |              |              |              |              |
| 11:00 AM  |              |              |              |              |              |              |              |
| 12:00 PM  |              |              |              |              |              |              |              |
| 1:00 PM   |              |              |              |              |              |              |              |
| 2:00 PM   |              |              |              |              |              |              |              |
| 3:00 PM   |              |              |              |              |              |              |              |
| 4:00 PM   |              |              |              |              |              |              |              |
| 5:00 PM   |              |              |              |              |              |              |              |
| 6:00 PM   |              |              |              |              |              |              |              |
| 7:00 PM   |              |              |              |              |              |              |              |
| 8:00 PM   |              |              |              |              |              |              |              |
| 9:00 PM   |              |              |              |              |              |              |              |
| 10:00 PM  |              |              |              |              |              |              |              |
| 11:00 PM  |              |              |              |              |              |              |              |"""
    }
  ],
  stream=True,
)

for chunk in stream:
  if chunk.choices[0].delta.content is not None:
    print(chunk.choices[0].delta.content, end="")

# Use with stream=False
# print(stream.choices[0].message.content)

Itinerary:

Day 1:
- Location: New York City
- Budget: $200
- Activities:
  - Visit the Statue of Liberty ($30)
  - Explore Central Park (Free)
  - Have dinner at a local restaurant ($70)
  - Watch a Broadway show ($100)

Day 2:
- Location: Washington D.C.
- Budget: $150
- Activities:
  - Visit the Lincoln Memorial (Free)
  - Tour the Smithsonian Museums (Free)
  - Have lunch at a local food truck ($20)
  - Visit the U.S. Capitol Building (Free)

Day 3:
- Location: Miami
- Budget

KeyboardInterrupt: 