In [4]:
import json
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup

In [5]:
_TRIPADVISOR_BASE_URL = "https://www.tripadvisor.com"


def _parse_attraction(attraction, country, city, attraction_type):
    """Build one destination record from a single search-result element.

    Args:
        attraction: The element matched by the result-title selector.
        country: Country label copied into the record.
        city: City label copied into the record.
        attraction_type: Attraction category copied into the record.

    Returns:
        dict: The destination record, with keys in the order they are
        written to the JSON file.

    Raises:
        AttributeError: A ``location``/``rating``/``review-count`` span
            was not found after the element.
        KeyError: The element has no ``href`` attribute.
        ValueError: The rating or review count is not numeric.
    """
    def span_text(css_class):
        # find_next returns None when nothing matches, so a missing span
        # surfaces as AttributeError for the caller to handle.
        return attraction.find_next('span', class_=css_class).text.strip()

    return {
        "country": country,
        "city": city,
        "type_of_attraction": attraction_type,
        "destination_name": attraction.text.strip(),
        "visiting_hours": "Check website for details",  # Placeholder
        "entrance_fee": "N/A",  # Placeholder
        "recommended_weather": "Any",  # Placeholder
        "address": span_text('location'),
        "rating": float(span_text('rating')),
        # Review counts may carry thousands separators ("1,234").
        "reviews": int(span_text('review-count').replace(",", "")),
        # urljoin handles relative and already-absolute hrefs alike.
        "website_url": urljoin(_TRIPADVISOR_BASE_URL, attraction['href']),
        "additional_info": {
            "description": "N/A",
            "tips": "N/A",
            "travel_options": "N/A"
        }
    }


def scrape_destinations(country, city, attraction_type,
                        max_results=50, page_size=10, timeout=10):
    """Scrape search results for attractions in a city and save them as JSON.

    Result pages are requested at offsets ``0, page_size, 2 * page_size, ...``
    below ``max_results``. Every parsed attraction becomes one record, and the
    records are written to ``"{city}_{attraction_type}_destinations.json"`` in
    the current working directory (the file is written even when empty).

    Args:
        country: Country label stored on every record (not used in the query).
        city: City to search for.
        attraction_type: Kind of attraction to search for, e.g. ``"museums"``.
        max_results: Exclusive upper bound for the result offset.
        page_size: Offset step between consecutive result pages.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        list[dict]: The parsed destination records; empty when nothing could
        be fetched or parsed.

    Notes:
        Network and HTTP errors are printed and end pagination instead of
        being raised, matching the best-effort handling of parse errors.
    """
    # Encode the query so reserved characters (space, &, #, +) in the inputs
    # cannot corrupt the query string.
    query = quote_plus(f"{city} {attraction_type}")
    base_url = f"{_TRIPADVISOR_BASE_URL}/Search?q={query}&ssrc=h&o="
    destinations = []

    for offset in range(0, max_results, page_size):
        url = base_url + str(offset)
        try:
            response = requests.get(url, timeout=timeout)
            # Without this check an error/blocked page would be parsed as if
            # it were a result page and silently yield nothing.
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Request failed for {url}: {e}")
            break  # further pages would most likely fail the same way

        soup = BeautifulSoup(response.text, 'html.parser')

        # Placeholder selector: update to match the actual page structure.
        for attraction in soup.select('.result-title'):
            try:
                destinations.append(
                    _parse_attraction(attraction, country, city, attraction_type)
                )
            except (AttributeError, KeyError, TypeError, ValueError) as e:
                print(f"Error parsing attraction: {e}")

    if not destinations:
        print(
            f"Warning: no attractions were parsed for {city}, {attraction_type}; "
            "the request may have failed or the selectors may be outdated."
        )

    # Save to JSON
    with open(f"{city}_{attraction_type}_destinations.json", "w", encoding='utf-8') as file:
        json.dump({"destinations": destinations}, file, ensure_ascii=False, indent=4)

    print(f"Scraping completed. Data saved for {city}, {attraction_type}.")
    return destinations



In [6]:
# Demo run: search for museums in New York; the returned records are shown as the cell output
scrape_destinations(country="USA", city="New York", attraction_type="museums")


Scraping completed. Data saved for New York, museums.


[]