In [None]:
import requests
from bs4 import BeautifulSoup
import json
import threading

def _stripped_text(node):
    """Return the whitespace-stripped text of a parsed element, or None if it is absent."""
    return node.text.strip() if node is not None else None


def parse_html(url, timeout=10):
    """Download one listing page and extract every restaurant card on it.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection, which would also block the thread running it.

    Returns:
        A list with one dict per ``div.restaurant-item`` element, keyed by
        ``'store Name'``, ``'rating'``, ``'time'``, ``'price'``, ``'address'``
        and ``'category'``. Text fields are ``None`` and list fields are ``[]``
        when the corresponding element is missing from a card. The list is
        empty when the page contains no restaurant cards.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            downloaded, including when ``timeout`` expires.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    for item in soup.find_all('div', class_='restaurant-item'):
        # Every lookup below tolerates a missing element. Previously a card
        # without a title or category row raised AttributeError, which made
        # the caller abandon all remaining pages of the listing.
        title = _stripped_text(item.find('a', class_='title-text'))

        rating_div = item.find('div', class_='rating-star')
        rating = (
            _stripped_text(rating_div.find('div', class_='text'))
            if rating_div is not None
            else None
        )

        # NOTE(review): 'jsx-1156793088' looks like a build-generated class
        # name; confirm this selector still matches the live markup.
        extra_info = item.find('div', class_='jsx-1156793088 info')
        if extra_info is not None:
            # Drop the "今日營業: " label and split the comma-separated entries.
            time_text = extra_info.text.strip().replace("今日營業: ", "")
            time_data = [slot.strip() for slot in time_text.split(',')]
        else:
            time_data = []

        price_text = _stripped_text(item.find('div', class_='avg-price'))
        # Remove the "均消 " label that precedes the amount.
        avg_price = price_text.replace('均消 ', '') if price_text is not None else None

        address = _stripped_text(item.find('div', class_='address-row'))

        category_row = item.find('div', class_='category-row')
        category_links = (
            category_row.find_all('a', class_='category')
            if category_row is not None
            else []
        )
        categories = [link.text.strip() for link in category_links]

        articles.append({
            'store Name': title,
            'rating': rating,
            'time': time_data,
            'price': avg_price,
            'address': address,
            'category': categories,
        })

    return articles

def scrape_url(url, max_pages=None):
    """Collect the restaurant records from every page of one listing.

    Pages are requested as ``{url}?page=1``, ``?page=2`` and so on, until a
    page yields no records, a page fails, or ``max_pages`` pages were read.

    Args:
        url: Listing address without the ``page`` query parameter.
        max_pages: Optional upper bound on the number of pages to request.
            ``None`` (the default) keeps the original unbounded behaviour.

    Returns:
        The records of all pages that were scraped successfully, in page
        order. A failure on a later page does not discard earlier pages.
    """
    results = []
    page = 1
    while max_pages is None or page <= max_pages:
        full_url = f"{url}?page={page}"
        try:
            articles = parse_html(full_url)
        except AttributeError as e:
            print(f"AttributeError on page {page}: {str(e)}")
            break
        except requests.RequestException as e:
            # A network failure used to propagate and throw away every record
            # already collected for this listing; stop and keep them instead.
            print(f"Request failed on page {page}: {e}")
            break

        if not articles:  # An empty page marks the end of the listing.
            break
        results.extend(articles)
        page += 1
        print(page)  # progress output: number of the next page to request

    return results

def get_all_pages_data(urls):
    """Scrape several listings concurrently, one thread per URL.

    Args:
        urls: Iterable of listing addresses accepted by ``scrape_url``.

    Returns:
        A single list with the records of every listing, grouped in the same
        order as ``urls``. A listing whose worker thread fails contributes no
        records.
    """
    urls = list(urls)
    # One result slot per URL keeps the output order independent of which
    # thread happens to finish first.
    per_url_results = [[] for _ in urls]

    def worker(index, target_url):
        per_url_results[index] = scrape_url(target_url)

    # The URL is passed through ``args`` so each thread is bound to its own
    # value. A lambda closing over the loop variable reads that variable only
    # when the thread runs, by which time the loop may have moved on.
    threads = [
        threading.Thread(target=worker, args=(index, target_url))
        for index, target_url in enumerate(urls)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    return [article for articles in per_url_results for article in articles]

# Listing pages to crawl. Each entry is one ifoodie.tw "explore" category for
# Taipei City; the trailing comment gives the decoded category segment.
# Every entry ends with a comma: a missing comma makes Python join two
# adjacent string literals into a single, invalid URL.
# The first URL used to be listed twice and is now kept once.
base_urls = [
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E6%97%A5%E5%BC%8F',  # Japanese
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E9%8D%8B%E9%A1%9E',  # hot pot
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E7%87%92%E8%82%89',  # grilled meat
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E7%B2%BE%E7%B7%BB%E9%AB%98%E7%B4%9A',  # fine dining
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E6%97%A9%E5%8D%88%E9%A4%90',  # brunch
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E7%94%9C%E9%BB%9E%E9%A1%9E',  # desserts
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E7%B4%84%E6%9C%83%E9%A4%90%E5%BB%B3%E9%A1%9E',  # date-night restaurants
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E9%9F%93%E5%BC%8F',  # Korean
    'https://ifoodie.tw/explore/%E5%8F%B0%E5%8C%97%E5%B8%82/list/%E9%A4%90%E9%85%92%E9%A4%A8%2F%E9%85%92%E5%90%A7',  # bistro / bar
]

articles = get_all_pages_data(base_urls)

# ensure_ascii=False keeps the Chinese text readable instead of \uXXXX escapes.
json_data = json.dumps(articles, ensure_ascii=False, indent=4)



In [None]:
import json
# Decode the JSON text built by the scraping cell into a list of dicts.
# The type check makes the cell safe to re-run: after the first run json_data
# already holds the decoded list, and json.loads() raises TypeError for a list.
if isinstance(json_data, (str, bytes, bytearray)):
    json_data = json.loads(json_data)

# Show the size and a short preview instead of dumping every record.
print(f"{len(json_data)} records loaded")
print(json_data[:3])

In [None]:
# Add a '地區' (district) field to every record, derived from its address.
for item in json_data:
    # parse_html stores the address under 'address'; '地點' is the key this
    # cell originally read and is kept as a fallback for records that use it.
    location = item.get('address') or item.get('地點')
    # Characters 3-5 of the address are taken as the district.
    # NOTE(review): presumably addresses start with a 3-character city name
    # followed by a 3-character district (e.g. '台北市中山區...') - confirm.
    # Records without an address get None instead of raising TypeError.
    item['地區'] = location[3:6] if location else None

# Show a short preview instead of dumping every record.
print(json_data[:3])

In [None]:
import os
from urllib.parse import quote_plus

import certifi
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

ca=certifi.where()

# Credentials come from the environment so they never end up in the notebook.
# The URI used to be a plain string, so the literal text "{account}" and
# "{password}" was sent to the server instead of real credentials.
# A missing variable raises KeyError naming the variable that must be set.
# quote_plus() percent-escapes characters that are not allowed raw in a URI.
account = quote_plus(os.environ["MONGODB_ACCOUNT"])
password = quote_plus(os.environ["MONGODB_PASSWORD"])
uri = f"mongodb+srv://{account}:{password}@line.kwwffsx.mongodb.net/?retryWrites=true&w=majority"

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'),tlsCAFile=ca)

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # Report the failure without stopping the notebook; later cells that use
    # `client` will fail if the connection could not be established.
    print(e)

In [None]:

# Handles for the database and the collection used by the following cells.
# NOTE(review): 'resteraunt' looks like a misspelling of "restaurant"; it is
# left untouched because the name selects which collection is addressed.
db = client["test"]
collection = db["resteraunt"]

# Bulk insert of the records in json_data, currently disabled (commented out):
# result = collection.insert_many(json_data)
# print(result)


In [None]:
# Find the documents whose '品項' is '餐酒館' and whose '地區' is '中山區',
# then print the '店名' field of each match.
# NOTE(review): the filter and the printed field use the keys '品項', '地區'
# and '店名', while the scraper in this notebook emits 'category', 'address'
# and 'store Name' - confirm which keys the stored documents actually use.
query = {'品項': '餐酒館', '地區': '中山區'}
result = collection.find(query)
print(result)  # prints the cursor object itself, not the matching documents
for doc in result:
    print(doc['店名'])

