In [1]:
import requests
from bs4 import BeautifulSoup
import json
import time

browser = "chrome"

USER_AGENTS = {
    "chrome": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "safari": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15"
}

headers = {
    "User-Agent": USER_AGENTS[browser]
}

# Step 1: Setup
BASE_URL = "http://darksouls.wikidot.com"
GIFTS_URL = f"{BASE_URL}/gifts"

def get_gift_links():
    response = requests.get(GIFTS_URL, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'class': 'wiki-content-table'})

    gifts = []
    for row in table.find_all('tr')[1:]:  # skip header
        link_tag = row.find_all('td')[1].find('a')
        name = link_tag.text.strip().title()
        href = BASE_URL + link_tag['href']
        gifts.append({"name": name, "url": href})
    return gifts

def scrape_gift_page(gift):
    response = requests.get(gift["url"], headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    content = soup.find('div', {'id': 'page-content'})

    # Extract in-game description from <p><em> after <h2> "In Game Description"
    in_game_description = ""
    found_description_header = False
    for tag in content.find_all():
        if tag.name in ['h2', 'h3'] and 'in game description' in tag.text.lower():
            found_description_header = True
            continue
        if found_description_header:
            if tag.name in ['h2', 'h3']:
                break  # Stop at next section
            if tag.name == 'p' and tag.find('em'):
                in_game_description += tag.get_text(strip=True) + " "
    
    in_game_description = in_game_description.strip()

    # Extract availability section as before
    availability = []
    for header in content.find_all(['h2', 'h3']):
        if 'availability' in header.text.lower():
            ul = header.find_next('ul')
            if ul:
                availability = [li.get_text(strip=True) for li in ul.find_all('li')]
            break

    return {
        "name": gift["name"],
        "url": gift["url"],
        "in_game_description": in_game_description,
        "availability": availability
    }
# Run scraper
gift_links = get_gift_links()

In [2]:
gift_details = []

for gift in gift_links:
    print(f"Scraping {gift['name']}...")
    gift_data = scrape_gift_page(gift)
    try:
        gift_details.append(gift_data)
        time.sleep(1)  # polite pause
    except Exception as e:
        print(f"Failed to scrape {gift['name']}: {e}")


Scraping Binoculars...
Scraping Black Firebomb...
Scraping Divine Blessing...
Scraping Master Key...
Scraping Old Witch'S Ring...
Scraping Pendant...
Scraping Tiny Being'S Ring...
Scraping Twin Humanities...


In [4]:

# Save to JSON
with open("dark_souls_gifts_detailed.json", "w") as f:
    json.dump(gift_details, f, indent=2)