In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Root of the wiki; hrefs taken from its pages are appended to this.
BASE_URL = "http://darksouls.wikidot.com"
# Page whose table is read by scrape_main_table().
SPELLS_URL = BASE_URL + "/pyromancies"

# Headers sent with every request: a browser-style User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
}

def scrape_main_table():
    """Scrape the table at ``SPELLS_URL`` into a list of spell records.

    Fetches the page, locates the first ``wiki-content-table`` and converts
    each usable row into a dict with the keys ``name``, ``url``, ``uses``,
    ``slots``, ``short_description``, ``location`` and ``affinity``.

    Returns:
        list[dict]: One record per usable row, in page order.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (``requests.HTTPError``).
        RuntimeError: If the page contains no ``wiki-content-table``.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(SPELLS_URL, headers=headers, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the index.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("table", class_="wiki-content-table")
    if table is None:
        raise RuntimeError(f"No 'wiki-content-table' found at {SPELLS_URL}")

    spells = []
    # Skip the first row (presumably the header row).
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        # Rows with fewer than 7 cells cannot supply every field below.
        if len(cols) < 7:
            continue
        name_tag = cols[1].find("a")
        # Without a link there is no detail page to follow; skip the row
        # rather than crash on a None tag.
        if name_tag is None or not name_tag.get("href"):
            continue
        spells.append({
            "name": name_tag.text.strip(),
            "url": BASE_URL + name_tag["href"],
            "uses": cols[2].text.strip(),
            "slots": cols[3].text.strip(),
            "short_description": cols[4].text.strip(),
            # Multiple text fragments in the cell are joined with " / ".
            "location": cols[5].get_text(separator=" / ", strip=True),
            "affinity": cols[6].text.strip()
        })
    return spells

def scrape_spell_details(url):
    """Fetch one spell page and extract its description and availability.

    Args:
        url: URL of the spell's detail page.

    Returns:
        dict: ``{"in_game_description": str, "availability": list[str]}``.
        A section that is absent from the page yields ``""`` / ``[]``.
        If the request or the parsing fails, the error is printed and both
        values are ``None`` so the caller's loop can continue.
    """
    try:
        # A timeout keeps one stalled page from blocking the whole run.
        response = requests.get(url, headers=headers, timeout=30)
        # Treat HTTP errors as failures instead of parsing the error page
        # and silently returning empty data.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content_div = soup.find("div", {"id": "page-content"})
        if content_div is None:
            raise ValueError("page has no 'page-content' container")

        # In-Game Description: every <p> sibling up to the next <h2>.
        desc_header = content_div.find("h2", string=lambda s: s and "In-Game Description" in s)
        paragraphs = []
        if desc_header:
            for sibling in desc_header.find_next_siblings():
                if sibling.name == "h2":
                    break
                if sibling.name == "p":
                    paragraphs.append(sibling.get_text(separator=" ", strip=True))
        in_game_description = " ".join(paragraphs).strip()

        # Availability: top-level items of the first <ul> in this section.
        availability_header = content_div.find("h2", string=lambda s: s and "Availability" in s)
        availability = []
        if availability_header:
            for sibling in availability_header.find_next_siblings():
                # Stop at the next section so a list belonging to a later
                # heading is never mistaken for the availability list.
                if sibling.name == "h2":
                    break
                if sibling.name == "ul":
                    availability = [
                        li.get_text(separator=" ", strip=True)
                        for li in sibling.find_all("li", recursive=False)
                    ]
                    break

        return {
            "in_game_description": in_game_description,
            "availability": availability
        }

    except Exception as e:
        # Deliberate best-effort: report the failure and keep going.
        print(f"Error scraping {url}: {e}")
        return {
            "in_game_description": None,
            "availability": None
        }

# Run the full scraping process: read the index table, then enrich each
# record in place with the fields scraped from its own page.
spells = scrape_main_table()

for spell in spells:
    print(f"Scraping: {spell['name']}")
    details = scrape_spell_details(spell['url'])
    spell.update(
        in_game_description=details["in_game_description"],
        availability=details["availability"],
    )
    time.sleep(1)  # polite delay


Scraping: Acid Surge
Scraping: Black Flame
Scraping: Chaos Fire Whip
Scraping: Chaos Storm
Scraping: Combustion
Scraping: Fire Orb
Scraping: Fire Surge
Scraping: Fire Tempest
Scraping: Fire Whip
Scraping: Fireball
Scraping: Firestorm
Scraping: Flash Sweat
Scraping: Great Chaos Fireball
Scraping: Great Combustion
Scraping: Great Fireball
Scraping: Iron Flesh
Scraping: Poison Mist
Scraping: Power Within
Scraping: Toxic Mist
Scraping: Undead Rapport


In [2]:
import json
# Write the records held in `spells` to disk as indented UTF-8 JSON,
# keeping non-ASCII characters unescaped.
with open("dark_souls_pyromancies_full.json", mode="w", encoding="utf-8") as f:
    f.write(json.dumps(spells, indent=2, ensure_ascii=False))

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Root of the wiki; hrefs taken from its pages are appended to this.
BASE_URL = "http://darksouls.wikidot.com"
# Page whose table is read by scrape_main_table().
SPELLS_URL = BASE_URL + "/sorceries"

# Headers sent with every request: a browser-style User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
}

def scrape_main_table():
    """Scrape the table at ``SPELLS_URL`` into a list of spell records.

    Fetches the page, locates the first ``wiki-content-table`` and converts
    each usable row into a dict with the keys ``name``, ``url``, ``uses``,
    ``slots``, ``short_description``, ``location`` and ``affinity``.

    Returns:
        list[dict]: One record per usable row, in page order.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (``requests.HTTPError``).
        RuntimeError: If the page contains no ``wiki-content-table``.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(SPELLS_URL, headers=headers, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the index.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("table", class_="wiki-content-table")
    if table is None:
        raise RuntimeError(f"No 'wiki-content-table' found at {SPELLS_URL}")

    spells = []
    # Skip the first row (presumably the header row).
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        # Rows with fewer than 8 cells cannot supply every field below.
        if len(cols) < 8:
            continue
        name_tag = cols[1].find("a")
        # Without a link there is no detail page to follow; skip the row
        # rather than crash on a None tag.
        if name_tag is None or not name_tag.get("href"):
            continue
        spells.append({
            "name": name_tag.text.strip(),
            "url": BASE_URL + name_tag["href"],
            "uses": cols[2].text.strip(),
            # Column 3 of this table is not captured.
            "slots": cols[4].text.strip(),
            "short_description": cols[5].text.strip(),
            # Multiple text fragments in the cell are joined with " / ".
            "location": cols[6].get_text(separator=" / ", strip=True),
            "affinity": cols[7].text.strip()
        })
    return spells

def scrape_spell_details(url):
    """Fetch one spell page and extract its description and availability.

    Args:
        url: URL of the spell's detail page.

    Returns:
        dict: ``{"in_game_description": str, "availability": list[str]}``.
        A section that is absent from the page yields ``""`` / ``[]``.
        If the request or the parsing fails, the error is printed and both
        values are ``None`` so the caller's loop can continue.
    """
    try:
        # A timeout keeps one stalled page from blocking the whole run.
        response = requests.get(url, headers=headers, timeout=30)
        # Treat HTTP errors as failures instead of parsing the error page
        # and silently returning empty data.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content_div = soup.find("div", {"id": "page-content"})
        if content_div is None:
            raise ValueError("page has no 'page-content' container")

        # In-Game Description: every <p> sibling up to the next <h2>.
        desc_header = content_div.find("h2", string=lambda s: s and "In-Game Description" in s)
        paragraphs = []
        if desc_header:
            for sibling in desc_header.find_next_siblings():
                if sibling.name == "h2":
                    break
                if sibling.name == "p":
                    paragraphs.append(sibling.get_text(separator=" ", strip=True))
        in_game_description = " ".join(paragraphs).strip()

        # Availability: top-level items of the first <ul> in this section.
        availability_header = content_div.find("h2", string=lambda s: s and "Availability" in s)
        availability = []
        if availability_header:
            for sibling in availability_header.find_next_siblings():
                # Stop at the next section so a list belonging to a later
                # heading is never mistaken for the availability list.
                if sibling.name == "h2":
                    break
                if sibling.name == "ul":
                    availability = [
                        li.get_text(separator=" ", strip=True)
                        for li in sibling.find_all("li", recursive=False)
                    ]
                    break

        return {
            "in_game_description": in_game_description,
            "availability": availability
        }

    except Exception as e:
        # Deliberate best-effort: report the failure and keep going.
        print(f"Error scraping {url}: {e}")
        return {
            "in_game_description": None,
            "availability": None
        }

# Run the full scraping process: read the index table, then enrich each
# record in place with the fields scraped from its own page.
spells1 = scrape_main_table()

for spell in spells1:
    print(f"Scraping: {spell['name']}")
    details = scrape_spell_details(spell['url'])
    spell.update(
        in_game_description=details["in_game_description"],
        availability=details["availability"],
    )
    time.sleep(1)  # polite delay


Scraping: Aural Decoy
Scraping: Cast Light
Scraping: Chameleon
Scraping: Crystal Magic Weapon
Scraping: Crystal Soul Spear
Scraping: Dark Bead
Scraping: Dark Fog
Scraping: Dark Orb
Scraping: Fall Control
Scraping: Great Heavy Soul Arrow
Scraping: Great Magic Weapon
Scraping: Great Soul Arrow
Scraping: Heavy Soul Arrow
Scraping: Hidden Body
Scraping: Hidden Weapon
Scraping: Homing Crystal Soulmass
Scraping: Homing Soulmass
Scraping: Hush
Scraping: Magic Shield
Scraping: Magic Weapon
Scraping: Pursuers
Scraping: Remedy
Scraping: Repair
Scraping: Resist Curse
Scraping: Soul Arrow
Scraping: Soul Spear
Scraping: Strong Magic Shield
Scraping: White Dragon Breath


In [4]:
import json
# Persist the sorcery records, which the scraping cell above stored in
# `spells1`. `spells` still holds the pyromancy records from the first cell,
# so dumping it here would write pyromancies into the sorceries file.
with open("dark_souls_sorceries_full.json", "w", encoding="utf-8") as f:
    json.dump(spells1, f, indent=2, ensure_ascii=False)

In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Root of the wiki; hrefs taken from its pages are appended to this.
BASE_URL = "http://darksouls.wikidot.com"
# Page whose table is read by scrape_main_table().
SPELLS_URL = BASE_URL + "/miracles"

# Headers sent with every request: a browser-style User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    ),
}

def scrape_main_table():
    """Scrape the table at ``SPELLS_URL`` into a list of spell records.

    Fetches the page, locates the first ``wiki-content-table`` and converts
    each usable row into a dict with the keys ``name``, ``url``, ``uses``,
    ``slots``, ``short_description``, ``location`` and ``affinity``.

    Returns:
        list[dict]: One record per usable row, in page order.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (``requests.HTTPError``).
        RuntimeError: If the page contains no ``wiki-content-table``.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(SPELLS_URL, headers=headers, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the index.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("table", class_="wiki-content-table")
    if table is None:
        raise RuntimeError(f"No 'wiki-content-table' found at {SPELLS_URL}")

    spells = []
    # Skip the first row (presumably the header row).
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        # Rows with fewer than 8 cells cannot supply every field below.
        if len(cols) < 8:
            continue
        name_tag = cols[1].find("a")
        # Without a link there is no detail page to follow; skip the row
        # rather than crash on a None tag.
        if name_tag is None or not name_tag.get("href"):
            continue
        spells.append({
            "name": name_tag.text.strip(),
            "url": BASE_URL + name_tag["href"],
            "uses": cols[2].text.strip(),
            # Column 3 of this table is not captured.
            "slots": cols[4].text.strip(),
            "short_description": cols[5].text.strip(),
            # Multiple text fragments in the cell are joined with " / ".
            "location": cols[6].get_text(separator=" / ", strip=True),
            "affinity": cols[7].text.strip()
        })
    return spells

def scrape_spell_details(url):
    """Fetch one spell page and extract its description and availability.

    Args:
        url: URL of the spell's detail page.

    Returns:
        dict: ``{"in_game_description": str, "availability": list[str]}``.
        A section that is absent from the page yields ``""`` / ``[]``.
        If the request or the parsing fails, the error is printed and both
        values are ``None`` so the caller's loop can continue.
    """
    try:
        # A timeout keeps one stalled page from blocking the whole run.
        response = requests.get(url, headers=headers, timeout=30)
        # Treat HTTP errors as failures instead of parsing the error page
        # and silently returning empty data.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content_div = soup.find("div", {"id": "page-content"})
        if content_div is None:
            raise ValueError("page has no 'page-content' container")

        # In-Game Description: every <p> sibling up to the next <h2>.
        desc_header = content_div.find("h2", string=lambda s: s and "In-Game Description" in s)
        paragraphs = []
        if desc_header:
            for sibling in desc_header.find_next_siblings():
                if sibling.name == "h2":
                    break
                if sibling.name == "p":
                    paragraphs.append(sibling.get_text(separator=" ", strip=True))
        in_game_description = " ".join(paragraphs).strip()

        # Availability: top-level items of the first <ul> in this section.
        availability_header = content_div.find("h2", string=lambda s: s and "Availability" in s)
        availability = []
        if availability_header:
            for sibling in availability_header.find_next_siblings():
                # Stop at the next section so a list belonging to a later
                # heading is never mistaken for the availability list.
                if sibling.name == "h2":
                    break
                if sibling.name == "ul":
                    availability = [
                        li.get_text(separator=" ", strip=True)
                        for li in sibling.find_all("li", recursive=False)
                    ]
                    break

        return {
            "in_game_description": in_game_description,
            "availability": availability
        }

    except Exception as e:
        # Deliberate best-effort: report the failure and keep going.
        print(f"Error scraping {url}: {e}")
        return {
            "in_game_description": None,
            "availability": None
        }

# Run the full scraping process: read the index table, then enrich each
# record in place with the fields scraped from its own page.
spells2 = scrape_main_table()

for spell in spells2:
    print(f"Scraping: {spell['name']}")
    details = scrape_spell_details(spell['url'])
    spell.update(
        in_game_description=details["in_game_description"],
        availability=details["availability"],
    )
    time.sleep(1)  # polite delay


Scraping: Bountiful Sunlight
Scraping: Darkmoon Blade
Scraping: Emit Force
Scraping: Force
Scraping: Gravelord Greatsword Dance
Scraping: Gravelord Sword Dance
Scraping: Great Heal Excerpt
Scraping: Great Heal
Scraping: Great Lightning Spear
Scraping: Great Magic Barrier
Scraping: Heal
Scraping: Homeward
Scraping: Karmic Justice
Scraping: Lightning Spear
Scraping: Magic Barrier
Scraping: Replenishment
Scraping: Seek Guidance
Scraping: Soothing Sunlight
Scraping: Sunlight Blade
Scraping: Sunlight Spear
Scraping: Tranquil Walk of Peace
Scraping: Vow of Silence
Scraping: Wrath of the Gods


In [6]:
import json
# Persist the miracle records, which the scraping cell above stored in
# `spells2`. `spells` still holds the pyromancy records from the first cell,
# so dumping it here would write pyromancies into the miracles file.
with open("dark_souls_miracles_full.json", "w", encoding="utf-8") as f:
    json.dump(spells2, f, indent=2, ensure_ascii=False)