# Pokedex-Webscraping

In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from pandasql import sqldf

In [9]:
url = "https://pokemondb.net/pokedex/all"

def download_html(url, filename):
    response = requests.get(url)
    if response.status_code == 200:
        with open(filename, "w", encoding="utf-8") as file:
            file.write(response.text)
        print(f"Seite als '{filename}' gespeichert")
    else:
        print(f"Fehler beim Herunterladen der Seite. Statuscode: {response.status_code}")

def parse_html(filename):
    with open(filename, "r", encoding="utf-8") as file:
        return BeautifulSoup(file, "html.parser")

# Extraktion der Pokemondaten
def extract_pokemon_data(soup):
    pokedex = []
    pokemon_rows = soup.select("tbody > tr")
    for row in pokemon_rows:
        pokedex.append({
            "id": row.select_one("td.cell-fixed > span").text,
            "name": row.select_one("td.cell-name > a").text,
            "type": [type_.text for type_ in row.select("td.cell-icon > a")],
            "total": row.select_one("td.cell-total").text,
            "hp": row.select("td[class='cell-num']")[0].text,
            "attack": row.select("td[class='cell-num']")[1].text,
            "defense": row.select("td[class='cell-num']")[2].text,
            "sp_atk": row.select("td[class='cell-num']")[3].text,
            "sp_def": row.select("td[class='cell-num']")[4].text,
            "speed": row.select("td[class='cell-num']")[5].text
        })
    return pokedex


# Hauptfunktion

In [10]:
# Hauptfunktion
def main():
    html_filename = "index.html"
    download_html(url, html_filename)
    soup = parse_html(html_filename)
    pokedex = extract_pokemon_data(soup)
    
    df = pd.DataFrame(pokedex)
    print(f"Erfolgreich alle {len(df)} Pokemon in DataFrame gesammelt.")
    
    df_ex = df.explode("type")
    
    # Stärkstes Pokemon jedes Typs
    strongest_pokemon = sqldf("""
    SELECT id, name, type, max(total) as max_total
    FROM df_ex
    GROUP BY type
    ORDER BY max_total DESC""")
    print(strongest_pokemon)
    print()
    
    # Stärkste Angreifer
    strongest_attackers = sqldf("""
    SELECT *
    FROM df_ex
    ORDER BY attack DESC
    LIMIT 5""")
    print(strongest_attackers)
    print()
    
    # Durchschnittliche Werte für jeden Typ
    avg_stats = sqldf("""
    SELECT type, avg(total), avg(hp), avg(attack), avg(defense), avg(sp_atk), avg(sp_def), avg(speed)
    FROM df_ex
    GROUP BY type
    ORDER BY type""")
    print(avg_stats)

# Skript ausführen
if __name__ == "__main__":
    main()


Seite als 'index.html' gespeichert
Erfolgreich alle 1215 Pokemon in DataFrame gesammelt.
      id       name      type max_total
0   0150     Mewtwo   Psychic       780
1   0384   Rayquaza    Flying       780
2   0150     Mewtwo  Fighting       780
3   0384   Rayquaza    Dragon       780
4   0382     Kyogre     Water       770
5   0383    Groudon    Ground       770
6   0383    Groudon      Fire       770
7   0493     Arceus    Normal       720
8   0376  Metagross     Steel       700
9   0248  Tyranitar      Rock       700
10  0646     Kyurem       Ice       700
11  0719    Diancie     Fairy       700
12  0248  Tyranitar      Dark       700
13  0890  Eternatus    Poison       690
14  0487   Giratina     Ghost       680
15  0644     Zekrom  Electric       680
16  0254   Sceptile     Grass       630
17  0127     Pinsir       Bug       600

     id        name      type total  hp attack defense sp_atk sp_def speed
0  0512    Simisage     Grass   498  75     98      63     98     63   101
