1. Webscraping
a. Using an AI of your choice (e.g., ki.th-koeln.de), write a web scraper that turns the table at
https://pokemondb.net/pokedex/all into a pandas DataFrame.
b. Bonus: Make your tool also retrieve and display the sprite images.

2. Exploratory data analysis
Using any AI, write some code that explores some aspects of the dataset, for example:
a. What are the strongest pokemon (of each type)?
b. What are the best attackers?
c. What are the average stats for each type?

In [16]:


import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define the URL for the Pokémon database
url = "https://pokemondb.net/pokedex/all"

# Send a GET request to the webpage. requests applies no timeout unless one
# is given, so an unresponsive server would hang the cell; bound the wait.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Ensure the request was successful

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, "html.parser")

# Find the table on the page
pokemon_table = soup.find("table", {"id": "pokedex"})
if pokemon_table is None:
    # soup.find returns None when nothing matches. Fail here with a clear
    # message instead of an AttributeError on None further down.
    raise RuntimeError(f"No table with id 'pokedex' found at {url}")

# Extract the headers
headers = [th.text.strip() for th in pokemon_table.find("thead").find_all("th")]

# Extract the rows
rows = []
sprite_links = []  # Sprite image URLs, kept one-to-one with `rows`
for row in pokemon_table.find("tbody").find_all("tr"):
    cells = row.find_all("td")
    if not cells:
        # A <tr> without any <td> carries no data. Skipping it avoids calling
        # .find on None and keeps `rows` and `sprite_links` the same length.
        continue
    row_data = [cell.text.strip() for cell in cells]
    rows.append(row_data)

    # Extract sprite image URL (bonus task) from the first cell of the row
    sprite_img = cells[0].find("img")
    # Tag.get returns None when the attribute is missing, so an <img> without
    # a "src" is recorded as None instead of raising KeyError.
    sprite_links.append(sprite_img.get("src") if sprite_img is not None else None)

# Build the DataFrame from the scraped rows and attach the sprite URLs as an
# extra "Sprite" column (bonus task).
pokemon_df = pd.DataFrame(rows, columns=headers).assign(Sprite=sprite_links)

# The scraped cells are text; turn the stat columns into numbers. Values that
# cannot be parsed become NaN (errors="coerce").
numeric_columns = ["#", "Total", "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"]
pokemon_df[numeric_columns] = pokemon_df[numeric_columns].apply(pd.to_numeric, errors="coerce")

# Function to clean and split types
def split_types(row):
    """Split a whitespace-separated type string into a list of type names.

    Args:
        row: The text of one "Type" cell, e.g. ``"Grass Poison"``.

    Returns:
        The whitespace-separated parts of ``row`` as a list, or an empty
        list when ``row`` is not a string.
    """
    if not isinstance(row, str):
        return []
    return row.split()

# Split every "Type" cell once, then store its first and second entries in
# their own columns; an entry that does not exist is stored as None.
type_parts = pokemon_df["Type"].apply(split_types)
pokemon_df["Type 1"] = type_parts.apply(lambda parts: parts[0] if len(parts) > 0 else None)
pokemon_df["Type 2"] = type_parts.apply(lambda parts: parts[1] if len(parts) > 1 else None)

# EDA Code
# a. Strongest Pokémon of each type: for every "Type 1" group, idxmax gives
#    the row label with the highest "Total"; .loc then selects those rows.
strongest_per_type = pokemon_df.groupby("Type 1")["Total"].idxmax()
strongest_pokemon = pokemon_df.loc[strongest_per_type, ["Name", "Type 1", "Total"]]
print("Strongest Pokémon of each type:", strongest_pokemon, sep="\n")

# b. Best attackers: the ten rows with the highest "Attack" value
best_attackers = pokemon_df.sort_values(by="Attack", ascending=False).iloc[:10]
print("\nTop 10 Best Attackers:", best_attackers[["Name", "Type 1", "Attack"]], sep="\n")

# c. Averages of stats for each type (grouped by "Type 1")
stat_columns = ["HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed", "Total"]
average_stats_by_type = pokemon_df.groupby("Type 1")[stat_columns].mean()
print("\nAverage Stats for Each Type:", average_stats_by_type, sep="\n")

# Optionally, save results to CSV. Only the averages table keeps its index,
# because that index holds the type names.
for table, csv_path, write_index in (
    (strongest_pokemon, "strongest_pokemon.csv", False),
    (best_attackers, "best_attackers.csv", False),
    (average_stats_by_type, "average_stats_by_type.csv", True),
):
    table.to_csv(csv_path, index=write_index)


Strongest Pokémon of each type:
                          Name    Type 1  Total
168         Pinsir Mega Pinsir       Bug    600
862                    Yveltal      Dark    680
475     Rayquaza Mega Rayquaza    Dragon    780
1192                  Miraidon  Electric    670
1054      Zacian Crowned Sword     Fairy    700
1056  Zamazenta Crowned Shield  Fighting    700
314                      Ho-oh      Fire    680
767   Tornadus Incarnate Forme    Flying    580
597     Giratina Altered Forme     Ghost    680
319     Sceptile Mega Sceptile     Grass    630
473     Groudon Primal Groudon    Ground    770
191                   Articuno       Ice    580
605                     Arceus    Normal    720
1058       Eternatus Eternamax    Poison   1125
201       Mewtwo Mega Mewtwo X   Psychic    780
312   Tyranitar Mega Tyranitar      Rock    700
462   Metagross Mega Metagross     Steel    700
471       Kyogre Primal Kyogre     Water    770

Top 10 Best Attackers:
                          Name  

3. BONUS: Build your own cool pokedex application with a graphical interface using AI!


In [35]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import tkinter as tk
from tkinter import ttk
from PIL import Image, ImageTk
from io import BytesIO

# Define the URL for the Pokémon database
url = "https://pokemondb.net/pokedex/all"

# Send a GET request to the webpage. requests applies no timeout unless one
# is given, so an unresponsive server would hang the cell; bound the wait.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Ensure the request was successful

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, "html.parser")

# Find the table on the page
pokemon_table = soup.find("table", {"id": "pokedex"})
if pokemon_table is None:
    # soup.find returns None when nothing matches. Fail here with a clear
    # message instead of an AttributeError on None further down.
    raise RuntimeError(f"No table with id 'pokedex' found at {url}")

# Extract the headers
headers = [th.text.strip() for th in pokemon_table.find("thead").find_all("th")]

# Extract the rows
rows = []
sprite_links = []  # Sprite image URLs, kept one-to-one with `rows`
for row in pokemon_table.find("tbody").find_all("tr"):
    cells = row.find_all("td")
    if not cells:
        # A <tr> without any <td> carries no data. Skipping it avoids calling
        # .find on None and keeps `rows` and `sprite_links` the same length.
        continue
    row_data = [cell.text.strip() for cell in cells]
    rows.append(row_data)

    # Extract sprite image URL (bonus task) from the first cell of the row
    sprite_img = cells[0].find("img")
    # Tag.get returns None when the attribute is missing, so an <img> without
    # a "src" is recorded as None instead of raising KeyError.
    sprite_links.append(sprite_img.get("src") if sprite_img is not None else None)

# Build the DataFrame from the scraped rows and attach the sprite URLs as an
# extra "Sprite" column (bonus task).
pokemon_df = pd.DataFrame(rows, columns=headers).assign(Sprite=sprite_links)

# The scraped cells are text; turn the stat columns into numbers. Values that
# cannot be parsed become NaN (errors="coerce").
numeric_columns = ["#", "Total", "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"]
pokemon_df[numeric_columns] = pokemon_df[numeric_columns].apply(pd.to_numeric, errors="coerce")

# Function to clean and split types
def split_types(row):
    """Split a whitespace-separated type string into a list of type names.

    Args:
        row: The text of one "Type" cell, e.g. ``"Grass Poison"``.

    Returns:
        The whitespace-separated parts of ``row`` as a list, or an empty
        list when ``row`` is not a string.
    """
    if not isinstance(row, str):
        return []
    return row.split()

# Split every "Type" cell once, then store its first and second entries in
# their own columns; an entry that does not exist is stored as None.
type_parts = pokemon_df["Type"].apply(split_types)
pokemon_df["Type 1"] = type_parts.apply(lambda parts: parts[0] if len(parts) > 0 else None)
pokemon_df["Type 2"] = type_parts.apply(lambda parts: parts[1] if len(parts) > 1 else None)

# GUI Application for Pokédex
class PokedexApp:
    """Tkinter window that looks up a Pokémon by name in ``pokemon_df``.

    The window has a search entry and a "Search" button. A successful search
    writes the Pokémon's stats into a text box and shows its sprite image
    beside it; an unsuccessful one shows "Pokémon not found.".
    """

    # Columns written to the details box, in display order. Each label shown
    # to the user is identical to the DataFrame column it is read from.
    DETAIL_FIELDS = (
        "Name", "Type", "Total", "HP", "Attack",
        "Defense", "Sp. Atk", "Sp. Def", "Speed",
    )

    # Seconds to wait for a sprite download. The download runs inside the Tk
    # button callback, so without a bound a stalled server freezes the window.
    SPRITE_TIMEOUT = 10

    def __init__(self, root):
        """Create the widgets inside *root* (the Tk root window)."""
        self.root = root
        self.root.title("Pokédex")

        # Search bar
        self.search_label = tk.Label(root, text="Search Pokemon:")
        self.search_label.grid(row=0, column=0, padx=10, pady=10)

        self.search_entry = tk.Entry(root)
        self.search_entry.grid(row=0, column=1, padx=10, pady=10)

        self.search_button = tk.Button(root, text="Search", command=self.search_pokemon)
        self.search_button.grid(row=0, column=2, padx=10, pady=10)

        # Result frame
        self.result_frame = tk.Frame(root)
        self.result_frame.grid(row=1, column=0, columnspan=3, padx=10, pady=10)

        # Image and details
        self.sprite_label = tk.Label(self.result_frame)
        self.sprite_label.grid(row=0, column=0, padx=10, pady=10)

        self.details_text = tk.Text(self.result_frame, width=50, height=10)
        self.details_text.grid(row=0, column=1, padx=10, pady=10)

    @staticmethod
    def _find_pokemon(frame, query):
        """Return the first row of *frame* whose "Name" matches *query*.

        The comparison ignores letter case and differences in whitespace, so
        "mr. mime" finds "Mr. Mime". (Comparing against
        ``query.capitalize()`` could never match a name containing a capital
        letter after its first character.)

        Returns:
            The matching row as a Series, or None when *query* is blank or
            no name matches.
        """
        wanted = " ".join(query.split()).casefold()
        if not wanted:
            return None
        names = frame["Name"].str.split().str.join(" ").str.casefold()
        matches = frame[names == wanted]
        return None if matches.empty else matches.iloc[0]

    @classmethod
    def _format_details(cls, pokemon):
        """Return the details text: one "<field>: <value>" line per field."""
        return "".join(f"{field}: {pokemon[field]}\n" for field in cls.DETAIL_FIELDS)

    def _clear_sprite(self):
        """Remove the currently shown sprite, if any."""
        self.sprite_label.config(image="")
        self.sprite_label.image = None

    def _show_sprite(self, sprite_url):
        """Download *sprite_url* and show it; clear the image on any failure."""
        if not isinstance(sprite_url, str) or not sprite_url:
            self._clear_sprite()
            return
        try:
            sprite_response = requests.get(sprite_url, timeout=self.SPRITE_TIMEOUT)
            sprite_response.raise_for_status()
            sprite_image = Image.open(BytesIO(sprite_response.content))
            sprite_photo = ImageTk.PhotoImage(sprite_image)
        except (requests.RequestException, OSError):
            # Network errors, HTTP error statuses and undecodable image data
            # should not break the search: the details stay visible, only the
            # picture is omitted.
            self._clear_sprite()
            return
        self.sprite_label.config(image=sprite_photo)
        # Keep a reference on the widget; Tk does not hold one itself, and the
        # image disappears once the PhotoImage is garbage-collected.
        self.sprite_label.image = sprite_photo

    def search_pokemon(self):
        """Look up the name typed into the search entry and display the result."""
        pokemon = self._find_pokemon(pokemon_df, self.search_entry.get())

        self.details_text.delete("1.0", tk.END)
        if pokemon is None:
            self.details_text.insert(tk.END, "Pokémon not found.")
            self._clear_sprite()
            return

        # Display details
        self.details_text.insert(tk.END, self._format_details(pokemon))

        # Display sprite
        self._show_sprite(pokemon["Sprite"])

# Run the application: create the Tk root window, build the Pokédex widgets
# inside it, then hand control to the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = PokedexApp(root)
    root.mainloop()
