In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
from tkinter import Tk, Label, Button, Listbox, Entry, StringVar
from PIL import Image, ImageTk
from io import BytesIO

# Step 1: Webscraping
def scrape_pokemon_data():
    url = "https://pokemondb.net/pokedex/all"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'pokedex'})

    headers = [th.text for th in table.find('thead').find_all('th')]
    rows = []
    for tr in table.find('tbody').find_all('tr'):
        cols = [td.text.strip() for td in tr.find_all('td')]
        sprite_url = tr.find('td').find('img')['src']  # Sprite image
        cols.append(sprite_url)
        rows.append(cols)

    headers.append("Sprite")
    df = pd.DataFrame(rows, columns=headers)
    return df

pokemon_df = scrape_pokemon_data()
print("Columns in the DataFrame:", pokemon_df.columns)

# Ensure correct column names
if 'Type 1' not in pokemon_df.columns:
    pokemon_df.rename(columns={'Type': 'Type 1'}, inplace=True)

# Ensure numeric columns are properly converted
numeric_columns = ['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
for col in numeric_columns:
    if col in pokemon_df.columns:
        pokemon_df[col] = pd.to_numeric(pokemon_df[col], errors='coerce')

pokemon_df.to_csv("pokemon_data.csv", index=False)
print("Data scraped and saved to pokemon_data.csv")

# Step 2: Exploratory Data Analysis
def strongest_pokemon(df):
    strongest = df.sort_values(by='Total', ascending=False).groupby('Type 1').first()
    print("Strongest Pokemon by Type:\n", strongest[['Name', 'Total']])

def best_attackers(df):
    best = df.sort_values(by='Attack', ascending=False).head(10)
    print("Best Attackers:\n", best[['Name', 'Attack']])

def average_stats(df):
    averages = df.groupby('Type 1').mean(numeric_only=True)
    print("Average Stats by Type:\n", averages[['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']])

def fastest_pokemon(df):
    fastest = df.sort_values(by='Speed', ascending=False).head(10)
    print("Fastest Pokemon:\n", fastest[['Name', 'Speed']])

def highest_avg_stat_type(df):
    averages = df.groupby('Type 1').mean(numeric_only=True)
    highest = averages.idxmax()
    print("Type with Highest Average Stats:")
    print(highest)

strongest_pokemon(pokemon_df)
best_attackers(pokemon_df)
average_stats(pokemon_df)
fastest_pokemon(pokemon_df)
highest_avg_stat_type(pokemon_df)

# Visualization
def plot_avg_stats(df):
    averages = df.groupby('Type 1').mean(numeric_only=True)
    averages[['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].plot(kind='bar', figsize=(15, 7))
    plt.title('Average Stats by Type')
    plt.ylabel('Average Value')
    plt.xlabel('Type')
    plt.tight_layout()
    plt.show()

def plot_attack_vs_defense(df):
    plt.figure(figsize=(10, 6))
    plt.scatter(df['Attack'], df['Defense'], alpha=0.5)
    plt.title('Attack vs. Defense')
    plt.xlabel('Attack')
    plt.ylabel('Defense')
    plt.show()

# Uncomment to generate plots
# plot_avg_stats(pokemon_df)
# plot_attack_vs_defense(pokemon_df)

# Step 3: BONUS - GUI Application
def show_pokemon_data():
    def display_sprite(event):
        selected = listbox.get(listbox.curselection())
        sprite_url = pokemon_df[pokemon_df['Name'] == selected]['Sprite'].values[0]
        response = requests.get(sprite_url)
        img_data = Image.open(BytesIO(response.content))
        img = ImageTk.PhotoImage(img_data)
        sprite_label.configure(image=img)
        sprite_label.image = img

    def filter_pokemon():
        filter_text = filter_var.get().strip().lower()
        filtered = pokemon_df[pokemon_df['Type 1'].str.lower().str.contains(filter_text, na=False)]
        listbox.delete(0, 'end')
        for name in filtered['Name']:
            listbox.insert('end', name)

    root = Tk()
    root.title("Pokemon Viewer")

    Label(root, text="Select a Pokemon:").pack()

    filter_var = StringVar()
    Entry(root, textvariable=filter_var).pack()
    Button(root, text="Filter", command=filter_pokemon).pack()

    listbox = Listbox(root)
    for name in pokemon_df['Name']:
        listbox.insert('end', name)
    listbox.pack()

    sprite_label = Label(root)
    sprite_label.pack()

    listbox.bind("<<ListboxSelect>>", display_sprite)

    Button(root, text="Exit", command=root.quit).pack()

    root.mainloop()

# Uncomment below to launch GUI application
# show_pokemon_data()

Columns in the DataFrame: Index(['#', 'Name', 'Type', 'Total', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Sprite'],
      dtype='object')
Data scraped and saved to pokemon_data.csv
Strongest Pokemon by Type:
                                    Name  Total
Type 1                                        
Bug                              Pinsir    500
Bug Dark                          Lokix    450
Bug Electric                   Vikavolt    500
Bug Fairy                      Ribombee    464
Bug Fighting   Heracross Mega Heracross    600
...                                 ...    ...
Water Ice                        Lapras    535
Water Poison                 Tentacruel    515
Water Psychic      Slowbro Mega Slowbro    590
Water Rock                   Carracosta    495
Water Steel                    Empoleon    530

[221 rows x 2 columns]
Best Attackers:
                           Name  Attack
201       Mewtwo Mega Mewtwo X     190
274   Heracross Mega Heracross     185