# Import

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time
import json

# Scraping Name and Score

Scraping name and score from the website https://pvpoke.com/rankings/. <br>
Store Data in raw_data/pokemon_name_rank.json. 

In [2]:
def scrape_rankings_names_scores_to_json():
    # Set up the Selenium WebDriver
    driver = webdriver.Chrome()

    try:
        # Navigate to the rankings page
        url = 'https://pvpoke.com/rankings/'
        driver.get(url)

        # Wait for the page to load completely
        time.sleep(15)

        # Get the page source and parse it with BeautifulSoup
        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')

        # Extract Pokémon rankings data
        pokemon_blocks = soup.find_all('div', class_='rank')
        data = []

        for pokemon in pokemon_blocks:
            name = pokemon.find('span', class_='name').text if pokemon.find('span', class_='name') else "No name"
            score = pokemon.find('div', class_='rating score-rating').text if pokemon.find('div', class_='rating score-rating') else "No score"

            data.append({"name": name, "score": score})

        # Filter out entries with 'No score'
        data = [entry for entry in data if entry['score'] != "No score"]

        # Save data to a JSON file in the raw_data folder
        output_path = 'raw_data/pokemon_name_rank.json'
        with open(output_path, 'w') as f:
            json.dump(data, f, indent=4)

        print(f"Data has been saved to {output_path}")

    finally:
        # Quit the driver
        driver.quit()

In [3]:
# Call the function
scrape_rankings_names_scores_to_json()

Data has been saved to raw_data/pokemon_name_rank.json


# Scraping stats and moves

In [3]:
def scrape_all_pokemon_stats():
    input_path = 'raw_data/pokemon_name_rank.json'
    output_path = 'raw_data/pokemon_stats_moves.json'
    base_url = 'https://pvpoke.com/rankings/all/1500/overall/'
    driver = webdriver.Chrome()

    url = 'https://pvpoke.com/rankings/'
    driver.get(url)
    time.sleep(10)

    try:
        # Read Pokémon names from the input JSON file
        with open(input_path, 'r') as f:
            pokemon_list = json.load(f)

        # Initialize the output JSON file
        with open(output_path, 'w') as f:
            json.dump([], f)

        for pokemon in pokemon_list:
            name = pokemon['name']
            # Replace parentheses, apostrophes, spaces, and hyphens with underscores in the URL
            formatted_name = name.replace('(', '').replace(')', '').replace(' ', '_').replace('-','_').replace("'", '')
            url = f"{base_url}{formatted_name.lower()}/"

            driver.get(url)
            time.sleep(7)
            html = driver.page_source
            soup = BeautifulSoup(html, 'html.parser')

            # Get detailed stats
            stat_details = soup.find('div', class_='stat-details clear')
            filtered_stats = {}
            if stat_details:
                for stat_row in stat_details.find_all('div', class_='stat-row'):
                    label = stat_row.find('div', class_='label')
                    value = stat_row.find('div', class_='value')
                    if label and value:
                        label_text = label.text.strip()
                        value_text = value.text.strip()
                        if label_text in ['Attack', 'Defense', 'Overall', 'Stamina']:
                            filtered_stats[label_text] = value_text

            # Get fast moves and their details
            fast_moves_section = soup.find('div', class_='moveset fast show-stats clear')
            fast_moves = []
            recommended_fast_move = None
            
            if fast_moves_section:
                move_elements = fast_moves_section.find_all('div', class_='move-detail-template')
                for move in move_elements:
                    if 'hide' not in move.get('class', []):
                        name_span = move.find('span', class_='name')
                        if name_span:
                            move_name = name_span.text.strip()
                            fast_moves.append(move_name)
                            # Check if this move is recommended (has 'selected' class)
                            if 'selected' in move.get('class', []):
                                recommended_fast_move = move_name

            # Get charged moves and their details
            charged_moves_section = soup.find('div', class_='moveset charged show-stats clear')
            charged_moves = []
            recommended_charged_moves = []
            
            if charged_moves_section:
                move_elements = charged_moves_section.find_all('div', class_='move-detail-template')
                for move in move_elements:
                    if 'hide' not in move.get('class', []):
                        name_span = move.find('span', class_='name')
                        if name_span:
                            move_name = name_span.text.strip()
                            charged_moves.append(move_name)
                            # Check if this move is recommended (has 'selected' class)
                            if 'selected' in move.get('class', []):
                                recommended_charged_moves.append(move_name)

            # Save Pokémon name, stats, moves, and recommended moves
            pokemon_data = {
                'name': name,
                'filtered_stats': filtered_stats,
                'fast_moves': fast_moves,
                'charged_moves': charged_moves,
                'recommended_fast_move': recommended_fast_move,
                'recommended_charged_moves': recommended_charged_moves
            }

            # Append the data to the JSON file
            with open(output_path, 'r+') as f:
                existing_data = json.load(f)
                existing_data.append(pokemon_data)
                f.seek(0)
                json.dump(existing_data, f, indent=4)

            # Print the data in a single line to the terminal
            print(f"Name: {name}, Stats: {filtered_stats}, Fast Moves: {fast_moves}, Charged Moves: {charged_moves}, Recommended Fast: {recommended_fast_move}, Recommended Charged: {recommended_charged_moves}")

        print(f"All Pokémon stats have been saved to {output_path}")


    finally:
        driver.quit()

# Call the function
scrape_all_pokemon_stats()

Name: Clodsire, Stats: {'Attack': '94.2', 'Defense': '119.4', 'Stamina': '209', 'Overall': '2352'}, Fast Moves: ['Poison Sting', 'Mud Shot'], Charged Moves: ['Stone Edge', 'Earthquake', 'Sludge Bomb', 'Megahorn*', 'Water Pulse', 'Acid Spray'], Recommended Fast: Poison Sting, Recommended Charged: ['Stone Edge', 'Earthquake']
Name: Diggersby, Stats: {'Attack': '96.3', 'Defense': '141.1', 'Stamina': '171', 'Overall': '2325'}, Fast Moves: ['Quick Attack', 'Mud Shot'], Charged Moves: ['Scorching Sands', 'Fire Punch', 'Return†', 'Dig', 'Earthquake', 'Hyper Beam'], Recommended Fast: Quick Attack, Recommended Charged: ['Scorching Sands', 'Fire Punch']
Name: Diggersby, Stats: {'Attack': '96.3', 'Defense': '141.1', 'Stamina': '171', 'Overall': '2325'}, Fast Moves: ['Quick Attack', 'Mud Shot'], Charged Moves: ['Scorching Sands', 'Fire Punch', 'Return†', 'Dig', 'Earthquake', 'Hyper Beam'], Recommended Fast: Quick Attack, Recommended Charged: ['Scorching Sands', 'Fire Punch']
Name: Forretress, Stat

# Fix Failed Pokemon Data

Code to retry scraping Pokemon with failed data (Overall: "0" or empty moves) with longer wait time.

In [None]:
def fix_failed_pokemon_data():
    """
    Function to retry scraping Pokemon with failed data
    Failed data criteria:
    - Overall stat is "0"
    - Empty fast_moves or charged_moves arrays
    - No recommended moves
    """
    input_path = 'raw_data/pokemon_stats_moves.json'
    output_path = 'raw_data/pokemon_stats_moves_fixed.json'
    base_url = 'https://pvpoke.com/rankings/all/1500/overall/'
    
    # Read existing data
    with open(input_path, 'r') as f:
        pokemon_data = json.load(f)
    
    # Find failed Pokemon
    failed_pokemon = []
    for pokemon in pokemon_data:
        # Check if Pokemon has failed data
        overall_stat = pokemon.get('filtered_stats', {}).get('Overall', '0')
        fast_moves = pokemon.get('fast_moves', [])
        charged_moves = pokemon.get('charged_moves', [])
        
        if (overall_stat == "0" or 
            len(fast_moves) == 0 or 
            len(charged_moves) == 0):
            failed_pokemon.append(pokemon)
    
    print(f"Found {len(failed_pokemon)} Pokemon with failed data")
    
    if len(failed_pokemon) == 0:
        print("No failed Pokemon found!")
        return
    
    # Print names of failed Pokemon
    print("Failed Pokemon:")
    for pokemon in failed_pokemon:
        print(f"- {pokemon['name']}")
    
    driver = webdriver.Chrome()
    
    try:
        # Navigate to main page first
        url = 'https://pvpoke.com/rankings/'
        driver.get(url)
        time.sleep(10)
        
        fixed_data = []
        
        for i, pokemon in enumerate(failed_pokemon):
            name = pokemon['name']
            print(f"\nRetrying {i+1}/{len(failed_pokemon)}: {name}")
            
            # Format name for URL
            formatted_name = name.replace('(', '').replace(')', '').replace(' ', '_').replace('-','_').replace("'", '').replace('.','_')
            url = f"{base_url}{formatted_name.lower()}/"
            
            driver.get(url)
            time.sleep(15)  # Longer wait time
            html = driver.page_source
            soup = BeautifulSoup(html, 'html.parser')
            
            # Get detailed stats
            stat_details = soup.find('div', class_='stat-details clear')
            filtered_stats = {}
            if stat_details:
                for stat_row in stat_details.find_all('div', class_='stat-row'):
                    label = stat_row.find('div', class_='label')
                    value = stat_row.find('div', class_='value')
                    if label and value:
                        label_text = label.text.strip()
                        value_text = value.text.strip()
                        if label_text in ['Attack', 'Defense', 'Overall', 'Stamina']:
                            filtered_stats[label_text] = value_text
            
            # Get fast moves
            fast_moves_section = soup.find('div', class_='moveset fast show-stats clear')
            fast_moves = []
            recommended_fast_move = None
            
            if fast_moves_section:
                move_elements = fast_moves_section.find_all('div', class_='move-detail-template')
                for move in move_elements:
                    move_classes = move.get('class', [])
                    if 'hide' not in move_classes:
                        name_span = move.find('span', class_='name')
                        if name_span:
                            move_name = name_span.text.strip()
                            fast_moves.append(move_name)
                            if 'selected' in move_classes:
                                recommended_fast_move = move_name
            
            # Get charged moves
            charged_moves_section = soup.find('div', class_='moveset charged show-stats clear')
            charged_moves = []
            recommended_charged_moves = []
            
            if charged_moves_section:
                move_elements = charged_moves_section.find_all('div', class_='move-detail-template')
                for move in move_elements:
                    move_classes = move.get('class', [])
                    if 'hide' not in move_classes:
                        name_span = move.find('span', class_='name')
                        if name_span:
                            move_name = name_span.text.strip()
                            charged_moves.append(move_name)
                            if 'selected' in move_classes:
                                recommended_charged_moves.append(move_name)
            
            # Create fixed Pokemon data
            fixed_pokemon = {
                'name': name,
                'filtered_stats': filtered_stats,
                'fast_moves': fast_moves,
                'charged_moves': charged_moves,
                'recommended_fast_move': recommended_fast_move,
                'recommended_charged_moves': recommended_charged_moves
            }
            
            fixed_data.append(fixed_pokemon)
            
            # Print results
            overall = filtered_stats.get('Overall', '0')
            print(f"  Overall: {overall}")
            print(f"  Fast moves: {len(fast_moves)} ({fast_moves})")
            print(f"  Charged moves: {len(charged_moves)} ({charged_moves})")
            print(f"  Recommended fast: {recommended_fast_move}")
            print(f"  Recommended charged: {recommended_charged_moves}")
            
            # Check if still failed
            if overall == "0" or len(fast_moves) == 0 or len(charged_moves) == 0:
                print(f"  ⚠️  Still failed after retry!")
            else:
                print(f"  ✅ Successfully fixed!")
        
        # Save fixed data
        with open(output_path, 'w') as f:
            json.dump(fixed_data, f, indent=4)
        
        print(f"\n🎉 Fixed data saved to {output_path}")
        print(f"Total Pokemon processed: {len(fixed_data)}")
        
    finally:
        driver.quit()

# Uncomment to run:
# fix_failed_pokemon_data()

In [7]:
def update_original_data_with_fixes():
    """
    Function to update the original pokemon_stats_moves.json file 
    with the fixed data from pokemon_stats_moves_fixed.json
    """
    original_path = 'raw_data/pokemon_stats_moves.json'
    fixed_path = 'raw_data/pokemon_stats_moves_fixed.json'
    backup_path = 'raw_data/pokemon_stats_moves_backup.json'
    
    # Check if fixed data exists
    try:
        with open(fixed_path, 'r') as f:
            fixed_data = json.load(f)
    except FileNotFoundError:
        print(f"❌ Fixed data file not found: {fixed_path}")
        print("Please run fix_failed_pokemon_data() first!")
        return
    
    # Read original data
    with open(original_path, 'r') as f:
        original_data = json.load(f)
    
    # Create backup
    with open(backup_path, 'w') as f:
        json.dump(original_data, f, indent=4)
    print(f"💾 Backup created: {backup_path}")
    
    # Create a dictionary for quick lookup of fixed data
    fixed_dict = {pokemon['name']: pokemon for pokemon in fixed_data}
    
    # Update original data
    updated_count = 0
    for i, pokemon in enumerate(original_data):
        name = pokemon['name']
        if name in fixed_dict:
            # Replace with fixed data
            original_data[i] = fixed_dict[name]
            updated_count += 1
            print(f"✅ Updated: {name}")
    
    # Save updated data
    with open(original_path, 'w') as f:
        json.dump(original_data, f, indent=4)
    
    print(f"\n🎉 Successfully updated {updated_count} Pokemon in {original_path}")
    print(f"📁 Original data backed up to: {backup_path}")

# Uncomment to run:
# update_original_data_with_fixes()

In [9]:
# Run the fix function
fix_failed_pokemon_data()

Found 54 Pokemon with failed data
Failed Pokemon:
- Zygarde (50% Forme)
- Mr. Mime
- Cherrim (Sunshine)
- Wyrdeer
- Nuzleaf
- Teddiursa
- Onix
- Zygarde (10% Forme)
- Ambipom
- Lycanroc (Dusk)
- Darmanitan (Galarian)
- Meowstic (Male)
- Mr. Rime
- Mr. Mime (Galarian)
- Rampardos
- Xatu
- Bergmite
- Bulbasaur (Shadow)
- Clawitzer
- Deerling
- Shaymin (Sky)
- Pupitar
- Inteleon
- Snover (Shadow)
- Drowzee
- Elgyem
- Gorebyss
- Accelgor
- Porygon-Z (Shadow)
- Flabebe
- Rowlet
- Reuniclus (Shadow)
- Throh
- Duosion (Shadow)
- Snubbull
- Solosis (Shadow)
- Deoxys (Speed)
- Anorith (Shadow)
- Munna
- Duosion
- Solosis
- Timburr (Shadow)
- Snubbull (Shadow)
- Anorith
- Regigigas (Shadow)
- Spoink (Shadow)
- Skiddo
- Spoink
- Clefairy
- Kadabra
- Unown
- Mienfoo
- Slaking
- Slaking (Shadow)

Retrying 1/54: Zygarde (50% Forme)

Retrying 1/54: Zygarde (50% Forme)
  Overall: 0
  Fast moves: 0 ([])
  Charged moves: 0 ([])
  Recommended fast: None
  Recommended charged: []
  ⚠️  Still failed after 

In [10]:
update_original_data_with_fixes()

💾 Backup created: raw_data/pokemon_stats_moves_backup.json
✅ Updated: Zygarde (50% Forme)
✅ Updated: Mr. Mime
✅ Updated: Cherrim (Sunshine)
✅ Updated: Wyrdeer
✅ Updated: Nuzleaf
✅ Updated: Teddiursa
✅ Updated: Onix
✅ Updated: Zygarde (10% Forme)
✅ Updated: Ambipom
✅ Updated: Lycanroc (Dusk)
✅ Updated: Darmanitan (Galarian)
✅ Updated: Meowstic (Male)
✅ Updated: Mr. Rime
✅ Updated: Mr. Mime (Galarian)
✅ Updated: Rampardos
✅ Updated: Xatu
✅ Updated: Bergmite
✅ Updated: Bulbasaur (Shadow)
✅ Updated: Clawitzer
✅ Updated: Deerling
✅ Updated: Shaymin (Sky)
✅ Updated: Pupitar
✅ Updated: Inteleon
✅ Updated: Snover (Shadow)
✅ Updated: Drowzee
✅ Updated: Elgyem
✅ Updated: Gorebyss
✅ Updated: Accelgor
✅ Updated: Porygon-Z (Shadow)
✅ Updated: Flabebe
✅ Updated: Rowlet
✅ Updated: Reuniclus (Shadow)
✅ Updated: Throh
✅ Updated: Duosion (Shadow)
✅ Updated: Snubbull
✅ Updated: Solosis (Shadow)
✅ Updated: Deoxys (Speed)
✅ Updated: Anorith (Shadow)
✅ Updated: Munna
✅ Updated: Duosion
✅ Updated: Solosis
✅ 