# Download the JSON files

In [21]:
import requests
import json
import os
from concurrent.futures import ThreadPoolExecutor

# Create the output directory; exist_ok makes re-running this cell harmless.
os.makedirs("pokemons", exist_ok=True)

# Fetch the index of all pokemon in one request.
# NOTE(review): limit=100000 is presumably larger than the number of entries,
# so no pagination is handled here — confirm against the API.
url = "https://pokeapi.co/api/v2/pokemon?limit=100000&offset=0"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail right here with a clear HTTP error instead of a confusing
# JSON-decoding error or KeyError further down.
response.raise_for_status()
all_pkmns = response.json()
# Each entry is a dict; the download step reads its 'name' and 'url' keys.
pokelist = all_pkmns['results']

def fetch_and_save_pokemon_data(pokemon):
    """Download one pokemon's data and its species data and save both as JSON.

    Args:
        pokemon: Index entry (dict) with a 'name' key and a 'url' key. The
            second-to-last '/'-separated segment of the url is used as the id
            for the species request.

    Returns:
        None. On success two files are written:
        'pokemons/<id>-<name>.json' and 'pokemons/<id>-<name>-species.json'.

    Raises:
        Nothing derived from Exception: any such error (network failure, HTTP
        error status, invalid JSON, file error) is caught and reported with
        print, so that one failing entry does not abort the other downloads.
    """
    try:
        # URLs for species and pokemon data
        url_pokemon = pokemon['url']
        # NOTE(review): this assumes the species id equals the pokemon id.
        # Entries for which that does not hold fail below and are reported as
        # "Failed to fetch/save ..." — confirm whether they should instead be
        # resolved through the species reference in the pokemon payload.
        url_species = f"https://pokeapi.co/api/v2/pokemon-species/{pokemon['url'].split('/')[-2]}/"

        # Fetch data. The timeout stops a stalled connection from blocking a
        # worker thread forever; raise_for_status turns an HTTP error page into
        # a clear error instead of an opaque JSON-decoding failure.
        response_species = requests.get(url_species, timeout=30)
        response_species.raise_for_status()
        response_pokemon = requests.get(url_pokemon, timeout=30)
        response_pokemon.raise_for_status()

        # Decode both payloads before touching the disk, so a bad response
        # never leaves a lone half of the file pair behind.
        data = {
            'species': response_species.json(),
            'pokemon': response_pokemon.json()
        }

        pkmn_id = data['pokemon']['id']

        with open(f"pokemons/{pkmn_id}-{pokemon['name']}.json", "w", encoding="utf-8") as f:
            json.dump(data['pokemon'], f)

        with open(f"pokemons/{pkmn_id}-{pokemon['name']}-species.json", "w", encoding="utf-8") as f:
            json.dump(data['species'], f)

        print(f"Saved {pkmn_id}-{pokemon['name']}")
    except Exception as e:
        # Best effort by design: report the failure and let the caller go on.
        print(f"Failed to fetch/save {pokemon['name']}: {e}")

# Spread the downloads over a small pool of worker threads.
with ThreadPoolExecutor(max_workers=10) as executor:
    # Queue one download task per entry of the index.
    futures = []
    for entry in pokelist:
        futures.append(executor.submit(fetch_and_save_pokemon_data, entry))

    # Block until every task has finished. The worker reports its own
    # failures with print, so result() is used here only to wait.
    for future in futures:
        future.result()

print("All data fetched and saved!")

Saved 1-bulbasaur
Saved 2-ivysaur
Saved 7-squirtle
Saved 5-charmeleon
Saved 10-caterpie
Saved 9-blastoise
Saved 12-butterfree
Saved 3-venusaur
Saved 8-wartortle
Saved 4-charmander
Saved 14-kakuna
Saved 6-charizard
Saved 16-pidgey
Saved 17-pidgeotto
Saved 11-metapod
Saved 13-weedle
Saved 18-pidgeot
Saved 19-rattata
Saved 15-beedrill
Saved 21-spearow
Saved 25-pikachu
Saved 24-arbok
Saved 23-ekans
Saved 30-nidorina
Saved 33-nidorino
Saved 29-nidoran-f
Saved 22-fearow
Saved 20-raticate
Saved 28-sandslash
Saved 34-nidoking
Saved 27-sandshrew
Saved 32-nidoran-m
Saved 39-jigglypuff
Saved 31-nidoqueen
Saved 42-golbat
Saved 37-vulpix
Saved 26-raichu
Saved 38-ninetales
Saved 46-paras
Saved 48-venonat
Saved 36-clefable
Saved 35-clefairy
Saved 40-wigglytuff
Saved 49-venomoth
Saved 41-zubat
Saved 45-vileplume
Saved 44-gloom
Saved 43-oddish
Saved 50-diglett
Saved 53-persian
Saved 51-dugtrio
Saved 47-parasect
Saved 52-meowth
Saved 54-psyduck
Saved 56-mankey
Saved 58-growlithe
Saved 59-arcanine
Saved 

# Load the JSON files into Pokemon objects

In [22]:
from dataclasses import dataclass
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from typing import Optional, List
import json

# Constants
# Directory that holds the per-pokemon JSON files; serialize_pokemon globs it
# for '<id>-*.json'.
POKEMON_DIR = Path("pokemons")


# Record type holding the attributes kept for one Pokémon.
@dataclass
class Pokemon:
    """Flat record of a single Pokémon, built from its pokemon and species JSON."""

    id: int
    name: str
    type1: str               # first listed type
    type2: Optional[str]     # second listed type, None when there is only one
    height: int
    weight: int
    color: str
    gen: int                 # generation number
    is_legendary: bool

def serialize_pokemon(pokemon_id: int) -> Pokemon:
    """
    Build a Pokemon instance from the JSON files saved for ``pokemon_id``.

    Two files are expected in the 'pokemons' directory:
      - the main pokemon data: '{id}-{name}.json'
      - the species data:      '{id}-{name}-species.json'

    Args:
        pokemon_id: Numeric id used as the file-name prefix.

    Returns:
        A Pokemon populated from both files. ``gen`` is 0 when the species
        data carries no generation URL, and ``type2`` is None when the pokemon
        data lists a single type.

    Raises:
        FileNotFoundError: If the pokemon file or the species file is missing.
    """
    species_suffix = "-species.json"

    # Find files matching the pokemon_id (both normal and species file).
    # The '-' right after the id keeps id 1 from matching '10-...', '11-...'.
    files = list(POKEMON_DIR.glob(f"{pokemon_id}-*.json"))
    print(f"Serializing files: {files}")

    # Tell the two files apart by suffix rather than by substring, so a name
    # that merely contains '-species' is not mistaken for the species file.
    pokemon_file = next((f for f in files if not f.name.endswith(species_suffix)), None)
    species_file = next((f for f in files if f.name.endswith(species_suffix)), None)

    if pokemon_file is None or species_file is None:
        raise FileNotFoundError(f"Required files for Pokemon id {pokemon_id} not found.")

    # Load data from the JSON files; the encoding is explicit so the result
    # does not depend on the platform default.
    with pokemon_file.open("r", encoding="utf-8") as p_file, \
            species_file.open("r", encoding="utf-8") as s_file:
        pokemon_data = json.load(p_file)
        species_data = json.load(s_file)

    # Extract the generation number from the last path segment of the URL.
    # `or {}` also covers a 'generation' key that is present but null.
    gen_url = (species_data.get('generation') or {}).get('url', '')
    generation = int(gen_url.rstrip('/').split("/")[-1]) if gen_url else 0

    # Determine the second type if available.
    types = pokemon_data['types']
    type2_value = types[1]['type']['name'] if len(types) > 1 else None

    return Pokemon(
        id=pokemon_id,
        name=pokemon_data['name'],
        type1=types[0]['type']['name'],
        type2=type2_value,
        height=pokemon_data['height'],
        weight=pokemon_data['weight'],
        color=species_data['color']['name'],
        gen=generation,
        is_legendary=species_data['is_legendary']
    )

# Extract the unique Pokémon ids from the JSON file names ('<id>-<name>.json').
# The whole prefix before the first '-' must be numeric: checking only the
# first character would let a stray file such as '2024_backup.json' through
# and crash int() with a ValueError.
pokemon_ids = {
    int(prefix)
    for prefix in (f.name.split("-")[0] for f in POKEMON_DIR.glob("*.json"))
    if prefix.isdecimal()
}

if not pokemon_ids:
    raise RuntimeError("No pokemon files found in the 'pokemons' directory.")


# Use ThreadPoolExecutor to build all Pokemon objects concurrently.
with ThreadPoolExecutor(max_workers=50) as executor:
    # Submit one task per id that actually has files on disk. Walking the full
    # min..max range instead would raise FileNotFoundError on any gap in the
    # ids (for example a single failed download) and abort the whole cell.
    futures = [executor.submit(serialize_pokemon, i) for i in sorted(pokemon_ids)]

    # Results are gathered in submission order, i.e. by ascending id.
    # result() re-raises any error from serialize_pokemon.
    results: List[Pokemon] = [future.result() for future in futures]

# 'results' now contains the list of Pokemon objects.
print(f"Serialized {len(results)} Pokémon.")

Serializing files: [PosixPath('pokemons/2-ivysaur-species.json'), PosixPath('pokemons/2-ivysaur.json')]
Serializing files: [PosixPath('pokemons/4-charmander-species.json'), PosixPath('pokemons/4-charmander.json')]
Serializing files: [PosixPath('pokemons/32-nidoran-m-species.json'), PosixPath('pokemons/32-nidoran-m.json')]
Serializing files: [PosixPath('pokemons/27-sandshrew-species.json'), PosixPath('pokemons/27-sandshrew.json')]
Serializing files: [PosixPath('pokemons/30-nidorina.json'), PosixPath('pokemons/30-nidorina-species.json')]
Serializing files: [PosixPath('pokemons/1-bulbasaur.json'), PosixPath('pokemons/1-bulbasaur-species.json')]
Serializing files: [PosixPath('pokemons/3-venusaur.json'), PosixPath('pokemons/3-venusaur-species.json')]
Serializing files: [PosixPath('pokemons/12-butterfree-species.json'), PosixPath('pokemons/12-butterfree.json')]
Serializing files: [PosixPath('pokemons/21-spearow.json'), PosixPath('pokemons/21-spearow-species.json')]
Serializing files: [PosixP

# Data to DataFrame

In [23]:
import pandas as pd

# One row per Pokemon object; the columns are the dataclass fields.
df = pd.DataFrame(data=results)
# Preview the first five rows.
print(df.head(5))

   id        name  type1   type2  height  weight  color  gen  is_legendary
0   1   bulbasaur  grass  poison       7      69  green    1         False
1   2     ivysaur  grass  poison      10     130  green    1         False
2   3    venusaur  grass  poison      20    1000  green    1         False
3   4  charmander   fire    None       6      85    red    1         False
4   5  charmeleon   fire    None      11     190    red    1         False


# CSV Export

In [25]:
# Write the table to disk without the positional index column.
df.to_csv(path_or_buf="pokemon_data.csv", index=False)