In [1]:
import pandas as pd

# Specify your input and output CSV filenames
input_csv = "pokedex.csv"
output_csv = "pokedex_filtered.csv"

# Load the dataset into a DataFrame
df = pd.read_csv(input_csv)

# List the columns you want to remove
# Adjust this list based on which columns you don't need
columns_to_drop = [
    "german_name", "japanese_name", "is_sub_legendary", "species",
    "is_mythical", "abilities_number", "ability_hidden", "catch_rate",
    "base_experience", "egg_type_number", "egg_type_2", "egg_type_1",    
    "percentage_male", "egg_cycles","growth_rate"
]

# Drop the specified columns (if they exist in the DataFrame)
df_filtered = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

# Save the filtered DataFrame to a new CSV file
df_filtered.to_csv(output_csv, index=False)

print(f"Filtered CSV saved as {output_csv}")

Filtered CSV saved as pokedex_filtered.csv


In [2]:
# File names (update if necessary)
filtered_csv = "pokedex_filtered.csv"
evolution_csv = "evolution.csv"
output_csv = "pokedex_merged.csv"

# Load the filtered CSV and the evolution CSV
df_filtered = pd.read_csv(filtered_csv)
df_evolution = pd.read_csv(evolution_csv)

# Rename the "Unnamed: 0" column to "id" (if it exists)
if "Unnamed: 0" in df.columns:
    df.rename(columns={"Unnamed: 0": "id"}, inplace=True)

# Normalize the name columns by converting them to lowercase for matching
df_filtered['name_lower'] = df_filtered['name'].str.lower()
df_evolution['name_lower'] = df_evolution['Name'].str.lower()

# Merge the two DataFrames on the normalized name column
df_merged = pd.merge(df_filtered, df_evolution[['name_lower', 'Evolution']], on='name_lower', how='left')

# Optionally, remove the helper column
df_merged.drop(columns=['name_lower'], inplace=True)

# Function to determine if a Pokémon is mega and create a base name accordingly
def get_form(name):
    if "Mega" in name:
        return "mega"
    return "base"

def get_base_name(row):
    if row['form'] == "mega":
        # Remove the "Mega " prefix to get the base name
        return row['name'].replace("Mega ", "")
    return None

# Create a new column 'form' based on the name
df_merged['form'] = df_merged['name'].apply(get_form)

# Create a new column 'base_name' only if the form is mega
df_merged['base_name'] = df_merged.apply(get_base_name, axis=1)

# Save the updated CSV
df_merged.to_csv("pokedex_final.csv", index=False)
print(f"Merged CSV saved as {output_csv}")

Merged CSV saved as pokedex_merged.csv


In [4]:
import pandas as pd
from rdflib import Graph, URIRef, Literal, BNode, Namespace
from rdflib.namespace import RDF, XSD, RDFS, FOAF

# Define Namespaces
PDX = Namespace("http://poked-x.org/pokemon/")
SCHEMA = Namespace("http://schema.org/")

# Create an RDF graph and bind namespaces for clarity
g = Graph()
g.bind("pdx", PDX)
g.bind("schema", SCHEMA)

# Read CSV using pandas
df = pd.read_csv("pokedex_final.csv")

# Build a mapping from normalized Pokémon names to their resource URIs.
pokemon_uri_map = {}
for idx, row in df.iterrows():
    uid = str(row["Unnamed: 0"]).strip()
    poke_uri = URIRef(EX["Pokemon/" + uid])
    name_norm = str(row["name"]).strip().lower()
    pokemon_uri_map[name_norm] = poke_uri

# First pass: create Pokémon resources and add basic properties, types, abilities, and effectiveness
for idx, row in df.iterrows():
    uid = str(row["Unnamed: 0"]).strip()
    name = str(row["name"]).strip()
    poke_uri = URIRef(EX["Pokemon/" + uid])

    try:

        # Explicit typing: mark as a Pokémon
        g.add((poke_uri, RDF.type, EX.Pokemon))
        
        # Add the Pokédex number property if available
        if pd.notna(row["pokedex_number"]):
            g.add((poke_uri, EX.pokedexNumber, Literal(int(row["pokedex_number"]), datatype=XSD.integer)))
    
        # Basic properties
        g.add((poke_uri, SCHEMA.name, Literal(row["name"])))
        g.add((poke_uri, EX.generation, Literal(int(row["generation"]), datatype=XSD.integer)))
        try:
            is_leg = bool(int(row["is_legendary"]))
        except:
            is_leg = False

        g.add((poke_uri, EX.isLegendary, Literal(is_leg, datatype=XSD.boolean)))
        g.add((poke_uri, EX.height, Literal(float(row["height_m"]), datatype=XSD.float)))
        g.add((poke_uri, EX.weight, Literal(float(row["weight_kg"]), datatype=XSD.float)))

        if pd.notna(row["total_points"]):
            total_points = int(row["total_points"])
            g.add((poke_uri, EX.totalPoints, Literal(total_points, datatype=XSD.integer)))
        else:
            # Optionally, add a default value or skip this triple.
            g.add((poke_uri, EX.totalPoints, Literal(0, datatype=XSD.integer)))

        if pd.notna(row["hp"]):
            hp = int(row["hp"])
            g.add((poke_uri, EX.hp, Literal(hp, datatype=XSD.integer)))
        else:
            # Optionally, add a default value or skip this triple.
            g.add((poke_uri, EX.hp, Literal(0, datatype=XSD.integer)))
        

        g.add((poke_uri, EX.attack, Literal(int(row["attack"]), datatype=XSD.integer)))
        g.add((poke_uri, EX.defense, Literal(int(row["defense"]), datatype=XSD.integer)))
        g.add((poke_uri, EX.spAttack, Literal(int(row["sp_attack"]), datatype=XSD.integer)))
        g.add((poke_uri, EX.spDefense, Literal(int(row["sp_defense"]), datatype=XSD.integer)))
        g.add((poke_uri, EX.speed, Literal(int(row["speed"]), datatype=XSD.integer)))

        bf = row["base_friendship"]
        if pd.notna(bf):
            g.add((poke_uri, EX.baseFriendship, Literal(int(bf), datatype=XSD.integer)))
        else:
            # Optionally, you could assign a default value or simply skip adding the triple.
            # Here, we'll assign a default value of 0.
            g.add((poke_uri, EX.baseFriendship, Literal(0, datatype=XSD.integer)))
        
        # Link to Type resources
        type1 = str(row["type_1"]).strip()
        if type1:
            type1_uri = URIRef(EX["Type/" + type1.lower()])
            g.add((poke_uri, EX.primaryType, type1_uri))
            g.add((type1_uri, RDFS.label, Literal(type1)))
        type2 = str(row["type_2"]).strip()
        if type2:
            type2_uri = URIRef(EX["Type/" + type2.lower()])
            g.add((poke_uri, EX.secondaryType, type2_uri))
            g.add((type2_uri, RDFS.label, Literal(type2)))
        
        # Link to Ability resources (skip if missing or NaN)
        ability1 = row["ability_1"]
        if pd.notna(ability1) and str(ability1).strip().lower() != "nan":
            ability1_str = str(ability1).strip()
            ability1_uri = URIRef(EX["Ability/" + ability1_str.lower().replace(" ", "_")])
            g.add((poke_uri, EX.ability1, ability1_uri))
            g.add((ability1_uri, RDFS.label, Literal(ability1_str)))
        ability2 = row["ability_2"]
        if pd.notna(ability2) and str(ability2).strip().lower() != "nan":
            ability2_str = str(ability2).strip()
            ability2_uri = URIRef(EX["Ability/" + ability2_str.lower().replace(" ", "_")])
            g.add((poke_uri, EX.ability2, ability2_uri))
            g.add((ability2_uri, RDFS.label, Literal(ability2_str)))
        
        # Effectiveness multipliers using a blank node
        eff_node = BNode()
        g.add((poke_uri, EX.effectiveness, eff_node))
        effectiveness_attrs = [
            ("against_normal", "againstNormal"),
            ("against_fire", "againstFire"),
            ("against_water", "againstWater"),
            ("against_electric", "againstElectric"),
            ("against_grass", "againstGrass"),
            ("against_ice", "againstIce"),
            ("against_fight", "againstFight"),
            ("against_poison", "againstPoison"),
            ("against_ground", "againstGround"),
            ("against_flying", "againstFlying"),
            ("against_psychic", "againstPsychic"),
            ("against_bug", "againstBug"),
            ("against_rock", "againstRock"),
            ("against_ghost", "againstGhost"),
            ("against_dragon", "againstDragon"),
            ("against_dark", "againstDark"),
            ("against_steel", "againstSteel"),
            ("against_fairy", "againstFairy")
        ]
        for csv_attr, prop_local in effectiveness_attrs:
            val = row[csv_attr]
            g.add((eff_node, EX[prop_local], Literal(float(val), datatype=XSD.float)))

    except Exception as e:
        print(f"Error processing Pokémon {name} (ID: {uid}): {e}")
    continue


# Second pass: Add evolution and mega evolution relationships
for idx, row in df.iterrows():

    uid = str(row["Unnamed: 0"]).strip()
    name = str(row["name"]).strip()
    poke_uri = URIRef(EX["Pokemon/" + uid])
    
    try:
        # Evolution: link to the next evolution if provided
        evolution = str(row["Evolution"]).strip().lower()
        if evolution:
            target_uri = pokemon_uri_map.get(evolution)
            if target_uri:
                g.add((poke_uri, EX.evolvesTo, target_uri))
        
        # Mega Evolution: if the form indicates mega or base_name is provided and differs from the current name
        form = str(row["form"]).strip().lower()
        base_name = str(row["base_name"]).strip().lower()
        name_norm = str(row["name"]).strip().lower()
        if form == "mega" or (base_name and base_name != name_norm):
            base_uri = pokemon_uri_map.get(base_name)
            if base_uri:
                g.add((poke_uri, EX.megaEvolutionOf, base_uri))
    except Exception as e:
        print(f"Error processing evolution for Pokémon {name} (ID: {uid}): {e}")
    continue

# Serialize the RDF graph in Turtle format and display it
turtle_data = g.serialize(format="turtle")
with open("pokemon-rdf.ttl", "w", encoding="utf-8") as f:
    f.write(turtle_data.decode("utf-8") if isinstance(turtle_data, bytes) else turtle_data)



In [None]:
import pandas as pd
import os
import shutil

# --- CONFIG ---
image_folder = "pokemon-images"         # <- Change to your actual path
output_folder = "pokemon-images-index"        # <- Where renamed images will be saved
csv_file = "pokedex_final.csv"
output_csv = "pokedex_with_images.csv"  # <- Updated dataset with has_image column

# Load dataset
df = pd.read_csv(csv_file)

# Create output directory if needed
os.makedirs(output_folder, exist_ok=True)

# Create a new column to track image existence
df["has_image"] = False

# Loop through each Pokémon
for idx, row in df.iterrows():
    try:
        dataset_id = str(row["Unnamed: 0"]).strip()
        pokedex_number = str(int(row["pokedex_number"]))
        form = str(row["form"]).strip().lower()

        # Determine expected filename
        if form.startswith("mega"):
            original_filename = f"{pokedex_number}-{form}.png"  # e.g., 6-mega-x.png
        else:
            original_filename = f"{pokedex_number}.png"

        old_path = os.path.join(image_folder, original_filename)
        new_filename = f"{dataset_id}.png"
        new_path = os.path.join(output_folder, new_filename)

        if os.path.exists(old_path):
            shutil.copy2(old_path, new_path)
            df.at[idx, "has_image"] = True
        else:
            print(f"❌ Missing image for {dataset_id}: {original_filename}")

    except Exception as e:
        print(f"⚠️ Error processing row {idx} ({row['name']}): {e}")

# Save the updated dataset
df.to_csv(output_csv, index=False)
print(f"\n✅ Done! Updated dataset saved to: {output_csv}")
print(f"Renamed images saved to: {output_folder}")

❌ Missing image for ID 194: 154-mega
❌ Missing image for ID 242: 201
❌ Missing image for ID 460: 386
❌ Missing image for ID 461: 386
❌ Missing image for ID 462: 386
❌ Missing image for ID 463: 386
❌ Missing image for ID 489: 412
❌ Missing image for ID 490: 413
❌ Missing image for ID 491: 413
❌ Missing image for ID 492: 413
❌ Missing image for ID 500: 421
❌ Missing image for ID 501: 422
❌ Missing image for ID 502: 423
❌ Missing image for ID 576: 487
❌ Missing image for ID 577: 487
❌ Missing image for ID 582: 492
❌ Missing image for ID 583: 492
❌ Missing image for ID 584: 493
❌ Missing image for ID 642: 550
❌ Missing image for ID 643: 550
❌ Missing image for ID 649: 555
❌ Missing image for ID 650: 555
❌ Missing image for ID 651: 555
❌ Missing image for ID 652: 555
❌ Missing image for ID 683: 585
❌ Missing image for ID 684: 586
❌ Missing image for ID 740: 641
❌ Missing image for ID 741: 641
❌ Missing image for ID 742: 642
❌ Missing image for ID 743: 642
❌ Missing image for ID 746: 645
❌ M