In [21]:
import pandas as pd

df = pd.read_csv("habitats.csv", index_col="Unnamed: 0")
df

Unnamed: 0,species,habitat
0,Bulbasaur,Kantonian forests and jungles
1,Ivysaur,Kantonian forests and jungles
2,Venusaur,Kantonian forests and jungles
3,Charmander,Kantonian mountains and volcanoes
4,Charmeleon,Kantonian mountains and volcanoes
...,...,...
1003,Chi-Yu,Ruins
1004,Roaring Moon,Area Zero
1005,Iron Valiant,Area Zero
1006,Koraidon,Anywhere


In [22]:
dex = pd.read_csv("../dex/dex.csv")
dex = dex[dex.num > 0]
dex = dex[["num", "name", "baseSpecies", "prevo", "legendary"]]
dex.baseSpecies.fillna(value=dex.name, inplace=True)
dex

Unnamed: 0,num,name,baseSpecies,prevo,legendary
0,1,Bulbasaur,Bulbasaur,,
1,2,Ivysaur,Ivysaur,Bulbasaur,
2,3,Venusaur,Venusaur,Ivysaur,
3,3,Venusaur-Mega,Venusaur,,
4,3,Venusaur-Gmax,Venusaur,,
...,...,...,...,...,...
1295,1006,Iron Valiant,Iron Valiant,,Paradox
1296,1007,Koraidon,Koraidon,,Restricted Legendary
1297,1008,Miraidon,Miraidon,,Restricted Legendary
1298,1009,Walking Wake,Walking Wake,,Paradox


In [23]:
dex = dex.merge(right=df, how="left", left_on="baseSpecies", right_on="species")
dex.drop(columns="species", inplace=True)
dex

Unnamed: 0,num,name,baseSpecies,prevo,legendary,habitat
0,1,Bulbasaur,Bulbasaur,,,Kantonian forests and jungles
1,2,Ivysaur,Ivysaur,Bulbasaur,,Kantonian forests and jungles
2,3,Venusaur,Venusaur,Ivysaur,,Kantonian forests and jungles
3,3,Venusaur-Mega,Venusaur,,,Kantonian forests and jungles
4,3,Venusaur-Gmax,Venusaur,,,Kantonian forests and jungles
...,...,...,...,...,...,...
1295,1006,Iron Valiant,Iron Valiant,,Paradox,Area Zero
1296,1007,Koraidon,Koraidon,,Restricted Legendary,Anywhere
1297,1008,Miraidon,Miraidon,,Restricted Legendary,Anywhere
1298,1009,Walking Wake,Walking Wake,,Paradox,


In [24]:
import re

habitat_map = {
    "grassland": "field|grassland|plain|meadow|scrubland",
    "forest": "forest|jungle|woodland",
    "waters-edge": "beach|lake|estuarie|pond|riverside|swamp|tropical river|wetland",
    "sea": "ocean|sea",
    "cave": "cave",
    "mountain": "mountain|volcano",
    "rough-terrain": "badland|wasteland|desert|rocky|ruin",
    "urban": "building|computer|human|sewer|power plant|urban",
    "rare": "_"
}

for habitat, keywords in habitat_map.items():
    dex[habitat] = dex.habitat.str.contains(keywords, flags=re.IGNORECASE, regex=True)

dex.loc[dex.prevo == "Eevee", "urban"] = True
dex.loc[~dex.legendary.isna(), "rare"] = True
dex.loc[dex.baseSpecies == "Unown", "rare"] = True
dex.loc[dex.baseSpecies == "Castform", "grassland"] = True

dex["has_habitat"] = dex.any(bool_only=True, axis=1)
dex


Unnamed: 0,num,name,baseSpecies,prevo,legendary,habitat,grassland,forest,waters-edge,sea,cave,mountain,rough-terrain,urban,rare,has_habitat
0,1,Bulbasaur,Bulbasaur,,,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,False
1,2,Ivysaur,Ivysaur,Bulbasaur,,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,False
2,3,Venusaur,Venusaur,Ivysaur,,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,False
3,3,Venusaur-Mega,Venusaur,,,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,False
4,3,Venusaur-Gmax,Venusaur,,,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1295,1006,Iron Valiant,Iron Valiant,,Paradox,Area Zero,False,False,False,False,False,False,False,False,True,False
1296,1007,Koraidon,Koraidon,,Restricted Legendary,Anywhere,False,False,False,False,False,False,False,False,True,False
1297,1008,Miraidon,Miraidon,,Restricted Legendary,Anywhere,False,False,False,False,False,False,False,False,True,False
1298,1009,Walking Wake,Walking Wake,,Paradox,,,,,,,,,,True,False
