In [12]:
import pandas as pd

df = pd.read_csv("habitats.csv", index_col="Unnamed: 0")
subs = [("♀", "-F"), ("♂", "-M"), ("'", "’"), ("Flabébé", "Flabébé"), ("Daschsbun", "Dachsbun")]
for old, new in subs:
    df["species"] = df.species.str.replace(old, new)
df

Unnamed: 0,species,habitat
0,Bulbasaur,Kantonian forests and jungles
1,Ivysaur,Kantonian forests and jungles
2,Venusaur,Kantonian forests and jungles
3,Charmander,Kantonian mountains and volcanoes
4,Charmeleon,Kantonian mountains and volcanoes
...,...,...
1003,Chi-Yu,Ruins
1004,Roaring Moon,Area Zero
1005,Iron Valiant,Area Zero
1006,Koraidon,Anywhere


In [13]:
dex = pd.read_csv("../dex/dex.csv")
dex = dex[dex.num > 0]
dex = dex[~dex.forme.isin(["Mega", "Mega-X", "Mega-Y", "Primal", "Gmax"])]
dex = dex[["num", "name", "baseSpecies", "prevo", "legendary"]]
dex.baseSpecies.fillna(value=dex.name, inplace=True)
dex = dex.merge(right=dex[["name", "legendary"]], how="left", left_on="baseSpecies", right_on="name", suffixes=["_1", None])
# dex.drop(columns=["name_1", "legendary_1"], inplace=True)
dex

Unnamed: 0,num,name_1,baseSpecies,prevo,legendary_1,name,legendary
0,1,Bulbasaur,Bulbasaur,,,Bulbasaur,
1,2,Ivysaur,Ivysaur,Bulbasaur,,Ivysaur,
2,3,Venusaur,Venusaur,Ivysaur,,Venusaur,
3,4,Charmander,Charmander,,,Charmander,
4,5,Charmeleon,Charmeleon,Charmander,,Charmeleon,
...,...,...,...,...,...,...,...
1213,1006,Iron Valiant,Iron Valiant,,Paradox,Iron Valiant,Paradox
1214,1007,Koraidon,Koraidon,,Restricted Legendary,Koraidon,Restricted Legendary
1215,1008,Miraidon,Miraidon,,Restricted Legendary,Miraidon,Restricted Legendary
1216,1009,Walking Wake,Walking Wake,,Paradox,Walking Wake,Paradox


In [14]:
dex.columns

Index(['num', 'name_1', 'baseSpecies', 'prevo', 'legendary_1', 'name',
       'legendary'],
      dtype='object')

In [15]:
dex = dex.merge(right=df, how="left", left_on="baseSpecies", right_on="species")
dex

Unnamed: 0,num,name_1,baseSpecies,prevo,legendary_1,name,legendary,species,habitat
0,1,Bulbasaur,Bulbasaur,,,Bulbasaur,,Bulbasaur,Kantonian forests and jungles
1,2,Ivysaur,Ivysaur,Bulbasaur,,Ivysaur,,Ivysaur,Kantonian forests and jungles
2,3,Venusaur,Venusaur,Ivysaur,,Venusaur,,Venusaur,Kantonian forests and jungles
3,4,Charmander,Charmander,,,Charmander,,Charmander,Kantonian mountains and volcanoes
4,5,Charmeleon,Charmeleon,Charmander,,Charmeleon,,Charmeleon,Kantonian mountains and volcanoes
...,...,...,...,...,...,...,...,...,...
1213,1006,Iron Valiant,Iron Valiant,,Paradox,Iron Valiant,Paradox,Iron Valiant,Area Zero
1214,1007,Koraidon,Koraidon,,Restricted Legendary,Koraidon,Restricted Legendary,Koraidon,Anywhere
1215,1008,Miraidon,Miraidon,,Restricted Legendary,Miraidon,Restricted Legendary,Miraidon,Anywhere
1216,1009,Walking Wake,Walking Wake,,Paradox,Walking Wake,Paradox,,


In [16]:
import re

habitat_map = {
    "grassland": "field|grassland|plain|meadow|scrubland",
    "forest": "forest|jungle|woodland",
    "waters-edge": "beach|lake|estuarie|pond|riverside|swamp|tropical river|wetland",
    "sea": "coast|ocean|sea",
    "cave": "cave",
    "mountain": "glacier|mountain|peak|tundra|volcano",
    "rough-terrain": "badland|wasteland|desert|rocky|ruin",
    "urban": "building|computer|human|sewer|power plant|urban",
    "rare": "unknown"
}

for habitat, keywords in habitat_map.items():
    dex[habitat] = dex.habitat.str.contains(keywords, flags=re.IGNORECASE, regex=True)

dex.loc[dex.prevo == "Eevee", "urban"] = True
dex.loc[~dex.legendary.isna(), list(habitat_map)] = False
dex.loc[~dex.legendary.isna(), "rare"] = True
dex.loc[dex.baseSpecies == "Unown", "rare"] = True
dex.loc[dex.baseSpecies == "Castform", "grassland"] = True

dex["habitat_count"] = dex[list(habitat_map)].sum(axis=1)
dex


Unnamed: 0,num,name_1,baseSpecies,prevo,legendary_1,name,legendary,species,habitat,grassland,forest,waters-edge,sea,cave,mountain,rough-terrain,urban,rare,habitat_count
0,1,Bulbasaur,Bulbasaur,,,Bulbasaur,,Bulbasaur,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,1.0
1,2,Ivysaur,Ivysaur,Bulbasaur,,Ivysaur,,Ivysaur,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,1.0
2,3,Venusaur,Venusaur,Ivysaur,,Venusaur,,Venusaur,Kantonian forests and jungles,False,True,False,False,False,False,False,False,False,1.0
3,4,Charmander,Charmander,,,Charmander,,Charmander,Kantonian mountains and volcanoes,False,False,False,False,False,True,False,False,False,1.0
4,5,Charmeleon,Charmeleon,Charmander,,Charmeleon,,Charmeleon,Kantonian mountains and volcanoes,False,False,False,False,False,True,False,False,False,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1213,1006,Iron Valiant,Iron Valiant,,Paradox,Iron Valiant,Paradox,Iron Valiant,Area Zero,False,False,False,False,False,False,False,False,True,1.0
1214,1007,Koraidon,Koraidon,,Restricted Legendary,Koraidon,Restricted Legendary,Koraidon,Anywhere,False,False,False,False,False,False,False,False,True,1.0
1215,1008,Miraidon,Miraidon,,Restricted Legendary,Miraidon,Restricted Legendary,Miraidon,Anywhere,False,False,False,False,False,False,False,False,True,1.0
1216,1009,Walking Wake,Walking Wake,,Paradox,Walking Wake,Paradox,,,False,False,False,False,False,False,False,False,True,1.0


In [17]:
dex["habitat_final"] = dex.habitat
dex["habitat_final"] = dex.habitat_final.str.replace("Kanto\w* ", "", regex=True)
dex["habitat_final"] = dex.habitat_final.str.replace("(\w*)(,| and) .*", "\g<1>", regex=True)
dex["habitat_final"].fillna("Rare", inplace=True)

In [18]:
habitat_map = {
    "Grassland": "areas with unpredictable weather|farms|gardens|grasslands|fields|meadows|pastures|plains|prairies|savannas",
    "Forest": "boreal|forest|jungles|montane|temperate|woodlands|woods",
    "Water's-edge": "beaches|lakes|marshes|ponds|rivers|streams|swamps|wetlands",
    "Sea": "coastal cliffs|cold|ocean|polar|pools|reefs|sea",
    "Cave": "cave|damp|mines",
    "Mountain": "clear skies|highlands|glaciers|mountains|peaks|peatlands|stratosphere|volcanoes",
    "Rough-terrain": "badlands|canyons|cliffs|deserts|rocky areas|ruins|wastelands",
    "Urban": "alleys|anywhere|buildings|cities|computers|heavily-polluted|graveyards|landfills|power plants|remote areas|urban",
    "Rare": "unknown",
}

for habitat, keywords in habitat_map.items():
    dex.loc[dex.habitat_final.str.contains(keywords, flags=re.IGNORECASE, regex=True), "habitat_final"] = habitat

dex.loc[dex.prevo == "Eevee", "habitat_final"] = "Urban"
dex.loc[~dex.legendary.isna(), "habitat_final"] = "Rare"
dex.habitat_final.value_counts()

Forest           296
Rare             191
Grassland        167
Water's-edge     130
Urban            107
Mountain          95
Cave              81
Rough-terrain     78
Sea               73
Name: habitat_final, dtype: int64

In [19]:
# out = pd.read_csv("template.csv", encoding="utf-8")
# out["Species"] = out.Species.str.replace("(.*) - (F|M)", "\g<1>", regex=True)
# out = out.merge(right=dex[["name", "habitat", "habitat_final"]], how="left", left_on="Species", right_on="name")
dex[["name", "habitat", "habitat_final"]].to_csv("output.csv", index=False)

In [20]:
dex[dex.baseSpecies=="Meowth"]

Unnamed: 0,num,name_1,baseSpecies,prevo,legendary_1,name,legendary,species,habitat,grassland,forest,waters-edge,sea,cave,mountain,rough-terrain,urban,rare,habitat_count,habitat_final
76,52,Meowth,Meowth,,,Meowth,,Meowth,"Urban towns and cities, forests",False,True,False,False,False,False,False,True,False,2.0,Urban
77,52,Meowth-Alola,Meowth,,,Meowth,,Meowth,"Urban towns and cities, forests",False,True,False,False,False,False,False,True,False,2.0,Urban
78,52,Meowth-Galar,Meowth,,,Meowth,,Meowth,"Urban towns and cities, forests",False,True,False,False,False,False,False,True,False,2.0,Urban


In [21]:
dex[dex.name=="Rockruff"]

Unnamed: 0,num,name_1,baseSpecies,prevo,legendary_1,name,legendary,species,habitat,grassland,forest,waters-edge,sea,cave,mountain,rough-terrain,urban,rare,habitat_count,habitat_final
891,744,Rockruff,Rockruff,,,Rockruff,,Rockruff,"Forests, mountains, badlands, deserts",False,True,False,False,False,True,True,False,False,3.0,Forest
