In [15]:
import pandas as pd

In [16]:
pvp = pd.read_csv('final_pokemon_dataset.csv')

In [17]:
pokedex = pd.read_csv('../datasets/pokedex.csv')

In [18]:
pokedex = pokedex[['#','Name','Type','Attack','Defense','HP']]

In [19]:
pokedex['#'] = pokedex['#'].astype(str).str.zfill(4)

In [20]:
pokedex = pokedex.rename(columns={"#":"ID","Name":"NAME","Type":"TYPE","Attack":"BASE_ATTACK","Defense":"BASE_DEFENSE","HP":"BASE_HP"})

In [21]:
def split_types(row):
    types = row['TYPE'].split(' ')
    return pd.Series({
        'TYPE_ONE': types[0],
        'TYPE_TWO': types[1] if len(types) > 1 else None
    })

pokedex[['TYPE_ONE', 'TYPE_TWO']] = pokedex.apply(split_types, axis=1)

In [23]:
pokedex = pokedex.drop(columns='TYPE')

In [25]:
pokedex = pokedex[['ID','NAME','TYPE_ONE','TYPE_TWO','BASE_ATTACK','BASE_DEFENSE','BASE_HP']]

In [27]:
pokedex

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,0001,Bulbasaur,Grass,Poison,118,111,128
1,0002,Ivysaur,Grass,Poison,151,143,155
2,0003,Venusaur,Grass,Poison,198,189,190
3,0003,Venusaur Mega Venusaur,Grass,Poison,241,246,190
4,0004,Charmander,Fire,,116,93,118
...,...,...,...,...,...,...,...
1025,0996,Frigibax,Dragon,Ice,134,86,163
1026,0997,Arctibax,Dragon,Ice,173,128,207
1027,0998,Baxcalibur,Dragon,Ice,254,168,229
1028,0999,Gimmighoul Roaming Form,Ghost,,140,76,128


In [28]:
# Assuming 'pokedex' and 'pvp' are your DataFrames and they both have a 'NAME' column

# Step 1: Remove 'Mega' and 'Primal' Pokémon from 'pokedex'
pokedex = pokedex[~pokedex['NAME'].str.contains('Mega|Primal', na=False)]

# Step 2: Check which Pokémon from 'pokedex_cleaned' are not in 'pvp'
# Assuming both DataFrames have a 'NAME' column to compare
missing_in_pvp = pokedex[~pokedex['NAME'].isin(pvp['NAME'])]

# Output the names of Pokémon that are missing in 'pvp'
print("Pokémon in pokedex not found in pvp:")
print(missing_in_pvp)

Pokémon in pokedex not found in pvp:
       ID                            NAME  TYPE_ONE  TYPE_TWO  BASE_ATTACK  \
25   0019          Rattata Alolan Rattata      Dark    Normal          103   
27   0020        Raticate Alolan Raticate      Dark    Normal          135   
33   0025           Pikachu Pikachu Libre  Electric      None          112   
34   0025                  Pikachu Flying  Electric      None          112   
35   0025                Pikachu Pop Star  Electric      None          112   
..    ...                             ...       ...       ...          ...   
954  0783                        Hakamo-o    Dragon  Fighting          145   
955  0784                         Kommo-o    Dragon  Fighting          222   
979  0865                      Sirfetch'd  Fighting      None          248   
983  0888     Zacian Hero of Many Battles     Fairy      None          254   
984  0889  Zamazenta Hero of Many Battles  Fighting      None          254   

     BASE_DEFENSE  BASE_HP

In [31]:
list(missing_in_pvp['NAME'])

['Rattata Alolan Rattata',
 'Raticate Alolan Raticate',
 'Pikachu Pikachu Libre',
 'Pikachu Flying',
 'Pikachu Pop Star',
 'Pikachu Rock Star',
 'Raichu Alolan Raichu',
 'Sandshrew Alolan Sandshrew',
 'Sandslash Alolan Sandslash',
 'Vulpix Alolan Vulpix',
 'Ninetales Alolan Ninetales',
 'Diglett Alolan Diglett',
 'Dugtrio Alolan Dugtrio',
 'Meowth Alolan Meowth',
 'Meowth Galarian Meowth',
 'Persian Alolan Persian',
 'Growlithe Hisuian Growlithe',
 'Arcanine Hisuian Arcanine',
 'Geodude Alolan Geodude',
 'Graveler Alolan Graveler',
 'Golem Alolan Golem',
 'Ponyta Galarian Ponyta',
 'Rapidash Galarian Rapidash',
 'Slowpoke Galarian Slowpoke',
 'Slowbro Galarian Slowbro',
 "Farfetch'd",
 "Farfetch'd Galarian Farfetch'd",
 'Grimer Alolan Grimer',
 'Muk Alolan Muk',
 'Voltorb Hisuian Voltorb',
 'Electrode Hisuian Electrode',
 'Exeggutor Alolan Exeggutor',
 'Marowak Alolan Marowak',
 'Weezing Galarian Weezing',
 'Mr. Mime Galarian Mr. Mime',
 'Articuno Galarian Articuno',
 'Zapdos Galarian 

In [30]:
def filter_df_by_name(df, substring):
    filtered_df = df[df['NAME'].str.contains(substring, case=False, na=False)]
    return filtered_df

In [53]:
all_shadow_pokemon = filter_df_by_name(pvp,"Shadow")

In [33]:
def drop_rows_by_exact_name(df, pokemon_name):
    # Filter out rows where the 'NAME' column matches the Pokémon name exactly
    filtered_df = df[df['NAME'] != pokemon_name]
    return filtered_df

In [42]:
pvp = drop_rows_by_exact_name(pvp, 'Zamazenta - Hero Of Many Battles')

In [54]:
pvp.to_csv('final_pokemon_dataset.csv')

In [55]:
all_shadow_pokemon.to_csv('final_shadow_pokemon_dataset.csv')

In [63]:
pvp = pvp.drop(columns='Unnamed: 0')

In [64]:
pvp

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,FAST_MOVE,FAST_MOVE_POWER,FAST_MOVE_TYPE,FAST_ENERGY_BOOST,FAST_MOVE_DURATION,CHARGE_MOVE,CHARGE_MOVE_POWER,CHARGED_MOVE_TYPE,CHARGE_MOVE_ENERGY_COST,CHARGE_MOVE_DURATION,DAMAGE WINDOW START
0,1,Bulbasaur,Grass,Poison,Tackle,5.0,Normal,5.0,0.5,Return,35.0,Normal,33.0,0.7,0.10
1,1,Bulbasaur,Grass,Poison,Vine Whip,7.0,Grass,6.0,0.6,Power Whip,90.0,Grass,50.0,2.6,1.25
2,1,Bulbasaur,Grass,Poison,Vine Whip,7.0,Grass,6.0,0.6,Sludge Bomb,80.0,Poison,50.0,2.3,1.10
3,1,Bulbasaur,Grass,Poison,Vine Whip,7.0,Grass,6.0,0.6,Seed Bomb,55.0,Grass,33.0,2.1,1.20
4,1,Bulbasaur,Grass,Poison,Tackle,5.0,Normal,5.0,0.5,Sludge Bomb,80.0,Poison,50.0,2.3,1.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15229,1000,Gholdengo,Steel,Ghost,Astonish,8.0,Ghost,14.0,1.1,Focus Blast,140.0,Fighting,100.0,3.5,3.00
15230,1000,Gholdengo,Steel,Ghost,Astonish,8.0,Ghost,14.0,1.1,Shadow Ball,100.0,Ghost,50.0,3.0,2.40
15231,1000,Gholdengo,Steel,Ghost,Astonish,8.0,Ghost,14.0,1.1,Dazzling Gleam,100.0,Fairy,50.0,3.5,2.10
15232,1000,Gholdengo,Steel,Ghost,Hex,10.0,Ghost,16.0,1.2,Dazzling Gleam,100.0,Fairy,50.0,3.5,2.10


In [69]:
pokedex

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,0001,Bulbasaur,Grass,Poison,118,111,128
1,0002,Ivysaur,Grass,Poison,151,143,155
2,0003,Venusaur,Grass,Poison,198,189,190
4,0004,Charmander,Fire,,116,93,118
5,0005,Charmeleon,Fire,,158,126,151
...,...,...,...,...,...,...,...
1025,0996,Frigibax,Dragon,Ice,134,86,163
1026,0997,Arctibax,Dragon,Ice,173,128,207
1027,0998,Baxcalibur,Dragon,Ice,254,168,229
1028,0999,Gimmighoul Roaming Form,Ghost,,140,76,128


In [66]:
# Step 1: Select relevant columns and drop duplicates
unique_pokemon = pvp[['ID', 'NAME', 'TYPE_ONE', 'TYPE_TWO']].drop_duplicates()

# Step 2: Add columns for BASE_ATTACK, BASE_DEFENSE, BASE_HP with empty values
unique_pokemon['BASE_ATTACK'] = ''
unique_pokemon['BASE_DEFENSE'] = ''
unique_pokemon['BASE_HP'] = ''

In [74]:
unique_pokemon

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,,,
7,1,Shadow Bulbasaur,Grass,Poison,,,
16,2,Ivysaur,Grass,Poison,,,
24,2,Shadow Ivysaur,Grass,Poison,,,
32,3,Shadow Venusaur,Grass,Poison,,,
...,...,...,...,...,...,...,...
15203,996,Frigibax,Dragon,Ice,,,
15211,997,Arctibax,Dragon,Ice,,,
15219,998,Baxcalibur,Dragon,Ice,,,
15227,999,Gimmighoul Roaming Form,Ghost,,,,


In [659]:
merged_df = pd.merge(merged_df, 
                     pokedex[['NAME', 'TYPE_ONE', 'TYPE_TWO', 'BASE_ATTACK', 'BASE_DEFENSE', 'BASE_HP']],
                     on='NAME', 
                     how='left')

In [660]:
merged_df = merged_df.drop(columns=['TYPE_ONE_y','TYPE_TWO_y','BASE_ATTACK_x','BASE_DEFENSE_x'])

In [661]:
merged_df = merged_df.rename(columns={"TYPE_ONE_x":"TYPE_ONE","TYPE_TWO_x":"TYPE_TWO","BASE_ATTACK_y":"BASE_ATTACK","BASE_DEFENSE_y":"BASE_DEFENSE","BASE_HP_y":"BASE_HP"})

In [662]:
merged_df = merged_df.drop(columns=['BASE_HP_x'])

In [663]:
merged_df

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,118.0,111.0,128.0
1,1,Shadow Bulbasaur,Grass,Poison,,,
2,2,Ivysaur,Grass,Poison,151.0,143.0,155.0
3,2,Shadow Ivysaur,Grass,Poison,,,
4,3,Shadow Venusaur,Grass,Poison,,,
...,...,...,...,...,...,...,...
1344,996,Frigibax,Dragon,Ice,134.0,86.0,163.0
1345,997,Arctibax,Dragon,Ice,173.0,128.0,207.0
1346,998,Baxcalibur,Dragon,Ice,254.0,168.0,229.0
1347,999,Gimmighoul Roaming Form,Ghost,,140.0,76.0,128.0


In [664]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1349 entries, 0 to 1348
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ID            1349 non-null   int64  
 1   NAME          1349 non-null   object 
 2   TYPE_ONE      1349 non-null   object 
 3   TYPE_TWO      725 non-null    object 
 4   BASE_ATTACK   953 non-null    float64
 5   BASE_DEFENSE  953 non-null    float64
 6   BASE_HP       953 non-null    float64
dtypes: float64(3), int64(1), object(3)
memory usage: 73.9+ KB


In [665]:
for name in unique_pokemon['NAME']:
    if 'Shadow' in name:
        non_shadow_name = name.replace('Shadow ', '')
        # Check if non-shadow counterpart exists
        if non_shadow_name in pokedex['NAME'].values:
            stats = pokedex.loc[pokedex['NAME'] == non_shadow_name, ['BASE_ATTACK', 'BASE_DEFENSE', 'BASE_HP']].iloc[0]
            merged_df.loc[merged_df['NAME'] == name, 'BASE_ATTACK'] = stats['BASE_ATTACK'] * 1.20
            merged_df.loc[merged_df['NAME'] == name, 'BASE_DEFENSE'] = stats['BASE_DEFENSE'] * 0.80
            merged_df.loc[merged_df['NAME'] == name, 'BASE_HP'] = stats['BASE_HP']  # HP remains the same for Shadow Pokémon

In [666]:
merged_df.shape

(1349, 7)

In [667]:
pokedex.shape

(990, 7)

In [668]:
missing_stats = merged_df[merged_df['BASE_ATTACK'].isna() | merged_df['BASE_DEFENSE'].isna() | merged_df['BASE_HP'].isna()]

In [669]:
missing_pokemon_names = missing_stats['NAME'].unique()

In [670]:
missing_pokemon_names

array(['Black Kyurem', 'Shadow Keldeo', 'Aegislash - Shield', 'Mudbray',
       'Mudsdale', 'Chewtle', 'Sandaconda', 'Tandemaus'], dtype=object)

In [671]:
pokedex_names = set(pokedex['NAME'].unique())

In [672]:
unmatched_pokemon = [name for name in missing_pokemon_names if name not in pokedex_names]

In [673]:
unmatched_pokemon

['Black Kyurem',
 'Shadow Keldeo',
 'Aegislash - Shield',
 'Mudbray',
 'Mudsdale',
 'Chewtle',
 'Sandaconda',
 'Tandemaus']

In [675]:
filter_df_by_name(merged_df,"Aegislash")

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
1092,681,Aegislash - Shield,Steel,Ghost,,,


In [651]:
def adjust_name_casing(name):
    # Correct specific names
    if name == "Oricorio Pa'U Style":
        return "Oricorio Pa'u Style"
    return name  # Return the name unchanged if no corrections are needed

# Apply the function to the 'NAME' column
merged_df['NAME'] = merged_df['NAME'].apply(adjust_name_casing)

In [676]:
filter_df_by_name(pokedex,"Aegislash")

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP


In [264]:
import re
def reformat_names(name):
    # Regex to capture "<name> <identifier> <name>"
    match = re.match(r"(\w+) (\w+) \1", name)
    if match:
        # Reformat to "<identifier> <name>"
        return f"{match.group(2)} {match.group(1)}"
    else:
        return name  # Return the name unchanged if it doesn't match the pattern

# Apply the function to the 'NAME' column
pokedex['NAME'] = pokedex['NAME'].apply(reformat_names)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pokedex['NAME'] = pokedex['NAME'].apply(reformat_names)


In [265]:
merged_df

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,118.0,111.0,128.0
1,1,Shadow Bulbasaur,Grass,Poison,141.6,88.8,128.0
2,2,Ivysaur,Grass,Poison,151.0,143.0,155.0
3,2,Shadow Ivysaur,Grass,Poison,181.2,114.4,155.0
4,3,Shadow Venusaur,Grass,Poison,237.6,151.2,190.0
...,...,...,...,...,...,...,...
1344,996,Frigibax,Dragon,Ice,134.0,86.0,163.0
1345,997,Arctibax,Dragon,Ice,173.0,128.0,207.0
1346,998,Baxcalibur,Dragon,Ice,254.0,168.0,229.0
1347,999,Gimmighoul Roaming Form,Ghost,,140.0,76.0,128.0


In [266]:
condition = (~merged_df['NAME'].str.contains('Pikachu') | (merged_df['NAME'] == 'Pikachu'))
merged_df = merged_df[condition]

In [333]:
def adjust_name_casing(name):
    # Correct specific names
    if name == "Burmy Plant Cloak":
        return "Burmy"
    return name  # Return the name unchanged if no corrections are needed

# Apply the function to the 'NAME' column
pokedex['NAME'] = pokedex['NAME'].apply(adjust_name_casing)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pokedex['NAME'] = pokedex['NAME'].apply(adjust_name_casing)


In [677]:
merged_df

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,118.0,111.0,128.0
1,1,Shadow Bulbasaur,Grass,Poison,141.6,88.8,128.0
2,2,Ivysaur,Grass,Poison,151.0,143.0,155.0
3,2,Shadow Ivysaur,Grass,Poison,181.2,114.4,155.0
4,3,Shadow Venusaur,Grass,Poison,237.6,151.2,190.0
...,...,...,...,...,...,...,...
1344,996,Frigibax,Dragon,Ice,134.0,86.0,163.0
1345,997,Arctibax,Dragon,Ice,173.0,128.0,207.0
1346,998,Baxcalibur,Dragon,Ice,254.0,168.0,229.0
1347,999,Gimmighoul Roaming Form,Ghost,,140.0,76.0,128.0


In [678]:
pokemon_to_drop = [
    'Black Kyurem',
    'Shadow Keldeo',
    'Aegislash - Shield',
    'Mudbray',
    'Mudsdale',
    'Chewtle',
    'Sandaconda',
    'Tandemaus'
]

# Update 'merged_df' by excluding rows with names in the list
merged_df = merged_df[~merged_df['NAME'].isin(pokemon_to_drop)]

In [683]:
merged_df.shape

(1321, 7)

In [681]:
missing_stats = merged_df[
    (merged_df['BASE_ATTACK'].isna() | (merged_df['BASE_ATTACK'] == '')) |
    (merged_df['BASE_DEFENSE'].isna() | (merged_df['BASE_DEFENSE'] == '')) |
    (merged_df['BASE_HP'].isna() | (merged_df['BASE_HP'] == ''))
]

# Display the entries with missing stats to identify them
print("Entries with missing or empty stats:")
print(missing_stats)

Entries with missing or empty stats:
Empty DataFrame
Columns: [ID, NAME, TYPE_ONE, TYPE_TWO, BASE_ATTACK, BASE_DEFENSE, BASE_HP]
Index: []


In [682]:
merged_df = merged_df.drop_duplicates()

In [684]:
merged_df

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,118.0,111.0,128.0
1,1,Shadow Bulbasaur,Grass,Poison,141.6,88.8,128.0
2,2,Ivysaur,Grass,Poison,151.0,143.0,155.0
3,2,Shadow Ivysaur,Grass,Poison,181.2,114.4,155.0
4,3,Shadow Venusaur,Grass,Poison,237.6,151.2,190.0
...,...,...,...,...,...,...,...
1344,996,Frigibax,Dragon,Ice,134.0,86.0,163.0
1345,997,Arctibax,Dragon,Ice,173.0,128.0,207.0
1346,998,Baxcalibur,Dragon,Ice,254.0,168.0,229.0
1347,999,Gimmighoul Roaming Form,Ghost,,140.0,76.0,128.0


In [694]:
import requests
from bs4 import BeautifulSoup

shadows_list = []
# URL of the page to scrape
url = 'https://www.serebii.net/pokemongo/shadowpokemon.shtml'

# Send a GET request to the URL
response = requests.get(url)

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, 'html.parser')

# Find all <td> elements with class "fooinfo" that contain Pokémon names
pokemon_elements = soup.find_all('td', class_='fooinfo')

# Extract the text (names of the Pokémon) from each <td> element contained within <a> tags
pokemon_names = [td.find('a').get_text(strip=True) for td in pokemon_elements if td.find('a')]

# Print the list of Pokémon names
for name in pokemon_names:
    shadows_list.append('Shadow ' + name)  # Prefix 'Shadow' to each name

In [695]:
shadows_list

['Shadow Bulbasaur',
 'Shadow Ivysaur',
 'Shadow Venusaur',
 'Shadow Charmander',
 'Shadow Charmeleon',
 'Shadow Charizard',
 'Shadow Squirtle',
 'Shadow Wartortle',
 'Shadow Blastoise',
 'Shadow Weedle',
 'Shadow Kakuna',
 'Shadow Beedrill',
 'Shadow Pidgey',
 'Shadow Pidgeotto',
 'Shadow Pidgeot',
 'Shadow Rattata',
 'Shadow Raticate',
 'Shadow Ekans',
 'Shadow Arbok',
 'Shadow Sandshrew',
 'Shadow Sandslash',
 'Shadow Nidoran♀',
 'Shadow Nidorina',
 'Shadow Nidoqueen',
 'Shadow Nidoran♂',
 'Shadow Nidorino',
 'Shadow Nidoking',
 'Shadow Vulpix',
 'Shadow Ninetales',
 'Shadow Zubat',
 'Shadow Golbat',
 'Shadow Oddish',
 'Shadow Gloom',
 'Shadow Vileplume',
 'Shadow Venonat',
 'Shadow Venomoth',
 'Shadow Diglett',
 'Shadow Dugtrio',
 'Shadow Meowth',
 'Shadow Persian',
 'Shadow Psyduck',
 'Shadow Golduck',
 'Shadow Growlithe',
 'Shadow Arcanine',
 'Shadow Poliwag',
 'Shadow Poliwhirl',
 'Shadow Poliwrath',
 'Shadow Abra',
 'Shadow Kadabra',
 'Shadow Alakazam',
 'Shadow Machop',
 'Shad

In [696]:
def keep_pokemon(name):
    if 'Shadow' in name and name not in shadows_list:
        return False
    return True

# Apply the function to filter the DataFrame
filtered_merged_df = merged_df[merged_df['NAME'].apply(keep_pokemon)]

In [697]:
filtered_merged_df.shape

(1225, 7)

In [698]:
filtered_merged_df

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,118.0,111.0,128.0
1,1,Shadow Bulbasaur,Grass,Poison,141.6,88.8,128.0
2,2,Ivysaur,Grass,Poison,151.0,143.0,155.0
3,2,Shadow Ivysaur,Grass,Poison,181.2,114.4,155.0
4,3,Shadow Venusaur,Grass,Poison,237.6,151.2,190.0
...,...,...,...,...,...,...,...
1344,996,Frigibax,Dragon,Ice,134.0,86.0,163.0
1345,997,Arctibax,Dragon,Ice,173.0,128.0,207.0
1346,998,Baxcalibur,Dragon,Ice,254.0,168.0,229.0
1347,999,Gimmighoul Roaming Form,Ghost,,140.0,76.0,128.0


In [790]:
final_pokedex = filtered_merged_df.copy()

In [700]:
final_pokedex

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,BASE_ATTACK,BASE_DEFENSE,BASE_HP
0,1,Bulbasaur,Grass,Poison,118.0,111.0,128.0
1,1,Shadow Bulbasaur,Grass,Poison,141.6,88.8,128.0
2,2,Ivysaur,Grass,Poison,151.0,143.0,155.0
3,2,Shadow Ivysaur,Grass,Poison,181.2,114.4,155.0
4,3,Shadow Venusaur,Grass,Poison,237.6,151.2,190.0
...,...,...,...,...,...,...,...
1344,996,Frigibax,Dragon,Ice,134.0,86.0,163.0
1345,997,Arctibax,Dragon,Ice,173.0,128.0,207.0
1346,998,Baxcalibur,Dragon,Ice,254.0,168.0,229.0
1347,999,Gimmighoul Roaming Form,Ghost,,140.0,76.0,128.0


In [704]:
pvp.shape

(15170, 15)

In [702]:
filtered_pvp = pvp[pvp['NAME'].apply(keep_pokemon)]

In [703]:
filtered_pvp.shape

(14372, 15)

In [786]:
final_pokedex = final_pvp_pokedex.copy()

In [788]:
final_pokedex.to_csv('../model_data/final_pokedex.csv')

In [708]:
filtered_pvp.to_csv('../model_data/final_pvp.csv')

In [787]:
final_pokedex.isna().sum()

ID                            0
NAME                          0
TYPE_ONE                      0
TYPE_TWO                   7066
FAST_MOVE                     0
FAST_MOVE_POWER               0
FAST_MOVE_TYPE                0
FAST_ENERGY_BOOST             0
FAST_MOVE_DURATION            0
CHARGE_MOVE                   0
CHARGE_MOVE_POWER             0
CHARGED_MOVE_TYPE             0
CHARGE_MOVE_ENERGY_COST       0
CHARGE_MOVE_DURATION          0
DAMAGE WINDOW START           0
dtype: int64

In [789]:
final_pvp_pokedex.isna().sum()

ID                            0
NAME                          0
TYPE_ONE                      0
TYPE_TWO                   7066
FAST_MOVE                     0
FAST_MOVE_POWER               0
FAST_MOVE_TYPE                0
FAST_ENERGY_BOOST             0
FAST_MOVE_DURATION            0
CHARGE_MOVE                   0
CHARGE_MOVE_POWER             0
CHARGED_MOVE_TYPE             0
CHARGE_MOVE_ENERGY_COST       0
CHARGE_MOVE_DURATION          0
DAMAGE WINDOW START           0
dtype: int64

In [779]:
columns_to_check = [col for col in final_pvp_pokedex.columns if col != 'TYPE_TWO']

# Use DataFrame.applymap to find empty strings and isna() to find NaN values, then any(axis=1) to check each row
rows_with_issues = final_pvp_pokedex[columns_to_check].applymap(lambda x: x == '').any(axis=1) | final_pvp_pokedex[columns_to_check].isna().any(axis=1)

# Filter to get only the problematic rows
problematic_rows = final_pvp_pokedex[rows_with_issues]

  rows_with_issues = final_pvp_pokedex[columns_to_check].applymap(lambda x: x == '').any(axis=1) | final_pvp_pokedex[columns_to_check].isna().any(axis=1)


In [780]:
list(problematic_rows['CHARGE_MOVE'].unique())

[]

In [775]:
final_pvp_pokedex.columns

Index(['ID', 'NAME', 'TYPE_ONE', 'TYPE_TWO', 'FAST_MOVE', 'FAST_MOVE_POWER',
       'FAST_MOVE_TYPE', 'FAST_ENERGY_BOOST', 'FAST_MOVE_DURATION',
       'CHARGE_MOVE', 'CHARGE_MOVE_POWER', 'CHARGED_MOVE_TYPE',
       'CHARGE_MOVE_ENERGY_COST', 'CHARGE_MOVE_DURATION',
       'DAMAGE WINDOW START'],
      dtype='object')

In [769]:
charge_moves = {
    "Body Slam": {"CHARGE_MOVE_POWER": 50.0, "CHARGED_MOVE_TYPE": "Normal", "CHARGE_MOVE_ENERGY_COST": 33.0, "CHARGE_MOVE_DURATION": 1.9, "DAMAGE WINDOW START": 1.2},
    "Weather Ball Ice": {"CHARGE_MOVE_POWER": 55.0, "CHARGED_MOVE_TYPE": "Ice", "CHARGE_MOVE_ENERGY_COST": 33.0, "CHARGE_MOVE_DURATION": 1.6, "DAMAGE WINDOW START": 1.6},
    "Weather Ball Fire": {"CHARGE_MOVE_POWER": 55.0, "CHARGED_MOVE_TYPE": "Fire", "CHARGE_MOVE_ENERGY_COST": 33.0, "CHARGE_MOVE_DURATION": 1.6, "DAMAGE WINDOW START": 1.6},
    "Weather Ball Water": {"CHARGE_MOVE_POWER": 55.0, "CHARGED_MOVE_TYPE": "Water", "CHARGE_MOVE_ENERGY_COST": 33.0, "CHARGE_MOVE_DURATION": 1.6, "DAMAGE WINDOW START": 1.6},
    "Weather Ball Rock": {"CHARGE_MOVE_POWER": 55.0, "CHARGED_MOVE_TYPE": "Rock", "CHARGE_MOVE_ENERGY_COST": 33.0, "CHARGE_MOVE_DURATION": 1.6, "DAMAGE WINDOW START": 1.6},
    "Mud Bomb": {"CHARGE_MOVE_POWER": 55.0, "CHARGED_MOVE_TYPE": "Ground", "CHARGE_MOVE_ENERGY_COST": 33.0, "CHARGE_MOVE_DURATION": 2.3, "DAMAGE WINDOW START": 1.7},
    "Water Pulse": {"CHARGE_MOVE_POWER": 70.0, "CHARGED_MOVE_TYPE": "Water", "CHARGE_MOVE_ENERGY_COST": 50.0, "CHARGE_MOVE_DURATION": 3.2, "DAMAGE WINDOW START": 2.2},
    "Earth Power": {"CHARGE_MOVE_POWER": 100.0, "CHARGED_MOVE_TYPE": "Ground", "CHARGE_MOVE_ENERGY_COST": 50.0, "CHARGE_MOVE_DURATION": 3.6, "DAMAGE WINDOW START": 2.7},
    "Techno Blast (Normal)": {"CHARGE_MOVE_POWER": 120.0, "CHARGED_MOVE_TYPE": "Normal", "CHARGE_MOVE_ENERGY_COST": 100.0, "CHARGE_MOVE_DURATION": 2.0, "DAMAGE WINDOW START": 2.0},
    "Techno Blast (Burn)": {"CHARGE_MOVE_POWER": 120.0, "CHARGED_MOVE_TYPE": "Fire", "CHARGE_MOVE_ENERGY_COST": 100.0, "CHARGE_MOVE_DURATION": 2.0, "DAMAGE WINDOW START": 2.0},
    "Techno Blast (Douse)": {"CHARGE_MOVE_POWER": 120.0, "CHARGED_MOVE_TYPE": "Water", "CHARGE_MOVE_ENERGY_COST": 100.0, "CHARGE_MOVE_DURATION": 2.0, "DAMAGE WINDOW START": 2.0},
    "Techno Blast (Chill)": {"CHARGE_MOVE_POWER": 120.0, "CHARGED_MOVE_TYPE": "Ice", "CHARGE_MOVE_ENERGY_COST": 100.0, "CHARGE_MOVE_DURATION": 2.0, "DAMAGE WINDOW START": 2.0},
    "Techno Blast (Shock)": {"CHARGE_MOVE_POWER": 120.0, "CHARGED_MOVE_TYPE": "Electric", "CHARGE_MOVE_ENERGY_COST": 100.0, "CHARGE_MOVE_DURATION": 2.0, "DAMAGE WINDOW START": 2.0},
    "Techno Blast": {"CHARGE_MOVE_POWER": 120.0, "CHARGED_MOVE_TYPE": "Normal", "CHARGE_MOVE_ENERGY_COST": 100.0, "CHARGE_MOVE_DURATION": 2.0, "DAMAGE WINDOW START": 2.0},
}

In [776]:
# Iterate over each row in your DataFrame
for index, row in final_pvp_pokedex.iterrows():
    # Get the charge move name from the row
    charge_move = row['CHARGE_MOVE']
    
    # Check if the charge move is in the dictionary
    if charge_move in charge_moves:
        # Retrieve the move details from the dictionary
        move_details = charge_moves[charge_move]
        
        # Update the row with the move details if they are missing (NaN)
        for key in move_details:
            if pd.isna(row[key]):
                final_pvp_pokedex.at[index, key] = move_details[key]

# This will fill in the missing details for the charge moves based on the dictionary

In [757]:
fast_moves = {"Mud-Slap": {"FAST_MOVE_POWER":18.0,"FAST_MOVE_TYPE":"Ground","FAST_ENERGY_BOOST":12.0,"FAST_MOVE_DURATION":1.4}}

In [764]:
# Iterate over each row in your DataFrame
for index, row in final_pvp_pokedex.iterrows():
    # Get the charge move name from the row
    fast_move = row['FAST_MOVE']
    
    # Check if the charge move is in the dictionary
    if fast_move in fast_moves:
        # Retrieve the move details from the dictionary
        move_details = fast_moves[fast_move]
        
        # Update the row with the move details if they are missing (NaN)
        for key in move_details:
            if pd.isna(row[key]):
                final_pvp_pokedex.at[index, key] = move_details[key]

# This will fill in the missing details for the charge moves based on the dictionary

In [783]:
final_pvp_pokedex.isna().sum()

ID                            0
NAME                          0
TYPE_ONE                      0
TYPE_TWO                   7066
FAST_MOVE                     0
FAST_MOVE_POWER               0
FAST_MOVE_TYPE                0
FAST_ENERGY_BOOST             0
FAST_MOVE_DURATION            0
CHARGE_MOVE                   0
CHARGE_MOVE_POWER             0
CHARGED_MOVE_TYPE             0
CHARGE_MOVE_ENERGY_COST       0
CHARGE_MOVE_DURATION          0
DAMAGE WINDOW START           0
dtype: int64

In [782]:
problematic_rows

Unnamed: 0,ID,NAME,TYPE_ONE,TYPE_TWO,FAST_MOVE,FAST_MOVE_POWER,FAST_MOVE_TYPE,FAST_ENERGY_BOOST,FAST_MOVE_DURATION,CHARGE_MOVE,CHARGE_MOVE_POWER,CHARGED_MOVE_TYPE,CHARGE_MOVE_ENERGY_COST,CHARGE_MOVE_DURATION,DAMAGE WINDOW START
