In [348]:
import pandas as pd
import numpy as np
import duckdb

In [349]:
# Workbook holding the raw challenge data. The name `input` shadows the
# built-in of the same name; it is kept because later cells read it.
input = 'Pokemon Input.xlsx'

# Open the workbook in a context manager so its file handle is closed as
# soon as the sheet names have been read (a bare pd.ExcelFile(...) call
# leaves the handle open until garbage collection).
with pd.ExcelFile(input) as workbook:
    sheets = workbook.sheet_names
sheets

['Pokemon', 'Evolution']

In [350]:
# Load the 'Pokemon' sheet and normalise it in a single chain, so re-running
# this cell always rebuilds `pokemon` from the workbook rather than mutating
# a frame left over from an earlier run.
pokemon = (
    pd.read_excel(input, sheet_name='Pokemon')
    # Correct data type: the '#' column is cast to float.
    .astype({'#': float})
    # snake_case headers: spaces -> underscores, then lower-case.
    .rename(columns=lambda col: col.replace(" ", "_").lower())
    # The SQL in later cells refers to this column as `no`.
    .rename(columns={'#': 'no'})
)
print(pokemon.head(5))
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the cell output.
pokemon.info()
print(pokemon.isnull().any())

    no       name    type  total  hp  attack  defense  special_attack  \
0  1.0  Bulbasaur   GRASS    318  45      49       49              65   
1  1.0  Bulbasaur  POISON    318  45      49       49              65   
2  2.0    Ivysaur   GRASS    405  60      62       63              80   
3  2.0    Ivysaur  POISON    405  60      62       63              80   
4  3.0   Venusaur   GRASS    525  80      82       83             100   

   special_defense  speed  
0               65     45  
1               65     45  
2               80     60  
3               80     60  
4              100     80  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1168 entries, 0 to 1167
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   no               1168 non-null   float64
 1   name             1168 non-null   object 
 2   type             1168 non-null   object 
 3   total            1168 non-null   int64  
 4   hp   

In [351]:
# Load the 'Evolution' sheet with snake_case, lower-case column names.
# Built as one chain so the cell is idempotent on re-run.
evolution = (
    pd.read_excel(input, sheet_name='Evolution')
    .rename(columns=lambda col: col.replace(" ", "_").lower())
)
print(evolution.head(5))
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the cell output.
evolution.info()
print(evolution.isnull().any())

  evolving_from evolving_to  level condition evolution_type
0     Bulbasaur     Ivysaur   16.0       NaN          Level
1       Ivysaur    Venusaur   32.0       NaN          Level
2    Charmander  Charmeleon   16.0       NaN          Level
3    Charmeleon   Charizard   36.0       NaN          Level
4      Squirtle   Wartortle   16.0       NaN          Level
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 390 entries, 0 to 389
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   evolving_from   380 non-null    object 
 1   evolving_to     390 non-null    object 
 2   level           268 non-null    float64
 3   condition       105 non-null    object 
 4   evolution_type  390 non-null    object 
dtypes: float64(1), object(4)
memory usage: 15.4+ KB
None
evolving_from      True
evolving_to       False
level              True
condition          True
evolution_type    False
dtype: bool


In [352]:
# The Pokémon sheet covers more Pokémon than this challenge needs.
# Keep only Generation I-III, i.e. numbers up to and including 386, and drop
# mega evolutions, whose names start with "Mega ".
# DuckDB resolves `pokemon` in the query to the DataFrame of that name.
gen3_filter_query = """ 
                        select * 
                        from pokemon 
                        where no <= 386 
                        and name not like 'Mega %'
                         """
pokemon_sql = duckdb.sql(gen3_filter_query).to_df()
pokemon_sql.head(5)

Unnamed: 0,no,name,type,total,hp,attack,defense,special_attack,special_defense,speed
0,1.0,Bulbasaur,GRASS,318,45,49,49,65,65,45
1,1.0,Bulbasaur,POISON,318,45,49,49,65,65,45
2,2.0,Ivysaur,GRASS,405,60,62,63,80,80,60
3,2.0,Ivysaur,POISON,405,60,62,63,80,80,60
4,3.0,Venusaur,GRASS,525,80,82,83,100,100,80


In [353]:
# A Pokémon with more than one Type appears once per Type.
# Type is not needed for this challenge, so it is left out of the select list
# and the rows are collapsed to one per name.
# Every numeric column is wrapped in avg() so that `group by name` is valid;
# as a result those columns come back as floating point.
# Note: the query reads the current `pokemon_sql` and the result replaces it.
one_row_per_name_query = """
                        select avg(no) as no, name, avg(total) total, avg(hp) hp, avg(attack) attack, avg(defense) defense, 
                        avg(special_attack) special_attack, avg(special_defense) special_defense, avg(speed) speed
                        from pokemon_sql
                        group by name
                        """
pokemon_sql = duckdb.sql(one_row_per_name_query).to_df()
pokemon_sql

Unnamed: 0,no,name,total,hp,attack,defense,special_attack,special_defense,speed
0,1.0,Bulbasaur,318.0,45.0,49.0,49.0,65.0,65.0,45.0
1,2.0,Ivysaur,405.0,60.0,62.0,63.0,80.0,80.0,60.0
2,3.0,Venusaur,525.0,80.0,82.0,83.0,100.0,100.0,80.0
3,4.0,Charmander,309.0,39.0,52.0,43.0,60.0,50.0,65.0
4,5.0,Charmeleon,405.0,58.0,64.0,58.0,80.0,65.0,80.0
...,...,...,...,...,...,...,...,...,...
381,369.0,Relicanth,485.0,100.0,90.0,130.0,45.0,65.0,55.0
382,373.0,Salamence,600.0,95.0,135.0,80.0,110.0,80.0,100.0
383,189.0,Jumpluff,450.0,75.0,55.0,70.0,55.0,85.0,110.0
384,284.0,Masquerain,414.0,70.0,60.0,62.0,80.0,82.0,60.0


In [354]:
# Show the evolution frame's columns, dtypes and non-null counts again
# before it is joined to the Pokémon table.
evolution.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 390 entries, 0 to 389
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   evolving_from   380 non-null    object 
 1   evolving_to     390 non-null    object 
 2   level           268 non-null    float64
 3   condition       105 non-null    object 
 4   evolution_type  390 non-null    object 
dtypes: float64(1), object(4)
memory usage: 15.4+ KB


In [355]:
# Next step: information about what each Pokémon evolves to.
# The Evolution sheet still lists Pokémon beyond Gen III, so it is filtered
# with two inner joins against pokemon_sql: a row survives only when both its
# evolving_from and its evolving_to name are present there.
gen3_evolution_query = """ 
                            select c.*
                            from
                                (select a.*
                                from evolution a
                                inner join pokemon_sql b
                                on a.evolving_from = b.name) as c
                            inner join pokemon_sql d
                            on c.evolving_to = d.name
                            """
evolution_sql = duckdb.sql(gen3_evolution_query).to_df()
evolution_sql

Unnamed: 0,evolving_from,evolving_to,level,condition,evolution_type
0,Ivysaur,Venusaur,32.0,,Level
1,Squirtle,Wartortle,16.0,,Level
2,Wartortle,Blastoise,36.0,,Level
3,Metapod,Butterfree,10.0,,Level
4,Pidgey,Pidgeotto,18.0,,Level
...,...,...,...,...,...
203,Spinarak,Ariados,22.0,,Level
204,Snubbull,Granbull,23.0,,Level
205,Houndour,Houndoom,24.0,,Level
206,Togepi,Togetic,,,Happiness


In [356]:
# Attach to every Pokémon both what it evolves to and what it evolves from.
# Left joins keep every row of pokemon_sql: evolving_to / level / condition /
# evolution_type are null when a Pokémon does not evolve, and evolving_from is
# null when it has no pre-evolved form. A Pokémon with several evolutions
# gets one row per evolution.
#
# `select distinct` replaces the earlier avg() + group by de-duplication:
# the Pokémon stats are passed through untouched, and `level` is no longer
# averaged, so two evolution rows that differ only in level stay separate
# instead of being blended into a level that neither row contains.
detail_df = duckdb.sql("""
    select distinct
        p.no, p.name, p.total, p.hp, p.attack, p.defense,
        p.special_attack, p.special_defense, p.speed,
        evolves_from.evolving_from,
        evolves_to.evolving_to,
        evolves_to.level,
        evolves_to.condition,
        evolves_to.evolution_type
    from pokemon_sql p
    -- forward link: rows in which this Pokémon is the one evolving
    left join evolution_sql evolves_to
        on p.name = evolves_to.evolving_from
    -- backward link: rows in which this Pokémon is the result
    left join evolution_sql evolves_from
        on p.name = evolves_from.evolving_to
""").to_df()
detail_df

Unnamed: 0,no,name,total,hp,attack,defense,special_attack,special_defense,speed,evolving_from,evolving_to,level,condition,evolution_type
0,1.0,Bulbasaur,318.0,45.0,49.0,49.0,65.0,65.0,45.0,,Ivysaur,16.0,,Level
1,4.0,Charmander,309.0,39.0,52.0,43.0,60.0,50.0,65.0,,Charmeleon,16.0,,Level
2,7.0,Squirtle,314.0,44.0,48.0,65.0,50.0,64.0,43.0,,Wartortle,16.0,,Level
3,10.0,Caterpie,195.0,45.0,30.0,35.0,20.0,20.0,45.0,,Metapod,7.0,,Level
4,13.0,Weedle,195.0,40.0,35.0,30.0,20.0,20.0,50.0,,Kakuna,7.0,,Level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,326.0,Grumpig,470.0,80.0,45.0,65.0,90.0,110.0,80.0,Spoink,,,,
393,376.0,Metagross,600.0,80.0,135.0,130.0,95.0,90.0,70.0,Metang,,,,
394,79.0,Slowpoke,315.0,90.0,65.0,65.0,40.0,40.0,15.0,,Slowking,,King's Rock,Trade
395,290.0,Nincada,266.0,31.0,45.0,90.0,30.0,30.0,40.0,,Ninjask,20.0,,Level


In [357]:
# Add the Evolution Group: the name of the first Pokémon in the chain
# (e.g. Bulbasaur for Bulbasaur, Ivysaur and Venusaur).
#
# How it is resolved: detail_df is joined to itself on a.name =
# parent_row.evolving_to, so parent_row.evolving_from is the pre-evolution of
# this Pokémon's own pre-evolution. The group is then the first non-null of
#   1. that two-steps-back name,
#   2. the direct pre-evolution,
#   3. the Pokémon's own name.
# A single self-join only looks two steps back, so chains longer than three
# stages would not resolve to their true first form.
#
# Changes compared with the nested-subquery version:
# * ORDER BY is on the outermost query. An ORDER BY inside a derived table
#   does not guarantee the order of the outer result. name / evolving_to are
#   added as tie-breakers so rows sharing a number come out deterministically.
# * The lookup side is reduced to distinct (evolving_from, evolving_to) pairs,
#   so a pair that appears on several detail_df rows cannot duplicate rows.
# * coalesce() replaces the nested CASE expressions; the result is the same.
output = duckdb.sql("""
    select
        a.no, a.name, a.total, a.hp, a.attack, a.defense,
        a.special_attack, a.special_defense, a.speed,
        a.evolving_from, a.evolving_to, a.level, a.condition, a.evolution_type,
        coalesce(parent_row.evolving_from, a.evolving_from, a.name) as evolution_group
    from detail_df a
    left join (
        select distinct evolving_from, evolving_to
        from detail_df
        where evolving_to is not null
    ) parent_row
        on a.name = parent_row.evolving_to
    order by a.no, a.name, a.evolving_to
""").to_df()
output

Unnamed: 0,no,name,total,hp,attack,defense,special_attack,special_defense,speed,evolving_from,evolving_to,level,condition,evolution_type,evolution_group
0,1.0,Bulbasaur,318.0,45.0,49.0,49.0,65.0,65.0,45.0,,Ivysaur,16.0,,Level,Bulbasaur
1,2.0,Ivysaur,405.0,60.0,62.0,63.0,80.0,80.0,60.0,Bulbasaur,Venusaur,32.0,,Level,Bulbasaur
2,3.0,Venusaur,525.0,80.0,82.0,83.0,100.0,100.0,80.0,Ivysaur,,,,,Bulbasaur
3,4.0,Charmander,309.0,39.0,52.0,43.0,60.0,50.0,65.0,,Charmeleon,16.0,,Level,Charmander
4,5.0,Charmeleon,405.0,58.0,64.0,58.0,80.0,65.0,80.0,Charmander,Charizard,36.0,,Level,Charmander
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,382.0,Kyogre,670.0,100.0,100.0,90.0,150.0,140.0,90.0,,,,,,Kyogre
393,383.0,Groudon,670.0,100.0,150.0,140.0,100.0,90.0,90.0,,,,,,Groudon
394,384.0,Rayquaza,680.0,105.0,150.0,90.0,150.0,90.0,95.0,,,,,,Rayquaza
395,385.0,Jirachi,600.0,100.0,100.0,100.0,100.0,100.0,100.0,,,,,,Jirachi
