In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # import matplotlib pyplot class
import seaborn as sns # seaborn lib
import plotly.express as px

- [Challenge Link](https://tribegroup.notion.site/AngelHack-Monthly-Code-Challenge-e1b6ea075c1445b8b7ce3ca16d207b5e)

In [2]:
# Read the Pokemon CSV, using its first column as the row index,
# then preview the first rows.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/pokemon-dataset/pokemon.csv",
    index_col=0,
)
df.head()

Unnamed: 0,image_url,Id,Names,Type1,Type2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,https://img.pokemondb.net/sprites/sword-shield...,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45
1,https://img.pokemondb.net/sprites/sword-shield...,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60
2,https://img.pokemondb.net/sprites/sword-shield...,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80
3,https://img.pokemondb.net/sprites/sword-shield...,3,Venusaur Mega Venusaur,Grass,Poison,625,80,100,123,122,120,80
4,https://img.pokemondb.net/sprites/sword-shield...,4,Charmander,Fire,,309,39,52,43,60,50,65


In [3]:
# Summarize column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1194 entries, 0 to 1193
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   image_url  1194 non-null   object
 1   Id         1194 non-null   int64 
 2   Names      1194 non-null   object
 3   Type1      1194 non-null   object
 4   Type2      652 non-null    object
 5   Total      1194 non-null   int64 
 6   HP         1194 non-null   int64 
 7   Attack     1194 non-null   int64 
 8   Defense    1194 non-null   int64 
 9   Sp. Atk    1194 non-null   int64 
 10  Sp. Def    1194 non-null   int64 
 11  Speed      1194 non-null   int64 
dtypes: int64(8), object(4)
memory usage: 121.3+ KB


In [4]:
# Preview Type2 with missing values shown as "NA". fillna substitutes NaN
# directly; a regex replace is meant for string patterns, not NaN.
# NOTE: the result is only displayed, not assigned, so df['Type2'] itself
# still contains NaN after this cell runs.
df['Type2'].fillna("NA")

0         Poison
1         Poison
2         Poison
3         Poison
4             NA
          ...   
1189    Fighting
1190      Dragon
1191      Dragon
1192      Dragon
1193     Psychic
Name: Type2, Length: 1194, dtype: object

In [5]:
# Distinct values of the Type2 column; NaN appears in the result because
# Type2 has missing values
df['Type2'].unique()

array(['Poison', nan, 'Flying', 'Dragon', 'Normal', 'Psychic', 'Steel',
       'Ground', 'Fairy', 'Grass', 'Rock', 'Fighting', 'Electric', 'Ice',
       'Dark', 'Ghost', 'Fire', 'Water', 'Bug'], dtype=object)

In [6]:
# Distinct values of the Type1 column
df['Type1'].unique()

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Dark', 'Poison',
       'Electric', 'Ground', 'Ice', 'Fairy', 'Steel', 'Fighting',
       'Psychic', 'Rock', 'Ghost', 'Dragon', 'Flying'], dtype=object)

## Data storytelling ideas

1. Visualization of each pokemon's abilities
2. Visualization of Distribution of abilities metrics
3. Top 10 Pokemon in each metric
4. Defense with respect to Attack

- Tell the background about each pokemon and what's their story
- What each attack tells us about a Pokemon
- Who's the most powerful pokemon?
- Who's the least powerful pokemon?

### 1. Define a function to visualize each skill of a Pokemon

- Add images of each pokemon from given link

In [7]:
# define the function to visualize each pokemon chars abilities
def pokemon_skills(df, name):
    """Show a bar chart of the six stat columns for one Pokemon.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'Names' column and the stat columns 'HP', 'Attack',
        'Defense', 'Sp. Atk', 'Sp. Def' and 'Speed'.
    name : str
        Value to look up in the 'Names' column. If several rows match,
        the first matching row is plotted.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``Names == name``.
    """
    stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

    # Filter once, and fail with a readable message instead of a bare
    # "index 0 is out of bounds" when the name is not present.
    matches = df.loc[df['Names'] == name, stat_cols]
    if matches.empty:
        raise IndexError(f"No Pokemon named {name!r} found in the 'Names' column")

    ps_df = pd.DataFrame({'Metric': stat_cols,
                          'Points': matches.iloc[0].to_list()})

    # Columns are referenced by name so the `labels` key ('Points') reliably
    # matches the column that drives the colour bar.
    fig = px.bar(ps_df, x='Metric', y='Points', color='Points',
                 labels={'Points': 'Points level'}, height=400)

    # update the layout
    fig.update_layout(title=f'Skill levels of Pokemon {name}',
                      xaxis_title='Skills',
                      yaxis_title='Points level',
                      title_font_family="Sitka Small",
                      title_font_color="Black",
                      title_font_size=22,
                      xaxis_title_font_color='black',
                      xaxis_title_font_size=17,
                      yaxis_title_font_color='black',
                      yaxis_title_font_size=17
                      )

    fig.show()
    
# Example: plot the stats of the row whose Names value is 'Venusaur'
pokemon_skills(df, name='Venusaur')

### 2. Distribution of each metric of pokemon

- Add a description of each skill, such as Attack, Defense, etc.

In [8]:
def dist_plt(df, skill):
    """Display a histogram of one column of ``df`` with a rug marginal.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the column named by ``skill``.
    skill : str
        Column to plot on the x-axis; also used in the title and x-axis label.

    Returns
    -------
    None
        The figure is displayed with ``show()``.
    """
    # Title and axis styling applied on top of the Plotly Express defaults.
    layout_options = dict(
        title=f'Distribution of {skill}',
        xaxis_title=skill,
        yaxis_title='Count',
        title_font_family="Sitka Small",
        title_font_color="Black",
        title_font_size=22,
        xaxis_title_font_color='black',
        xaxis_title_font_size=17,
        yaxis_title_font_color='black',
        yaxis_title_font_size=17,
    )

    histogram = px.histogram(
        df,
        x=skill,
        nbins=30,
        marginal='rug',
        labels={'value': skill},
    )
    histogram.update_layout(**layout_options)
    histogram.show()

# Example: distribution of the HP column
dist_plt(df, skill='HP')

### 3. Scatter plot of attack VS defense

- Add a description of the resulting plot

In [9]:
# Attack-vs-Defense scatter plot helper
def scatter(df):
    """Display a scatter plot of the 'Attack' column against 'Defense'.

    Marker size follows the 'Total' column, and the hover box additionally
    lists 'Names' and 'Type1'.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'Attack', 'Defense', 'Total', 'Names' and 'Type1' columns.

    Returns
    -------
    None
        The figure is displayed with ``show()``.
    """
    hover_columns = ['Names', 'Type1']
    chart = px.scatter(
        df,
        x="Attack",
        y="Defense",
        size='Total',
        hover_data=hover_columns,
    )

    # Title and axis styling applied on top of the Plotly Express defaults.
    layout_options = {
        'title': 'Pokemon Species: Defense w.r.t to Attack',
        'xaxis_title': 'Attack',
        'yaxis_title': 'Defense',
        'title_font_family': "Sitka Small",
        'title_font_color': "Black",
        'title_font_size': 22,
        'xaxis_title_font_color': 'black',
        'xaxis_title_font_size': 17,
        'yaxis_title_font_color': 'black',
        'yaxis_title_font_size': 17,
    }
    chart.update_layout(**layout_options)
    chart.show()
# Draw the Attack vs. Defense scatter plot for the loaded table
scatter(df=df)

### 4. Top 10 Pokemon in each skill metric

- Add the images of all pokemon that shows up in the top10 list

In [10]:
def top10(df, skill):
    """Display a bar chart of the ten rows with the highest ``skill`` value.

    Rows are sorted by ``skill`` in descending order and the first ten are
    plotted with 'Names' on the x-axis. Bars are coloured by ``skill``, and
    the hover box additionally lists 'Type1' and 'Total'.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'Names', 'Type1', 'Total' and the ``skill`` column.
    skill : str
        Column used for ranking, bar height and bar colour.

    Returns
    -------
    None
        The figure is displayed with ``show()``.
    """
    ranked = df.sort_values(by=skill, ascending=False)
    leaders = ranked.head(10)

    # Bar chart of the leaders
    chart = px.bar(
        leaders,
        x='Names',
        y=skill,
        hover_data=['Type1', 'Total'],
        color=skill,
        labels={'legend': skill},
        height=500,
    )

    # Title and axis styling; the y-axis title text is left at its default.
    layout_options = {
        'title': f'Pokemon Species: Top 10 in {skill} points',
        'xaxis_title': 'Name of Pokemons',
        'title_font_family': "Sitka Small",
        'title_font_color': "Black",
        'title_font_size': 22,
        'xaxis_title_font_color': 'black',
        'xaxis_title_font_size': 17,
        'yaxis_title_font_color': 'black',
        'yaxis_title_font_size': 17,
    }
    chart.update_layout(**layout_options)
    chart.show()
    
# Example: the ten rows with the highest Speed values
top10(df, skill='Speed')