In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Read the Pokemon CSV (path is relative to the working directory) into `df`.
df = pd.read_csv(filepath_or_buffer='dataset 1/Pokemon.csv')

In [6]:
# Preview the first five rows to see the columns and some sample values.
df.head(n=5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [8]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(800, 13)

In [9]:
# Per-column dtype and non-null count, plus overall memory usage.
# In the captured output, Type 2 is the only column with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   Total       800 non-null    int64 
 5   HP          800 non-null    int64 
 6   Attack      800 non-null    int64 
 7   Defense     800 non-null    int64 
 8   Sp. Atk     800 non-null    int64 
 9   Sp. Def     800 non-null    int64 
 10  Speed       800 non-null    int64 
 11  Generation  800 non-null    int64 
 12  Legendary   800 non-null    bool  
dtypes: bool(1), int64(9), object(3)
memory usage: 75.9+ KB


### Technical Questions

##### In total, how many Pokemon are there in the dataset?

In [None]:
# Pull the Name column out as its own Series.
pokemon_names = df.loc[:, 'Name']

In [None]:
# Count the names that repeat an earlier name (the first occurrence is not flagged).
# Expected to be 0, i.e. every name is unique -- this cell has no captured output,
# so re-run it to confirm.
pokemon_names.duplicated(keep='first').sum()

In [69]:
# count() tallies the non-null entries of the Name column
pokemon_names.count() # There are 800 Pokemon

800

##### What is the most common Speed?

In [71]:
# The mode is the most frequent value; mode() returns a Series because ties are possible.
df['Speed'].mode(dropna=True) # most common speed is 50

0    50
Name: Speed, dtype: int64

In [74]:
# Cross-check with a frequency table: value_counts() already sorts by count,
# highest first, by default.
df['Speed'].value_counts() # speed 50 occurs 46 times

Speed
50     46
60     44
70     37
65     36
30     35
       ..
39      1
24      1
82      1
113     1
123     1
Name: count, Length: 108, dtype: int64

##### Which Pokemon has the highest Attack? Be careful to spell the name of the Pokemon correctly, including use of spaces (if any).

In [88]:
# Name of every row whose Attack equals the column maximum (ties would all be listed).
df.loc[df['Attack'] == df['Attack'].max(), 'Name'] # MewtwoMega Mewtwo X has the highest Attack

163    MewtwoMega Mewtwo X
Name: Name, dtype: object

##### How many legendary Pokemon are there?

In [95]:
# Frequency of each Legendary flag; the True count is the number of legendary Pokemon.
df['Legendary'].value_counts() # 65 Legendary Pokemon

Legendary
False    735
True      65
Name: count, dtype: int64

##### Which Type 1 Flying Pokemon has the highest Sp. Atk? Be careful to spell the name of the Pokemon correctly, including use of spaces (if any).

In [101]:
# Keep only the rows whose Type 1 is Flying, then display them.
type_1_flying_pokemon = df[df['Type 1'].eq('Flying')]
type_1_flying_pokemon

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
702,641,TornadusIncarnate Forme,Flying,,580,79,115,70,125,80,111,5,True
703,641,TornadusTherian Forme,Flying,,580,79,100,80,110,90,121,5,True
790,714,Noibat,Flying,Dragon,245,40,30,35,45,40,55,6,False
791,715,Noivern,Flying,Dragon,535,85,70,80,97,80,123,6,False


In [103]:
# Name of every Type 1 Flying row whose Sp. Atk equals the maximum within that subset.
type_1_flying_pokemon.loc[
    type_1_flying_pokemon['Sp. Atk'] == type_1_flying_pokemon['Sp. Atk'].max(),
    'Name',
]

702    TornadusIncarnate Forme
Name: Name, dtype: object

##### There are duplicated Pokemon in the dataset. True or false? 

Note that for scenarios where there are two versions of the same Pokemon e.g. “VenusaurMega Venusaur” and “Venusaur”, it does NOT count as duplicated Pokemon.

In [104]:
# duplicated() with no arguments flags a row only when it matches an earlier row
# in every column, so this counts fully identical rows.
df.duplicated().sum() # no duplicates

0

##### Which generation has the most Pokemon?

In [106]:
# Number of Pokemon per Generation, sorted from most to fewest
df['Generation'].value_counts() # Generation 1 has the most Pokemon

Generation
1    166
5    165
3    160
4    121
2    106
6     82
Name: count, dtype: int64

##### How many columns are there in the dataset?

In [93]:
# List the column labels.
df.columns

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

In [78]:
# Index.size is the number of column labels.
df.columns.size # 13 columns

13

In [80]:
# Same answer via shape: index 1 of the tuple is the column count.
df.shape[1] # shape gives us (rows, columns)

13

##### In Generation 2, how many Type 2 Fairy Pokemon are there?

In [112]:
# Restrict to Generation 2 and preview one row to confirm the filter.
gen_2 = df.loc[df['Generation'] == 2]
gen_2.head(n=1)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
166,152,Chikorita,Grass,,318,45,49,65,49,65,45,2,False


In [121]:
# Generation 2 rows whose Type 2 is Fairy (rows with a missing Type 2 never match).
gen_2_fairy = gen_2[gen_2['Type 2'].eq('Fairy')]

In [122]:
 gen_2_fairy.shape[0] # 3 type 2 fairy

3

##### Which Pokemon has the lowest HP? Be careful to spell the name of the Pokemon correctly, including use of spaces (if any).

In [124]:
# Name of every row whose HP equals the column minimum (ties would all be listed).
df.loc[df['HP'] == df['HP'].min(), 'Name']

316    Shedinja
Name: Name, dtype: object