In [8]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the Pokémon table from a CSV path relative to the working directory
csv_path = "./dataset/pokemon.csv"
df = pd.read_csv(csv_path)

# Preview the top rows (head() returns five by default) as a plain-text table
first_rows = df.head()
print(first_rows)


   Unnamed: 0                                          image_url  Id  \
0           0  https://img.pokemondb.net/sprites/sword-shield...   1   
1           1  https://img.pokemondb.net/sprites/sword-shield...   2   
2           2  https://img.pokemondb.net/sprites/sword-shield...   3   
3           3  https://img.pokemondb.net/sprites/sword-shield...   3   
4           4  https://img.pokemondb.net/sprites/sword-shield...   4   

                    Names  Type1   Type2  Total  HP  Attack  Defense  Sp. Atk  \
0               Bulbasaur  Grass  Poison    318  45      49       49       65   
1                 Ivysaur  Grass  Poison    405  60      62       63       80   
2                Venusaur  Grass  Poison    525  80      82       83      100   
3  Venusaur Mega Venusaur  Grass  Poison    625  80     100      123      122   
4              Charmander   Fire     NaN    309  39      52       43       60   

   Sp. Def  Speed  
0       65     45  
1       80     60  
2      100     80  


In [9]:
# Report the frame's size as a (rows, columns) tuple
dimensions = df.shape
print("Dataset Dimensions:", dimensions)

Dataset Dimensions: (1194, 13)


In [10]:
# Descriptive statistics (count, mean, std, quartiles, extremes) for the numeric columns
numeric_summary = df.describe()
print(numeric_summary)

        Unnamed: 0           Id        Total           HP       Attack  \
count  1194.000000  1194.000000  1194.000000  1194.000000  1194.000000   
mean    596.500000   492.746231   441.206868    70.883585    80.948911   
std     344.822418   293.719541   121.015326    26.861740    32.126164   
min       0.000000     1.000000   175.000000     1.000000     5.000000   
25%     298.250000   235.250000   330.000000    52.000000    56.000000   
50%     596.500000   486.500000   460.500000    70.000000    80.000000   
75%     894.750000   741.750000   520.000000    85.000000   100.000000   
max    1193.000000  1010.000000  1125.000000   255.000000   190.000000   

           Defense      Sp. Atk      Sp. Def        Speed  
count  1194.000000  1194.000000  1194.000000  1194.000000  
mean     74.587102    72.881072    72.123953    69.782245  
std      30.678626    32.702411    27.628412    30.200828  
min       5.000000    10.000000    20.000000     5.000000  
25%      51.250000    50.000000  

In [11]:
# Check the data types of each column
# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called bare; wrapping it in print() would emit a stray "None" line
# after the report.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1194 entries, 0 to 1193
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  1194 non-null   int64 
 1   image_url   1194 non-null   object
 2   Id          1194 non-null   int64 
 3   Names       1194 non-null   object
 4   Type1       1194 non-null   object
 5   Type2       652 non-null    object
 6   Total       1194 non-null   int64 
 7   HP          1194 non-null   int64 
 8   Attack      1194 non-null   int64 
 9   Defense     1194 non-null   int64 
 10  Sp. Atk     1194 non-null   int64 
 11  Sp. Def     1194 non-null   int64 
 12  Speed       1194 non-null   int64 
dtypes: int64(9), object(4)
memory usage: 121.4+ KB
None


In [12]:
# Tally the missing entries in every column
missing_per_column = df.isna().sum()
print(missing_per_column)

Unnamed: 0      0
image_url       0
Id              0
Names           0
Type1           0
Type2         542
Total           0
HP              0
Attack          0
Defense         0
Sp. Atk         0
Sp. Def         0
Speed           0
dtype: int64


In [14]:
# Number of rows in the frame. This counts entries, not distinct Ids: the
# preview shows Venusaur and its Mega form both carrying Id 3, and each such
# row is counted individually.
total_pokemon = df.shape[0]
print("Total number of Pokémon:", total_pokemon)

Total number of Pokémon: 1194


In [19]:
# Tally how often each type occurs in the primary and secondary slots.
# value_counts() skips missing values by default, so rows with no secondary
# type do not contribute to type2_counts.
type1_counts = df["Type1"].value_counts()
type2_counts = df["Type2"].value_counts()

# Emit each heading followed by its frequency table on the next line
print("Primary Types of Pokémon:", type1_counts, sep="\n")
print("\nSecondary Types of Pokémon:", type2_counts, sep="\n")

Primary Types of Pokémon:
Type1
Water       150
Normal      131
Grass       105
Bug          91
Psychic      82
Fire         75
Electric     73
Rock         67
Dark         56
Fighting     50
Dragon       49
Ghost        47
Ground       46
Poison       45
Ice          43
Steel        43
Fairy        31
Flying       10
Name: count, dtype: int64

Secondary Types of Pokémon:
Type2
Flying      122
Psychic      49
Poison       47
Ground       43
Fairy        42
Fighting     41
Steel        40
Dragon       39
Ghost        37
Grass        33
Dark         33
Water        25
Ice          22
Fire         20
Rock         19
Normal       18
Electric     13
Bug           9
Name: count, dtype: int64


In [18]:
# List the labels of every column in the frame
column_labels = df.columns
print(column_labels)

Index(['Unnamed: 0', 'image_url', 'Id', 'Names', 'Type1', 'Type2', 'Total',
       'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed'],
      dtype='object')


In [20]:
# The six individual stat columns (the aggregate 'Total' column is left out)
stat_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
base_stats = df[stat_columns]

# Descriptive statistics restricted to those columns
stats_summary = base_stats.describe()

# Heading and table on consecutive lines
print("Summary Statistics for Base Stats:", stats_summary, sep="\n")

Summary Statistics for Base Stats:
                HP       Attack      Defense      Sp. Atk      Sp. Def  \
count  1194.000000  1194.000000  1194.000000  1194.000000  1194.000000   
mean     70.883585    80.948911    74.587102    72.881072    72.123953   
std      26.861740    32.126164    30.678626    32.702411    27.628412   
min       1.000000     5.000000     5.000000    10.000000    20.000000   
25%      52.000000    56.000000    51.250000    50.000000    50.000000   
50%      70.000000    80.000000    70.000000    65.000000    70.000000   
75%      85.000000   100.000000    90.000000    95.000000    90.000000   
max     255.000000   190.000000   250.000000   194.000000   250.000000   

             Speed  
count  1194.000000  
mean     69.782245  
std      30.200828  
min       5.000000  
25%      45.000000  
50%      67.500000  
75%      90.750000  
max     200.000000  
