# Proyecto de pokemon (con pyspark)

In [0]:
poke_df = spark.read.format("csv").option("header", "true").load("dbfs:/FileStore/shared_uploads/carlosalonsomingo@gmail.com/pokemon_go_in_.csv")

## Limpieza de los datos

In [0]:
poke_df.printSchema()

root
 |-- Pokemon: string (nullable = true)
 |-- Max CP: string (nullable = true)
 |-- Max HP: string (nullable = true)
 |-- Attack: string (nullable = true)
 |-- Defense: string (nullable = true)
 |-- Stamina: string (nullable = true)
 |-- Total Stats: string (nullable = true)
 |-- Is Legendary: string (nullable = true)
 |-- Type 1: string (nullable = true)
 |-- Type 2: string (nullable = true)



In [0]:
poke_df.display()

Pokemon,Max CP,Max HP,Attack,Defense,Stamina,Total Stats,Is Legendary,Type 1,Type 2
"""Slaking,""""4","431""""",233,290,166,284,740,False,normal,"none"""
"""Regigigas,""""4","346""""",184,287,210,221,718,True,normal,"none"""
"""Mewtwo,""""4","178""""",179,300,182,214,696,True,psychic,"none"""
"""Groudon,""""4","115""""",171,270,228,205,703,True,ground,"none"""
"""Kyogre,""""4","115""""",171,270,228,205,703,True,water,"none"""
"""Zekrom,""""4","038""""",171,275,211,205,691,True,dragon,"electric"""
"""Reshiram,""""4","038""""",171,275,211,205,691,True,dragon,"fire"""
"""Dialga,""""4","038""""",171,275,211,205,691,True,steel,"dragon"""
"""Palkia,""""3","991""""",159,280,215,189,684,True,water,"dragon"""
"""Arceus,""""3","989""""",197,238,238,237,713,True,normal,"none"""


### 1.- Primero aplicamos una transformacion en todas las culmnas

In [0]:
from pyspark.sql import functions as F

poke_df_clean = poke_df.select(
    *[
        F.regexp_replace(c, '"', "").alias(c)#Le ponemos el nombre de la columna como alias para que use ese y no se modifique
        for c in poke_df.columns
    ]

)

#Ahora vamos a mover el numero de antes a la
poke_df_clean = (
    poke_df_clean
    
    .withColumn(
        "Max CP",
        F.when(#Condicionamos
            F.size(F.split(F.col("Pokemon"), ",")) > 1,  
            F.concat(F.split(F.col("Pokemon"), ",").getItem(1), F.col("Max CP"))#Concateno si hay valor que concatenar
        ).otherwise(F.col("Max CP")) #Si no hay valor , lo dejamos tal cual
    )

    .withColumn("Pokemon", F.split(F.col("Pokemon"), ",").getItem(0))
)


poke_df_clean =poke_df_clean.withColumn("Type 2", F.when(F.col("Type 1") == F.col("Type 2"), "none").otherwise(F.col("Type 2")))## pooner a none los que tienen el segundo tipo igual
poke_df_clean.display()

Pokemon,Max CP,Max HP,Attack,Defense,Stamina,Total Stats,Is Legendary,Type 1,Type 2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


### 2.- Vamos a aplicar un schema sobre nuestro dataframe

Para ello crearemos uno nuevo a partir del anterior en rdd, ya que es la forma en la que lo necesita para crearlo

In [0]:
from pyspark.sql import types as T

# Convertir las columnas al tipo correcto antes de aplicar el esquema(si da problemas de parseo)
poke_df_clean_casted = poke_df_clean.select(
    F.col("Pokemon").cast("string"),
    F.col("Max CP").cast("int"),
    F.col("Max HP").cast("int"),
    F.col("Attack").cast("int"),
    F.col("Defense").cast("int"),
    F.col("Stamina").cast("int"),
    F.col("Total Stats").cast("int"),
    F.col("Is Legendary").cast("boolean"),
    F.col("Type 1").cast("string"),
    F.col("Type 2").cast("string")
)
#Una vez casteadas creamos el schema (aprovechamos para mejorar los nombres de las coolumnas)
schema = T.StructType([
  T.StructField("Pokemon", T.StringType(),True),
  T.StructField("Max_CP", T.IntegerType(), True),
  T.StructField("Max_HP", T.IntegerType(), True),
  T.StructField("Attack", T.IntegerType(), True),
  T.StructField("Defense", T.IntegerType(), True),
  T.StructField("Stamina", T.IntegerType(), True),
  T.StructField("Total_Stats", T.IntegerType(), True),
  T.StructField("Is_Legendary", T.BooleanType(), True),
  T.StructField("Type_1", T.StringType(), True),
  T.StructField("Type_2", T.StringType(), True)
])

# Creamos el df a partir del df casteado convertido a rdd
poke_df_clean_schema = spark.createDataFrame(poke_df_clean_casted.rdd, schema=schema)

poke_df_clean_schema.display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


duplicados,
nulos,
tipos,


### 3.- Observación: podriamos validar las columnas convertibles a un enum con una lista

In [0]:
df = poke_df_clean_schema

In [0]:
#obtengo los tipos que hay(En un caso real no los sacaria del propio dataframe ya que el objetivo es validarlo)
#type_list = ['normal', 'bug', 'ghost', 'grass', 'steel', 'ice', 'water', 'ground', 'flying', 'fairy', 'dark', 'fighting', 'dragon', 'poison', 'psychic', 'rock', 'electric', 'fire']
#type_list_2 = ['none', 'bug', 'normal', 'ghost', 'grass', 'steel', 'ice', 'water', 'ground', 'flying', 'fairy', 'dark', 'fighting', 'dragon', 'poison', 'psychic', 'rock', 'electric', 'fire']

type_list_1 = df.select("Type_1").distinct().toPandas()["Type_1"].tolist()
type_list_2 = df.select("Type_2").distinct().toPandas()["Type_2"].tolist()

In [0]:
print(type_list_1)
print(type_list_2)

['normal', 'bug', 'ghost', 'grass', 'steel', 'ice', 'water', 'ground', 'flying', 'fairy', 'dark', 'fighting', 'dragon', 'poison', 'psychic', 'rock', 'electric', 'fire']
['none', 'bug', 'normal', 'ghost', 'grass', 'steel', 'ice', 'water', 'ground', 'flying', 'fairy', 'dark', 'fighting', 'dragon', 'poison', 'psychic', 'rock', 'electric', 'fire']


Aqui vamos a hacer una comprobacion para saber si algun registro tiene errores en el tipo

In [0]:
#Al obtener 0 al comprobar los tipos que no estn en la lista sabemos que no hay tipos mal puestos(en este caso sera 0, porque obtenemos los valres del propio dataframe)
df.filter(~df.Type_1.isin(*type_list_1)).count()
df.filter(~df.Type_2.isin(*type_list_2)).display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2


### 4.- Otra comprobacion interesante es averiguar los nulos

In [0]:
# Mostramos los registros con algun nulo 
# Si saliese algun nulo tomariamos medidas segun la importancia del dato
df_nulls = df.select([F.count(F.when(F.col(c).isNull(), c)).alias(c) for c in df.columns])
df_nulls.show()
 

+-------+------+------+------+-------+-------+-----------+------------+------+------+
|Pokemon|Max_CP|Max_HP|Attack|Defense|Stamina|Total_Stats|Is_Legendary|Type_1|Type_2|
+-------+------+------+------+-------+-------+-----------+------------+------+------+
|      0|     0|     0|     0|      0|      0|          0|           0|     0|     0|
+-------+------+------+------+-------+-------+-----------+------------+------+------+



In [0]:
df.display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


### 5.- Interesante comprobar que la suma de ataque, defensa y stamina sea igual al Total_Stats

In [0]:
stats_mal = df.select(
  "Pokemon",
  (F.col("Attack") + F.col("Defense") + F.col("Stamina") != F.col("Total_Stats")).alias("Stats_Error")

).filter(F.col("Stats_Error") == True).count()
print(stats_mal)
#Si es 0 determinamos que las stats son correctas, en caso contraro evaluar los datos

0


# Dando valor de negocio a los datos

In [0]:
df.display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


### 0.-Vamos a probar graficos con el df completo usando las funcionaldades de Databricks

##### 0.1.- Obteniendo valores relativos al tipo_1

In [0]:
df.display()#Tendrian mas sentido hacerlo con las medias

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

Databricks visualization. Run in Databricks to view.

##### 0.2 Podriamos hacer lo mismo con los datos de tipo 2

In [0]:
df.display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


In [0]:
df_attacks = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load("dbfs:/FileStore/shared_uploads/carlosalonsomingo@gmail.com/chart.csv")
df_attacks.display()
df_attacks.printSchema()

Attacking,Normal,Fire,Water,Electric,Grass,Ice,Fighting,Poison,Ground,Flying,Psychic,Bug,Rock,Ghost,Dragon,Dark,Steel,Fairy
Normal,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0,1.0,1.0,0.5,1.0
Fire,1,0.5,0.5,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,0.5,1.0,2.0,1.0
Water,1,2.0,0.5,1.0,0.5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,1.0,1.0
Electric,1,1.0,2.0,0.5,0.5,1.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0
Grass,1,0.5,2.0,1.0,0.5,1.0,1.0,0.5,2.0,0.5,1.0,0.5,2.0,1.0,0.5,1.0,0.5,1.0
Ice,1,0.5,0.5,1.0,2.0,0.5,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0
Fighting,2,1.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,0.5,0.5,0.5,2.0,0.0,1.0,2.0,2.0,0.5
Poison,1,1.0,1.0,1.0,2.0,1.0,1.0,0.5,0.5,1.0,1.0,1.0,0.5,0.5,1.0,1.0,0.0,2.0
Ground,1,2.0,1.0,2.0,0.5,1.0,1.0,2.0,1.0,0.0,1.0,0.5,2.0,1.0,1.0,1.0,2.0,1.0
Flying,1,1.0,1.0,0.5,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,1.0,1.0,0.5,1.0


root
 |-- Attacking: string (nullable = true)
 |-- Normal: integer (nullable = true)
 |-- Fire: double (nullable = true)
 |-- Water: double (nullable = true)
 |-- Electric: double (nullable = true)
 |-- Grass: double (nullable = true)
 |-- Ice: double (nullable = true)
 |-- Fighting: double (nullable = true)
 |-- Poison: double (nullable = true)
 |-- Ground: double (nullable = true)
 |-- Flying: double (nullable = true)
 |-- Psychic: double (nullable = true)
 |-- Bug: double (nullable = true)
 |-- Rock: double (nullable = true)
 |-- Ghost: double (nullable = true)
 |-- Dragon: double (nullable = true)
 |-- Dark: double (nullable = true)
 |-- Steel: double (nullable = true)
 |-- Fairy: double (nullable = true)



##### Primero vamos a limpiar un poco el df

In [0]:
# Aqui renombramos las columnas y hacemos 2 datframes, son el mismo, pero con headers diferentes, para luego unir con Type_1 y Type_2 respectivamente
df_attacks_Type_1 = df_attacks.toDF(*[f"Type_1_to({col.lower()})" for col in df_attacks.columns])
df_attacks_Type_2 = df_attacks.toDF(*[f"Type_2_to({col.lower()})" for col in df_attacks.columns])

df_attacks_Type_1.display()

Type_1_to(attacking),Type_1_to(normal),Type_1_to(fire),Type_1_to(water),Type_1_to(electric),Type_1_to(grass),Type_1_to(ice),Type_1_to(fighting),Type_1_to(poison),Type_1_to(ground),Type_1_to(flying),Type_1_to(psychic),Type_1_to(bug),Type_1_to(rock),Type_1_to(ghost),Type_1_to(dragon),Type_1_to(dark),Type_1_to(steel),Type_1_to(fairy)
Normal,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0,1.0,1.0,0.5,1.0
Fire,1,0.5,0.5,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,0.5,1.0,2.0,1.0
Water,1,2.0,0.5,1.0,0.5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,1.0,1.0
Electric,1,1.0,2.0,0.5,0.5,1.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0
Grass,1,0.5,2.0,1.0,0.5,1.0,1.0,0.5,2.0,0.5,1.0,0.5,2.0,1.0,0.5,1.0,0.5,1.0
Ice,1,0.5,0.5,1.0,2.0,0.5,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0
Fighting,2,1.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,0.5,0.5,0.5,2.0,0.0,1.0,2.0,2.0,0.5
Poison,1,1.0,1.0,1.0,2.0,1.0,1.0,0.5,0.5,1.0,1.0,1.0,0.5,0.5,1.0,1.0,0.0,2.0
Ground,1,2.0,1.0,2.0,0.5,1.0,1.0,2.0,1.0,0.0,1.0,0.5,2.0,1.0,1.0,1.0,2.0,1.0
Flying,1,1.0,1.0,0.5,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,1.0,1.0,0.5,1.0


In [0]:
combined_df = (
    df.join(
        df_attacks_Type_1,
        df.Type_1 == F.lower(df_attacks_Type_1["Type_1_to(attacking)"])
        ).drop(df_attacks_Type_1["Type_1_to(attacking)"])
    .join(
        df_attacks_Type_2,
        df.Type_2 == F.lower(df_attacks_Type_2["Type_2_to(attacking)"]),
        "left" #Aqui hacemos left para que las filas con Type_2 con none( que no coinciden) se mantengan y se seten a null
        ).drop(df_attacks_Type_2["Type_2_to(attacking)"])
        .fillna(0)
    )

combined_df.display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2,Type_1_to(normal),Type_1_to(fire),Type_1_to(water),Type_1_to(electric),Type_1_to(grass),Type_1_to(ice),Type_1_to(fighting),Type_1_to(poison),Type_1_to(ground),Type_1_to(flying),Type_1_to(psychic),Type_1_to(bug),Type_1_to(rock),Type_1_to(ghost),Type_1_to(dragon),Type_1_to(dark),Type_1_to(steel),Type_1_to(fairy),Type_2_to(normal),Type_2_to(fire),Type_2_to(water),Type_2_to(electric),Type_2_to(grass),Type_2_to(ice),Type_2_to(fighting),Type_2_to(poison),Type_2_to(ground),Type_2_to(flying),Type_2_to(psychic),Type_2_to(bug),Type_2_to(rock),Type_2_to(ghost),Type_2_to(dragon),Type_2_to(dark),Type_2_to(steel),Type_2_to(fairy)
Slaking,4431,233,290,166,284,740,False,normal,none,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0,1.0,1.0,0.5,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Regigigas,4346,184,287,210,221,718,True,normal,none,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0,1.0,1.0,0.5,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Mewtwo,4178,179,300,182,214,696,True,psychic,none,1,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,0.5,1.0,1.0,1.0,1.0,0.0,0.5,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Groudon,4115,171,270,228,205,703,True,ground,none,1,2.0,1.0,2.0,0.5,1.0,1.0,2.0,1.0,0.0,1.0,0.5,2.0,1.0,1.0,1.0,2.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Kyogre,4115,171,270,228,205,703,True,water,none,1,2.0,0.5,1.0,0.5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,1.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zekrom,4038,171,275,211,205,691,True,dragon,electric,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,0.0,1,1.0,2.0,0.5,0.5,1.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0
Reshiram,4038,171,275,211,205,691,True,dragon,fire,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,0.0,1,0.5,0.5,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,0.5,1.0,2.0,1.0
Dialga,4038,171,275,211,205,691,True,steel,dragon,1,0.5,0.5,0.5,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,0.5,2.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,0.0
Palkia,3991,159,280,215,189,684,True,water,dragon,1,2.0,0.5,1.0,0.5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,1.0,1.0,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,0.0
Arceus,3989,197,238,238,237,713,True,normal,none,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0,1.0,1.0,0.5,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
combined_df.groupBy().display()

Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
Slaking,4431,233,290,166,284,740,False,normal,none
Regigigas,4346,184,287,210,221,718,True,normal,none
Mewtwo,4178,179,300,182,214,696,True,psychic,none
Groudon,4115,171,270,228,205,703,True,ground,none
Kyogre,4115,171,270,228,205,703,True,water,none
Zekrom,4038,171,275,211,205,691,True,dragon,electric
Reshiram,4038,171,275,211,205,691,True,dragon,fire
Dialga,4038,171,275,211,205,691,True,steel,dragon
Palkia,3991,159,280,215,189,684,True,water,dragon
Arceus,3989,197,238,238,237,713,True,normal,none


In [0]:
df.filter(df.Type_1 == df.Type_2).display()


Pokemon,Max_CP,Max_HP,Attack,Defense,Stamina,Total_Stats,Is_Legendary,Type_1,Type_2
