In [1]:
from pyiceberg import __version__

# Display the installed PyIceberg version as the cell output.
__version__

'0.6.0'

In [2]:
%%sql

-- Create the `pokemon` database unless it already exists, so the cell is safe to re-run.
CREATE DATABASE IF NOT EXISTS pokemon;

24/10/23 11:10:08 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [12]:
import requests
import pandas as pd

# Fetch the data for a single Pokémon
def get_pokemon_data(pokemon_id, timeout=10):
    """Fetch one Pokémon's record from the PokeAPI and return the decoded JSON.

    Args:
        pokemon_id: Value interpolated into the ``/pokemon/{id}/`` endpoint URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f'https://pokeapi.co/api/v2/pokemon/{pokemon_id}/'
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return response.json()

# Accumulate one record per Pokémon; the DataFrame is built once from this list.
pokemon_list = []

# Loop over Pokémon numbers 1 through 151 (the initial 151).
for i in range(1, 152):
    data = get_pokemon_data(i)
    # Index the base stats by stat name rather than by list position, so the
    # columns stay correct even if the API returns the entries in another order.
    base_stats = {entry['stat']['name']: entry['base_stat'] for entry in data['stats']}

    pokemon_list.append({
        'numero': i,
        'name': data['name'],
        'ataque': base_stats['attack'],    # attack
        'defesa': base_stats['defense'],   # defense
        'energia': base_stats['hp'],       # HP, stored as "energia"
    })

# Build the pandas DataFrame from the collected records.
df = pd.DataFrame(pokemon_list)

In [13]:
# Quick check of the (rows, columns) dimensions of the collected data.
df.shape

(151, 5)

In [14]:
# Preview the first rows to confirm the columns were filled as expected.
df.head()

Unnamed: 0,numero,name,ataque,defesa,energia
0,1,bulbasaur,49,49,45
1,2,ivysaur,62,63,60
2,3,venusaur,82,83,80
3,4,charmander,52,43,39
4,5,charmeleon,64,58,58


In [7]:
%%sql
-- Drop any previous copy of the table so the notebook starts from a clean state.
DROP TABLE IF EXISTS pokemon.primeira_geracao;

In [8]:
%%sql
-- Iceberg table with one row per Pokémon: name plus three integer stat columns.
-- NOTE(review): partitioning by `name` presumably yields one partition per
-- distinct name; confirm this is intended for such a small table.
CREATE TABLE IF NOT EXISTS pokemon.primeira_geracao (
    name    STRING,
    ataque  INT,
    defesa  INT,
    energia INT
)
USING iceberg
PARTITIONED BY (name)

In [9]:
from pyspark.sql import SparkSession
# getOrCreate() returns the already-running session when one exists (the
# warning printed by this cell shows that is the case here).
spark = SparkSession.builder.appName("pokemon").getOrCreate()

24/10/23 11:15:43 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [10]:
# Columns of pokemon.primeira_geracao. The pandas frame also carries `numero`,
# which the table does not define, so project onto the table's columns before
# converting; otherwise the frame and the table schema disagree.
table_columns = ['name', 'ataque', 'defesa', 'energia']

# Convert the pandas DataFrame to a Spark DataFrame
spark_df = spark.createDataFrame(df[table_columns])

# Display the Spark DataFrame
spark_df.show()

                                                                                

+----------+------+------+-------+
|      name|ataque|defesa|energia|
+----------+------+------+-------+
| bulbasaur|    49|    49|     45|
|   ivysaur|    62|    63|     60|
|  venusaur|    82|    83|     80|
|charmander|    52|    43|     39|
|charmeleon|    64|    58|     58|
| charizard|    84|    78|     78|
|  squirtle|    48|    65|     44|
| wartortle|    63|    80|     59|
| blastoise|    83|   100|     79|
|  caterpie|    30|    35|     45|
|   metapod|    20|    55|     50|
|butterfree|    45|    50|     60|
|    weedle|    35|    30|     40|
|    kakuna|    25|    50|     45|
|  beedrill|    90|    40|     65|
|    pidgey|    45|    40|     40|
| pidgeotto|    60|    55|     63|
|   pidgeot|    80|    75|     83|
|   rattata|    56|    35|     30|
|  raticate|    81|    60|     55|
+----------+------+------+-------+
only showing top 20 rows



In [11]:
# Append the rows to the existing pokemon.primeira_geracao table.
# NOTE(review): "append" adds the rows again on every run; this presumably
# relies on the earlier DROP TABLE cell to avoid duplicates — confirm.
spark_df.write.mode("append").saveAsTable("pokemon.primeira_geracao")

                                                                                