# Ingestão de Dados - FIFA 21 (Camada Raw)

## Imports

In [46]:
import os

import pandas as pd
import psycopg2
from psycopg2 import sql

## Leitura do CSV

In [47]:
# Path of the raw FIFA 21 export, resolved relative to the working directory.
csv_path = "fifa21_raw_data.csv"

# Parse the whole file with pandas' default options.
# NOTE(review): the saved output shows a warning raised on this call; its text
# is not preserved, so confirm whether explicit dtypes are needed.
df = pd.read_csv(csv_path)

# Report the load and the number of rows read.
print("Arquivo carregado com sucesso!")
print(f"Total de registros: {df.shape[0]}")

# Last expression of the cell: preview the first rows.
df.head()


Arquivo carregado com sucesso!
Total de registros: 18979


  df = pd.read_csv(csv_path)


Unnamed: 0,photoUrl,LongName,playerUrl,Nationality,Positions,Name,Age,↓OVA,POT,Team & Contract,...,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits
0,https://cdn.sofifa.com/players/158/023/21_60.png,Lionel Messi,http://sofifa.com/player/158023/lionel-messi/2...,Argentina,RW ST CF,L. Messi,33,93,93,\n\n\n\nFC Barcelona\n2004 ~ 2021\n\n,...,Medium,Low,5 ★,85,92,91,95,38,65,\n372
1,https://cdn.sofifa.com/players/020/801/21_60.png,C. Ronaldo dos Santos Aveiro,http://sofifa.com/player/20801/c-ronaldo-dos-s...,Portugal,ST LW,Cristiano Ronaldo,35,92,92,\n\n\n\nJuventus\n2018 ~ 2022\n\n,...,High,Low,5 ★,89,93,81,89,35,77,\n344
2,https://cdn.sofifa.com/players/200/389/21_60.png,Jan Oblak,http://sofifa.com/player/200389/jan-oblak/210005/,Slovenia,GK,J. Oblak,27,91,93,\n\n\n\nAtlético Madrid\n2014 ~ 2023\n\n,...,Medium,Medium,3 ★,87,92,78,90,52,90,\n86
3,https://cdn.sofifa.com/players/192/985/21_60.png,Kevin De Bruyne,http://sofifa.com/player/192985/kevin-de-bruyn...,Belgium,CAM CM,K. De Bruyne,29,91,91,\n\n\n\nManchester City\n2015 ~ 2023\n\n,...,High,High,4 ★,76,86,93,88,64,78,\n163
4,https://cdn.sofifa.com/players/190/871/21_60.png,Neymar da Silva Santos Jr.,http://sofifa.com/player/190871/neymar-da-silv...,Brazil,LW CAM,Neymar Jr,28,91,91,\n\n\n\nParis Saint-Germain\n2017 ~ 2022\n\n,...,High,Medium,5 ★,91,85,86,94,36,59,\n273


## Configuração do Banco

In [48]:
# Connection settings for the PostgreSQL instance that receives the raw data.
# Every value can be overridden through the standard libpq environment
# variables (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD), so real
# credentials never have to be saved inside the notebook. The fallbacks are the
# values that were previously hard-coded here, which keeps existing runs working.
db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    # Environment variables are strings; the port is converted back to an int.
    'port': int(os.environ.get('PGPORT', '5433')),
    'database': os.environ.get('PGDATABASE', 'airflow'),
    'user': os.environ.get('PGUSER', 'airflow'),
    'password': os.environ.get('PGPASSWORD', 'airflow'),
}

## Conexão com o PostgreSQL

In [49]:
# Open a connection with the settings from db_config and create a cursor on it.
# Both objects are module-level so the following cells can reuse them.
conn = psycopg2.connect(**db_config)
cursor = conn.cursor()

print("Conexão com o PostgreSQL realizada com sucesso!")

Conexão com o PostgreSQL realizada com sucesso!


## Ingestão

In [50]:
def pandas_to_postgres_type(dtype):
    """Map a pandas/NumPy dtype to the name of a PostgreSQL column type.

    Integer dtypes are mapped by width so stored values cannot overflow the
    column: PostgreSQL ``INTEGER`` is 32-bit signed, while pandas defaults to
    64-bit integers.

    Args:
        dtype: A dtype as found in ``DataFrame.dtypes`` (NumPy or pandas
            extension dtype).

    Returns:
        str: ``"INTEGER"``, ``"BIGINT"``, ``"NUMERIC(20, 0)"``,
        ``"DOUBLE PRECISION"``, ``"BOOLEAN"``, ``"TIMESTAMP"``, or ``"TEXT"``
        for anything not recognised.
    """
    types = pd.api.types

    if types.is_integer_dtype(dtype):
        # Unknown width is treated as 8 bytes so the widest safe type is chosen.
        width = getattr(dtype, "itemsize", 8)
        signed = types.is_signed_integer_dtype(dtype)
        if (signed and width <= 4) or (not signed and width <= 2):
            return "INTEGER"
        if not signed and width >= 8:
            # uint64 can exceed the signed 64-bit range of BIGINT.
            return "NUMERIC(20, 0)"
        return "BIGINT"
    if types.is_float_dtype(dtype):
        return "DOUBLE PRECISION"
    if types.is_bool_dtype(dtype):
        return "BOOLEAN"
    if types.is_datetime64_any_dtype(dtype):
        return "TIMESTAMP"
    return "TEXT"

# Target location of the raw table. full_table_name stays a plain string for
# log messages; the SQL statements below use quoted identifiers instead.
schema = "raw"
table_name = "fifa21_players"
full_table_name = f"{schema}.{table_name}"
table_ident = sql.Identifier(schema, table_name)

# 1) Make sure the schema exists.
create_schema_query = sql.SQL("CREATE SCHEMA IF NOT EXISTS {}").format(
    sql.Identifier(schema)
)

# 2) Build one "<column> <type>" definition per DataFrame column.
#    sql.Identifier quotes and escapes the column name, so headers containing
#    spaces, symbols or double quotes cannot break the statement.
columns_with_types = [
    sql.SQL("{} {}").format(
        sql.Identifier(str(col)),
        sql.SQL(pandas_to_postgres_type(dtype)),
    )
    for col, dtype in df.dtypes.items()
]
columns_sql = sql.SQL(", ").join(columns_with_types)

# 3) Drop and recreate the table with a surrogate serial primary key.
drop_table_query = sql.SQL("DROP TABLE IF EXISTS {}").format(table_ident)
create_table_query = sql.SQL(
    "CREATE TABLE {} (id SERIAL PRIMARY KEY, {})"
).format(table_ident, columns_sql)

try:
    cursor.execute(create_schema_query)
    cursor.execute(drop_table_query)
    cursor.execute(create_table_query)
    conn.commit()
except Exception:
    # Roll back so the shared connection is not left in an aborted transaction
    # that would make every later cell fail.
    conn.rollback()
    raise

print(f"Tabela '{full_table_name}' recriada com sucesso no schema '{schema}'.")

Tabela 'raw.fifa21_players' recriada com sucesso no schema 'raw'.


In [None]:
# Replace missing values with None so they are stored as SQL NULL.
# The cast to object comes first: on numeric columns, where(..., None) can
# leave NaN in place, which would be inserted as a NaN float instead of NULL.
df_insert = df.astype(object).where(df.notna(), None)

columns = list(df_insert.columns)

# Parameterised INSERT with quoted identifiers: one placeholder per column.
insert_query = sql.SQL("""
    INSERT INTO {} ({})
    VALUES ({})
""").format(
    sql.Identifier(schema, table_name),
    sql.SQL(', ').join(map(sql.Identifier, columns)),
    sql.SQL(', ').join(sql.Placeholder() for _ in columns)
)

# name=None yields plain tuples directly, one per row, without the index.
data_tuples = list(df_insert.itertuples(index=False, name=None))

try:
    cursor.executemany(insert_query, data_tuples)
    conn.commit()
except Exception:
    # Undo the partial load and keep the connection usable for a retry.
    conn.rollback()
    raise

print(f"{len(data_tuples)} registros inseridos com sucesso.")


## Testando ingestão

In [None]:
# Count the rows of the schema-qualified table. An unqualified name would be
# resolved through the session search_path and could hit a different table
# (or none at all) instead of the one created in the target schema.
count_query = sql.SQL("SELECT COUNT(*) FROM {}").format(
    sql.Identifier(schema, table_name)
)
cursor.execute(count_query)
total = cursor.fetchone()[0]

print(f"Total de registros na tabela '{table_name}': {total}")

# The table is recreated on every run, so it must hold exactly the rows of df.
assert total == len(df), (
    f"Row count mismatch: table has {total} rows, DataFrame has {len(df)}"
)

Total de registros na tabela 'fifa21_players': 18979


## Fechando conexão

In [None]:
# Release the cursor first, then the connection it was created from.
cursor.close()
conn.close()

print("Conexão encerrada.")

Conexão encerrada.
