In [3]:
import psycopg2

DB_CONFIG = {
    'host': 'localhost',
    'port': 5433,
    'database': 'airflow',
    'user': 'airflow',
    'password': 'airflow'
}

conn = psycopg2.connect(**DB_CONFIG)
cur = conn.cursor()

cur.execute("""
    CREATE TEMP TABLE teste_escrita (
        id INT,
        descricao TEXT
    );
""")

cur.execute("""
    INSERT INTO teste_escrita VALUES (1, 'teste ok');
""")

cur.execute("SELECT * FROM teste_escrita;")
print(cur.fetchall())

conn.commit()
cur.close()
conn.close()


OperationalError: connection to server at "localhost" (127.0.0.1), port 5433 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?


In [None]:
import pandas as pd
import numpy as np
import re
import os
import unicodedata

# 1. Configurações e Caminhos
INPUT_FILE = '../Data Layer/raw/fifa21_raw_data.csv'
OUTPUT_DIR = '../Data Layer/silver'
OUTPUT_FILE_CSV = 'fifa21_silver.csv'

# 2. Funções Auxiliares de Limpeza

def parse_currency(value):
    """Converte €67.5M -> 67500000.0 ou €560K -> 560000.0"""
    if pd.isna(value) or value == '': return 0.0
    val = str(value).replace('€', '').strip()
    multiplier = 1
    if 'M' in val:
        multiplier = 1_000_000
        val = val.replace('M', '')
    elif 'K' in val:
        multiplier = 1_000
        val = val.replace('K', '')
    try:
        return float(val) * multiplier
    except:
        return 0.0

def convert_height_cm(h):
    """Converte 5'7" para centímetros"""
    if pd.isna(h): return None
    try:
        parts = re.findall(r"(\d+)", str(h))
        feet = int(parts[0])
        inches = int(parts[1]) if len(parts) > 1 else 0
        return round((feet * 30.48) + (inches * 2.54), 1)
    except:
        return None

def clean_team_contract(text):
    """Limpa a sujeira da coluna Team & Contract"""
    if pd.isna(text): return "Free Agent", None
    lines = [line.strip() for line in str(text).split('\n') if line.strip()]
    if not lines: return "Free Agent", None
    
    team = lines[0]
    contract = lines[1] if len(lines) > 1 else "Unknown"
    return team, contract

# 3. Processamento Bronze -> Silver

print("Iniciando ETL FIFA...")
df = pd.read_csv(INPUT_FILE)

# 3.1 Limpeza e Tipagem Básica
df['Name'] = df['LongName'].apply(lambda x: unicodedata.normalize('NFKD', str(x)).encode('ascii', 'ignore').decode('utf-8'))
df['Height_cm'] = df['Height'].apply(convert_height_cm)
df['Weight_kg'] = df['Weight'].str.extract(r'(\d+)').astype(float).apply(lambda x: round(x * 0.453592, 1))

# 3.2 Financeiro
df['Value_Eur'] = df['Value'].apply(parse_currency)
df['Wage_Eur'] = df['Wage'].apply(parse_currency)
df['Release_Clause_Eur'] = df['Release Clause'].apply(parse_currency)

# 3.3 Tratando a coluna Team & Contract (Split em duas)
temp_team_contract = df['Team & Contract'].apply(clean_team_contract)
df['Team'] = [x[0] for x in temp_team_contract]
df['Contract_Period'] = [x[1] for x in temp_team_contract]

# 3.4 Engenharia de Atributos (Similar ao Quality Score)
# Criando um score de eficiência ofensiva
df['Offensive_Score'] = (df['Attacking'] + df['Skill']) / 2
# Criando faixas de potencial
df['Player_Tier'] = pd.cut(df['POT'], 
                           bins=[0, 70, 80, 85, 90, 100], 
                           labels=['Fodder', 'Starter', 'Elite', 'World Class', 'Legendary'])

# 4. Seleção de Colunas Finais (Organização Silver)
cols_to_keep = [
    'ID', 'Name', 'Nationality', 'Age', 'Team', 'Contract_Period',
    'Height_cm', 'Weight_kg', 'Positions', 'BP', 'foot',
    '↓OVA', 'POT', 'Player_Tier', 'Growth',
    'Value_Eur', 'Wage_Eur', 'Release_Clause_Eur',
    'Offensive_Score', 'Total Stats', 'Base Stats', 'Hits'
]

df_silver = df[cols_to_keep].copy()

# 5. Salvamento
os.makedirs(OUTPUT_DIR, exist_ok=True)
df_silver.to_csv(os.path.join(OUTPUT_DIR, OUTPUT_FILE_CSV), index=False)

print(f"Sucesso! {len(df_silver)} jogadores processados.")
print(df_silver[['Name', 'Team', 'Value_Eur', 'Player_Tier']].head())

Iniciando ETL FIFA...


  df = pd.read_csv(INPUT_FILE)


KeyError: "['OVA'] not in index"