![alt text](https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/ab15e7be-ab9a-4fe2-966e-63d73c3437be/d9sqoap-c6e1e41d-d2b5-4d92-92c1-27c92e072bfd.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7InBhdGgiOiJcL2ZcL2FiMTVlN2JlLWFiOWEtNGZlMi05NjZlLTYzZDczYzM0MzdiZVwvZDlzcW9hcC1jNmUxZTQxZC1kMmI1LTRkOTItOTJjMS0yN2M5MmUwNzJiZmQuanBnIn1dXSwiYXVkIjpbInVybjpzZXJ2aWNlOmZpbGUuZG93bmxvYWQiXX0.L8DD-1wES0TG3BL1hL72ADFhahth_278Xa7s7hSjNI8)

# <b><font color='#62e4ff'>PROCESSOS</font></b>

## <b><font color='#62e4ff'>BIBLIOTECAS</font></b>

In [4]:
import os
import pandas as pd
import duckdb
from dotenv import load_dotenv
# Carregar as variáveis de ambiente do arquivo .env
load_dotenv()

# Pull the pipeline locations from the process environment.
# Each name is None when its variable is not set.
pasta_csv = os.environ.get('PASTA_CSV')
banco_dados_duckdb = os.environ.get('BANCO_DADOS_DUCKDB')
pasta_parquet = os.environ.get('PASTA_PARQUET')

## <b><font color='#62e4ff'>FUNÇÕES</font></b>

In [5]:


def connetionFactory():
    """Open a DuckDB connection to the database named by ``banco_dados_duckdb``.

    Returns:
        The connection object returned by ``duckdb.connect``, or ``None`` when
        connecting raised any exception (the error is printed, not re-raised).
    """
    try:
        conexao = duckdb.connect(banco_dados_duckdb)
    except Exception as erro:
        # Best-effort: report the failure and let the caller test for None.
        print(f" ERROR <=> {erro} ... ")
        return None
    return conexao
    

def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _quote_literal(value):
    """Return ``value`` as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + value.replace("'", "''") + "'"


def main():
    """Load every ``.csv`` in ``pasta_csv`` into DuckDB via an intermediate Parquet file.

    For each CSV file found in ``pasta_csv``:
      1. read it with pandas,
      2. write ``<name>.parquet`` into ``pasta_parquet``,
      3. drop the DuckDB table ``<name>`` if it already exists,
      4. create table ``<name>`` from the Parquet file.

    Any exception aborts the remaining files and is printed rather than raised.
    The connection, when one was opened, is always closed.

    Returns:
        None.
    """
    print("  >> INICIANDO PROCESSO DE CRIAÇÃO DE BASES F1")
    # Bind before the try so the finally clause can never hit an unbound name
    # if obtaining the connection itself raises.
    conn = None
    try:
        conn = connetionFactory()
        if conn:
            # Iterate over the files in the CSV folder
            for arquivo in os.listdir(pasta_csv):
                if not arquivo.endswith('.csv'):
                    continue

                nome_arquivo = os.path.splitext(arquivo)[0]
                print(f"    ** Lendo Arquivo => {arquivo} <= ")
                caminho_csv = os.path.join(pasta_csv, arquivo)

                # Read the CSV
                df = pd.read_csv(caminho_csv)

                # Write the Parquet file, creating the output folder on first use
                os.makedirs(pasta_parquet, exist_ok=True)
                caminho_parquet = os.path.join(pasta_parquet, f'{nome_arquivo}.parquet')
                print("    ** Criando Arquivo .Parquet ")
                df.to_parquet(caminho_parquet, index=False)

                # File-derived names may contain dashes, spaces or reserved
                # words, so the identifier is quoted and the path is escaped.
                tabela_sql = _quote_identifier(nome_arquivo)
                parquet_sql = _quote_literal(caminho_parquet)

                # Check whether the table exists (name passed as a bound parameter)
                result = conn.execute(
                    "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?;",
                    [nome_arquivo],
                ).fetchone()[0]

                # If the table exists, remove it
                if result > 0:
                    print(f"    ** Tabela {nome_arquivo} existe. Removendo...")
                    conn.execute(f"DROP TABLE {tabela_sql};")

                # Create the new DuckDB table from the Parquet file
                conn.execute(
                    f"CREATE TABLE {tabela_sql} AS SELECT * FROM read_parquet({parquet_sql});"
                )
                print(f"    ** Tabela {nome_arquivo} criada no duckdb")
                print("    *****************************************")
                print("")

        print("  >> FINALIZANDO PROCESSO DE CRIAÇÃO DE BASES F1")
    except Exception as e:
        print(f"   >> ERROR NO PROCESSO <=> {e}")
    finally:
        if conn:
            conn.close()
            print("Conexão com o DuckDB fechada.")

## <b><font color='#62e4ff'>EXECUÇÃO</font></b>

In [6]:

# Entry-point guard: run the CSV -> Parquet -> DuckDB load only when this code
# is executed as the main module.
if __name__ == "__main__":
    main()


  >> INICIANDO PROCESSO DE CRIAÇÃO DE BASES F1
    ** Lendo Arquivo => circuits.csv <= 
    ** Criando Arquivo .Parquet 
    ** Tabela circuits existe. Removendo...
    ** Tabela circuits criada no duckdb
    *****************************************

    ** Lendo Arquivo => status.csv <= 
    ** Criando Arquivo .Parquet 
    ** Tabela status existe. Removendo...
    ** Tabela status criada no duckdb
    *****************************************

    ** Lendo Arquivo => lap_times.csv <= 
    ** Criando Arquivo .Parquet 
    ** Tabela lap_times existe. Removendo...
    ** Tabela lap_times criada no duckdb
    *****************************************

    ** Lendo Arquivo => sprint_results.csv <= 
    ** Criando Arquivo .Parquet 
    ** Tabela sprint_results existe. Removendo...
    ** Tabela sprint_results criada no duckdb
    *****************************************

    ** Lendo Arquivo => drivers.csv <= 
    ** Criando Arquivo .Parquet 
    ** Tabela drivers existe. Removendo...
  

# <b><font color='#8efcb1'>ANÁLISE</font></b>

In [7]:
def connetionFactory():
    """Return a DuckDB connection to ``banco_dados_duckdb``, or ``None`` on failure.

    NOTE(review): this repeats, line for line, the ``connetionFactory`` defined
    in the FUNÇÕES section and rebinds the same name.
    """
    try:
        return duckdb.connect(banco_dados_duckdb)
    except Exception as erro:
        # Report the problem and signal failure with None instead of raising.
        print(f" ERROR <=> {erro} ... ")
        return None

# Open a connection for the analysis cells. connetionFactory returns None when
# connecting fails, in which case the query below raises AttributeError.
# NOTE(review): this connection is not closed in the visible cells — confirm it
# is closed further down.
conn = connetionFactory()
# Fetch the rows of the `races` table whose year is 2024 as a pandas DataFrame.
df = conn.execute("SELECT * FROM races  where year = 2024").fetchdf()
# Render the last rows of the result.
display(df.tail())


Unnamed: 0,raceId,year,round,circuitId,name,date,time,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
19,1140,2024,20,32,Mexico City Grand Prix,2024-10-27,20:00:00,https://en.wikipedia.org/wiki/2024_Mexico_City...,2024-10-25,18:30:00,2024-10-25,22:00:00,2024-10-26,17:30:00,2024-10-26,21:00:00,\N,\N
20,1141,2024,21,18,São Paulo Grand Prix,2024-11-03,17:00:00,https://en.wikipedia.org/wiki/2024_S%C3%A3o_Pa...,2024-11-01,14:30:00,2024-11-01,18:30:00,\N,\N,2024-11-02,18:00:00,2024-11-02,14:00:00
21,1142,2024,22,80,Las Vegas Grand Prix,2024-11-23,06:00:00,https://en.wikipedia.org/wiki/2024_Las_Vegas_G...,2024-11-21,02:30:00,2024-11-21,06:00:00,2024-11-22,02:30:00,2024-11-22,06:00:00,\N,\N
22,1143,2024,23,78,Qatar Grand Prix,2024-12-01,17:00:00,https://en.wikipedia.org/wiki/2024_Qatar_Grand...,2024-11-29,13:30:00,2024-11-29,17:30:00,\N,\N,2024-11-30,17:00:00,2024-11-30,13:00:00
23,1144,2024,24,24,Abu Dhabi Grand Prix,2024-12-08,13:00:00,https://en.wikipedia.org/wiki/2024_Abu_Dhabi_G...,2024-12-06,09:30:00,2024-12-06,13:00:00,2024-12-07,10:30:00,2024-12-07,14:00:00,\N,\N
