### Importar arquivos

Para baixar um arquivo do GitHub e salvá-lo no Databricks, você pode seguir os passos abaixo:


**Exemplo para importar apenas um arquivo**

In [0]:
import requests

# Download a single CSV from GitHub and store it in the Databricks Volume.
url = 'https://raw.githubusercontent.com/andressa-mangolin/data-project-01/refs/heads/main/scr/customers.csv'
dbfs_path = '/Volumes/workspace/lhdw/landingzone/vendas/processar/customers.csv'

# requests applies no timeout by default; without one this cell could hang
# indefinitely if the remote server stops responding.
response = requests.get(url, timeout=60)
# Raise on 4xx/5xx so an HTTP error page is never saved as the CSV.
response.raise_for_status()

# Write the downloaded text to the destination path, replacing any existing file.
dbutils.fs.put(
    dbfs_path,
    response.text,
    overwrite=True
)

print(f"Arquivo baixado e salvo em: {dbfs_path}")

Wrote 9033957 bytes.
Arquivo baixado e salvo em: /Volumes/workspace/lhdw/landingzone/vendas/processar/customers.csv


**Evidência de que o arquivo customers.csv foi criado**

In [0]:

%fs ls /Volumes/workspace/lhdw/landingzone/vendas/processar


path,name,size,modificationTime
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/customers.csv,customers.csv,9033957,1763566671000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/geolocation.csv,geolocation.csv,61273883,1763566677000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/order_items.csv,order_items.csv,15438671,1763566681000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/order_payments.csv,order_payments.csv,5777138,1763566682000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/order_reviews.csv,order_reviews.csv,14346950,1763566685000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/orders.csv,orders.csv,17654914,1763566689000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/product_category_name_translation.csv,product_category_name_translation.csv,2542,1763566691000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/products.csv,products.csv,2379446,1763566690000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/sellers.csv,sellers.csv,174703,1763566691000


**Exemplo para importar vários arquivos**

In [0]:
import requests
from typing import List

# 1. FILE LIST: the CSV files to download
arquivos_a_baixar: List[str] = [
    'customers.csv',
    'geolocation.csv',
    'order_items.csv',
    'order_payments.csv',
    'order_reviews.csv',
    'orders.csv',
    'products.csv',
    'sellers.csv',
    'product_category_name_translation.csv'
]

# 2. BASE ADDRESSES: the part of the URL / destination path that does not change
url_base_github = 'https://raw.githubusercontent.com/andressa-mangolin/data-project-01/refs/heads/main/scr/'
caminho_base_databricks = '/Volumes/workspace/lhdw/landingzone/vendas/processar/'

# 3. LOOP: download the files one by one
for nome_do_arquivo in arquivos_a_baixar:

    # Full source URL and full destination path (base + file name)
    url_completa = url_base_github + nome_do_arquivo
    caminho_destino = caminho_base_databricks + nome_do_arquivo

    # --- INGESTION ---

    # 1. Fetch the file from GitHub. requests applies no timeout by default,
    #    so set one to keep the cell from hanging on a stalled connection.
    response = requests.get(url_completa, timeout=60)

    # Stop on 4xx/5xx: otherwise the body of an error response (e.g. a 404
    # message) would be written to the Volume as if it were the CSV and the
    # success message below would still be printed.
    response.raise_for_status()

    # 2. Save the file contents to the Databricks Volume, replacing any existing file
    dbutils.fs.put(
        caminho_destino,
        response.text,
        overwrite=True
    )

    print(f"✅ {nome_do_arquivo} salvo com sucesso!")

Wrote 9033957 bytes.
✅ customers.csv salvo com sucesso!
Wrote 61273883 bytes.
✅ geolocation.csv salvo com sucesso!
Wrote 15438671 bytes.
✅ order_items.csv salvo com sucesso!
Wrote 5777138 bytes.
✅ order_payments.csv salvo com sucesso!
Wrote 14346950 bytes.
✅ order_reviews.csv salvo com sucesso!
Wrote 17654914 bytes.
✅ orders.csv salvo com sucesso!
Wrote 2379446 bytes.
✅ products.csv salvo com sucesso!
Wrote 174703 bytes.
✅ sellers.csv salvo com sucesso!
Wrote 2542 bytes.
✅ product_category_name_translation.csv salvo com sucesso!


In [0]:
# Show the files currently in the "processar" folder of the Volume.
display(dbutils.fs.ls("/Volumes/workspace/lhdw/landingzone/vendas/processar"))


path,name,size,modificationTime
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/customers.csv,customers.csv,9033957,1763567684000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/geolocation.csv,geolocation.csv,61273883,1763567687000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/order_items.csv,order_items.csv,15438671,1763567689000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/order_payments.csv,order_payments.csv,5777138,1763567690000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/order_reviews.csv,order_reviews.csv,14346950,1763567691000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/orders.csv,orders.csv,17654914,1763567693000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/product_category_name_translation.csv,product_category_name_translation.csv,2542,1763567695000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/products.csv,products.csv,2379446,1763567694000
dbfs:/Volumes/workspace/lhdw/landingzone/vendas/processar/sellers.csv,sellers.csv,174703,1763567695000


In [0]:
# Show the files currently in the "processado" folder of the Volume.
display(dbutils.fs.ls("/Volumes/workspace/lhdw/landingzone/vendas/processado"))

[]