![Conexão com o Azure Data Lake](https://raw.githubusercontent.com/joaovilar/Python/main/Python_Jupyter/from_container_to_database.png)

Conexão com o Azure Data Lake

In [81]:
#importando as bibliotecas
from datetime import datetime
from io import BytesIO
from urllib.parse import quote_plus

import pandas as pd
from azure.storage.blob import BlobServiceClient

In [82]:

# Storage account settings (placeholder values, replace with your own)
account_name = "NomeDaConta"
account_key = "SuaChaveDeAcesso"
container_name = "NomeDoContainer"

# Build the account endpoint once, then create the service client from it
account_url = f"https://{account_name}.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)


Leitura dos arquivos CSV no container

In [83]:

def ler_arquivo_csv(account_key, account_name, container_name, blob_name, delimitador=';'):
    """Download a CSV blob from Azure Blob Storage and parse it into a DataFrame.

    Args:
        account_key: Credential passed to ``BlobServiceClient``.
        account_name: Storage account name, used to build the account URL.
        container_name: Name of the container that holds the blob.
        blob_name: Name of the blob inside the container (e.g. ``"CSV/Stores.csv"``).
        delimitador: Column separator forwarded to ``pd.read_csv`` as ``sep``.

    Returns:
        pandas.DataFrame: The parsed contents of the blob.
    """
    account_url = f"https://{account_name}.blob.core.windows.net"

    # Use the client as a context manager so it is closed as soon as the bytes
    # have been downloaded, instead of leaving one open client per call.
    with BlobServiceClient(account_url=account_url, credential=account_key) as servico:
        blob_client = servico.get_blob_client(container=container_name, blob=blob_name)
        conteudo = blob_client.download_blob().readall()

    # The whole blob is already in memory, so parsing can happen after the client is closed
    return pd.read_csv(BytesIO(conteudo), sep=delimitador)

# Load every source CSV from the "CSV/" folder, one DataFrame per file.
# The targets on the left line up positionally with the blob names below.
(
    df_stores,
    df_product,
    df_channel,
    df_product_category,
    df_geography,
) = (
    ler_arquivo_csv(account_key, account_name, container_name, caminho_blob)
    for caminho_blob in (
        "CSV/Stores.csv",
        "CSV/Product.csv",
        "CSV/Channel.csv",
        "CSV/ProductCategory.csv",
        "CSV/Geography.csv",
    )
)


In [84]:
# Stamp every table with the load timestamp.
# The timestamp is computed once so that all tables from the same run carry
# exactly the same value, even if the clock ticks over between assignments.
carga_datetime = datetime.now().strftime('%d/%m/%Y %H:%M:%S')

for _tabela in (df_stores, df_product, df_channel, df_product_category, df_geography):
    _tabela['CurrentDateTime'] = carga_datetime

In [85]:
# Replace missing close reasons with an explicit placeholder value
close_reason_preenchida = df_stores['CloseReason'].fillna('Not informed')
df_stores['CloseReason'] = close_reason_preenchida

# Preview the result
df_stores.head()

Unnamed: 0,StoreKey,GeographyKey,StoreType,StoreName,Status,CloseReason,EmployeeCount,SellingAreaSize,CurrentDateTime
0,1,693,Store,Contoso Seattle No.1 Store,On,Not informed,17.0,462,04/01/2024 09:16:19
1,2,693,Store,Contoso Seattle No.2 Store,On,Not informed,25.0,700,04/01/2024 09:16:19
2,3,856,Store,Contoso Kennewick Store,On,Not informed,26.0,680,04/01/2024 09:16:19
3,4,424,Store,Contoso Bellevue Store,On,Not informed,19.0,455,04/01/2024 09:16:19
4,5,677,Store,Contoso Redmond Store,On,Not informed,33.0,560,04/01/2024 09:16:19


In [86]:
# Preview the first rows of the product table
df_product.head()

Unnamed: 0,ProductName,ProductDescription,Manufacturer,BrandName,ClassName,UnitCost,UnitPrice,ProductKey,ProductSubcategoryKey,CurrentDateTime
0,Contoso Wireless Laser Mouse E50 Grey,Advanced 2.4 GHz cordless technology makes fre...,"Contoso, Ltd",Contoso,Economy,1069,2096,873,22,04/01/2024 09:16:19
1,Contoso Optical Wheel OEM PS/2 Mouse E60 Grey,"PS/2 mouse, 6 feet mouse cable","Contoso, Ltd",Contoso,Economy,663,13,879,22,04/01/2024 09:16:19
2,Contoso Optical Wheel OEM PS/2 Mouse E60 Black,"PS/2 mouse, 6 feet mouse cable","Contoso, Ltd",Contoso,Economy,663,13,880,22,04/01/2024 09:16:19
3,Contoso Optical Wheel OEM PS/2 Mouse E60 White,"PS/2 mouse, 6 feet mouse cable","Contoso, Ltd",Contoso,Economy,663,13,881,22,04/01/2024 09:16:19
4,Contoso Optical Wheel OEM PS/2 Mouse E60 Silver,"PS/2 mouse, 6 feet mouse cable","Contoso, Ltd",Contoso,Economy,663,13,882,22,04/01/2024 09:16:19


In [87]:
# Display the channel table in full
df_channel

Unnamed: 0,Channel,ChannelName,CurrentDateTime
0,1,Store,04/01/2024 09:16:19
1,2,Online,04/01/2024 09:16:19
2,3,Catalog,04/01/2024 09:16:19
3,4,Reseller,04/01/2024 09:16:19


In [88]:
# Display the product category table in full
df_product_category

Unnamed: 0,ProductCategoryKey,ProductCategory,CurrentDateTime
0,1,Audio,04/01/2024 09:16:19
1,2,TV and Video,04/01/2024 09:16:19
2,3,Computers,04/01/2024 09:16:19
3,4,Cameras and camcorders,04/01/2024 09:16:19
4,5,Cell phones,04/01/2024 09:16:19
5,6,"Music, Movies and Audio Books",04/01/2024 09:16:19
6,7,Games and Toys,04/01/2024 09:16:19
7,8,Home Appliances,04/01/2024 09:16:19


In [89]:
# Preview the first rows of the geography table
df_geography.head()

Unnamed: 0,GeographyKey,GeographyType,ContinentName,RegionCountryName,CurrentDateTime
0,424,City,North America,United States,04/01/2024 09:16:19
1,430,City,North America,United States,04/01/2024 09:16:19
2,431,City,North America,United States,04/01/2024 09:16:19
3,432,City,North America,United States,04/01/2024 09:16:19
4,433,City,North America,United States,04/01/2024 09:16:19


Lista todos os blobs do container no Azure Storage

In [90]:
# Get a client for the container
container_client = blob_service_client.get_container_client(container_name)

# List the blobs in the container
blobs = container_client.list_blobs()

# Print every blob name, keeping track of the last one listed
ultimo_blob = None
for blob in blobs:
    print(f"Nome do Blob: {blob.name}")
    ultimo_blob = blob

# Open a download for the last listed blob only.
# This is guarded so an empty container no longer fails with a NameError on
# the loop variable; in that case both names are simply left as None.
blob_client = None
conteudo_blob = None
if ultimo_blob is not None:
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=ultimo_blob.name)
    conteudo_blob = blob_client.download_blob()


Nome do Blob: CSV
Nome do Blob: CSV/Channel.csv
Nome do Blob: CSV/Geography.csv
Nome do Blob: CSV/Product.csv
Nome do Blob: CSV/ProductCategory.csv
Nome do Blob: CSV/Stores.csv
Nome do Blob: Carros.csv
Nome do Blob: Excel
Nome do Blob: Processados
Nome do Blob: Processados/Calendar.csv
Nome do Blob: Processados/Channel.csv
Nome do Blob: Processados/Geography.csv
Nome do Blob: Processados/Product.csv
Nome do Blob: Processados/ProductCategory.csv
Nome do Blob: Processados/ProductSubcategory.csv
Nome do Blob: Processados/Promotion.csv
Nome do Blob: Processados/Sales.csv
Nome do Blob: Processados/Stores.csv
Nome do Blob: raw


Conexão com o Azure SQL

In [91]:
#importando as bibliotecas
from sqlalchemy import create_engine
import pyodbc
from azure.storage.blob import BlobType
import time

In [92]:
# Azure SQL connection settings (placeholder values, replace before running).
# NOTE(review): avoid saving real credentials in the notebook; consider loading
# them from environment variables or a secrets store instead.
server_name = "seu_servidor.database.windows.net"
database_name = "seu_banco_de_dados"
username = "seu_usuario"
password = "sua_senha"

In [93]:
def _odbc_valor(valor):
    """Format ``valor`` so it is safe to use as an ODBC connection-string attribute value."""
    texto = str(valor)
    # Values containing ';', '{' or '}' (or surrounding whitespace) must be wrapped
    # in braces, with any closing brace doubled, or they corrupt the attribute list.
    if any(caractere in texto for caractere in ";{}") or texto != texto.strip():
        return "{" + texto.replace("}", "}}") + "}"
    return texto


def criar_conexao_azure_sql(server_name, database_name, username, password):
    """Open a pyodbc connection to an Azure SQL database.

    Args:
        server_name: Server host name, placed in the ``SERVER`` attribute.
        database_name: Database name, placed in the ``DATABASE`` attribute.
        username: Login name, placed in the ``UID`` attribute.
        password: Login password, placed in the ``PWD`` attribute.

    Returns:
        The connection object returned by ``pyodbc.connect``.
    """
    # Each value goes through _odbc_valor so special characters (very common in
    # passwords) cannot break the string; plain values are emitted unchanged.
    connection_string = (
        "DRIVER={ODBC Driver 17 for SQL Server};"
        f"SERVER={_odbc_valor(server_name)};"
        f"DATABASE={_odbc_valor(database_name)};"
        f"UID={_odbc_valor(username)};"
        f"PWD={_odbc_valor(password)};"
    )

    return pyodbc.connect(connection_string)

In [94]:

# Build the SQLAlchemy connection URL.
# The username and password are URL-encoded so that characters such as '@',
# ':' or '/' in the credentials cannot be mistaken for URL delimiters.
connection_string = (
    f"mssql+pyodbc://{quote_plus(username)}:{quote_plus(password)}"
    f"@{server_name}/{database_name}?driver=ODBC+Driver+17+for+SQL+Server"
)

# Create the SQLAlchemy engine used by the to_sql loads
engine = create_engine(connection_string, echo=False)


In [95]:
# Load the stores DataFrame into the stg_stores table, replacing it if it exists
df_stores.to_sql(
    "stg_stores",
    engine,
    if_exists="replace",
    index=False,
)
print('Tabela carregada')

Tabela carregada


In [96]:
# Load the product DataFrame into the stg_product table, replacing it if it exists
df_product.to_sql(
    "stg_product",
    engine,
    if_exists="replace",
    index=False,
)
print('Tabela carregada')

Tabela carregada


In [97]:
# Load the channel DataFrame into the stg_channel table, replacing it if it exists
df_channel.to_sql(
    "stg_channel",
    engine,
    if_exists="replace",
    index=False,
)
print('Tabela carregada')

Tabela carregada


In [98]:
# Load the geography DataFrame into the stg_geography table, replacing it if it exists
df_geography.to_sql(
    "stg_geography",
    engine,
    if_exists="replace",
    index=False,
)
print('Tabela carregada')

Tabela carregada


In [99]:
# Load the product category DataFrame into the stg_product_category table, replacing it if it exists
df_product_category.to_sql(
    "stg_product_category",
    engine,
    if_exists="replace",
    index=False,
)
print('Tabela carregada')

Tabela carregada


In [100]:
# Dispose of the SQLAlchemy engine now that the loads above have run
engine.dispose()

Move os arquivos para outra pasta (copia para o destino e apaga o original)

In [103]:
def mover_arquivos_para_processados(account_key, account_name, container_name, pasta_origem, pasta_destino,
                                    intervalo_espera=1.0, timeout_copia=300.0):
    """Move every blob under ``pasta_origem/`` to ``pasta_destino/`` in the same container.

    Each blob is copied to the destination name and the original is deleted only
    after the service reports the copy as successful.

    Args:
        account_key: Credential passed to ``BlobServiceClient``.
        account_name: Storage account name, used to build the account URL.
        container_name: Container that holds both folders.
        pasta_origem: Source folder (blob name prefix, without the trailing slash).
        pasta_destino: Destination folder that replaces the source prefix.
        intervalo_espera: Seconds to sleep between copy-status checks.
        timeout_copia: Maximum seconds to wait for a single copy to leave "pending".

    Raises:
        TimeoutError: If a copy is still pending after ``timeout_copia`` seconds.
        RuntimeError: If a copy ends in any status other than "success"; the
            source blob is kept in that case.
    """
    account_url = f"https://{account_name}.blob.core.windows.net"
    prefixo_origem = f"{pasta_origem}/"

    # Build the client from the arguments instead of relying on a notebook-level
    # container_client, so the function works no matter which cells ran before it.
    with BlobServiceClient(account_url=account_url, credential=account_key) as servico:
        container = servico.get_container_client(container_name)

        # Materialize the listing first so blobs created by the move itself are
        # never picked up (e.g. when the destination is nested inside the source).
        for blob in list(container.list_blobs(name_starts_with=prefixo_origem)):
            origem_client = container.get_blob_client(blob.name)

            # Every listed name starts with pasta_origem, so swap just that prefix
            novo_blob_name = pasta_destino + blob.name[len(pasta_origem):]
            novo_blob_client = container.get_blob_client(novo_blob_name)

            # Copy the blob to the destination, using the client's own URL as the source
            novo_blob_client.start_copy_from_url(origem_client.url)

            # The copy may complete asynchronously: wait until it leaves "pending"
            limite = time.monotonic() + timeout_copia
            status = novo_blob_client.get_blob_properties().copy.status
            while status == "pending":
                if time.monotonic() >= limite:
                    raise TimeoutError(
                        f"Copy of '{blob.name}' to '{novo_blob_name}' still pending after {timeout_copia}s"
                    )
                time.sleep(intervalo_espera)
                status = novo_blob_client.get_blob_properties().copy.status

            if status != "success":
                raise RuntimeError(
                    f"Copy of '{blob.name}' to '{novo_blob_name}' ended with status '{status}'; source kept"
                )

            # Delete the original only after the copy is confirmed
            origem_client.delete_blob()

# Move the blobs under "CSV/" into "Processados/" (the source blobs are deleted)
mover_arquivos_para_processados(account_key, account_name, container_name, "CSV", "Processados")
