In [82]:
import requests
import pandas as pd
import os
from datetime import datetime

# Endpoint da API
url = "https://api.openbrewerydb.org/breweries"

# Fazer uma requisição GET
response = requests.get(url)

if response.status_code == 200:
    # Dados brutos da API
    breweries_data = response.json()
    
    # Criar um DataFrame com os dados brutos
    df = pd.DataFrame(breweries_data)
    
    # Configurar o diretório de saída para a camada bronze
    bronze_path = "./bronze/"
    os.makedirs(bronze_path, exist_ok=True)
    
    # Nome do arquivo com timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"brewery_data_{timestamp}.parquet"
    
    # Salvar os dados brutos no formato Parquet
    df.to_parquet(os.path.join(bronze_path, file_name), index=False)
    
    print(f"Dados da camada bronze salvos em: {os.path.join(bronze_path, file_name)}")
else:
    print(f"Erro ao acessar a API: {response.status_code}")


Dados da camada bronze salvos em: ./bronze/brewery_data_20241127_141705.parquet


In [83]:
import pandas as pd

bronze_path = "./bronze/brewery_data_20241123_151511.parquet"


df = pd.read_parquet(bronze_path)

print(df.head())

                                     id                     name brewery_type  \
0  5128df48-79fc-4f0f-8b52-d06be54d0cec         (405) Brewing Co        micro   
1  9c5a66c8-cc13-416f-a5d9-0a769c87d318         (512) Brewing Co        micro   
2  34e8c68b-6146-453f-a4b9-1f6cd99a5ada  1 of Us Brewing Company        micro   
3  ef970757-fe42-416f-931d-722451f1f59c     10 Barrel Brewing Co        large   
4  6d14b220-8926-4521-8d19-b98a2d6ec3db     10 Barrel Brewing Co        large   

               address_1 address_2 address_3            city state_province  \
0         1716 Topeka St      None      None          Norman       Oklahoma   
1  407 Radam Ln Ste F200      None      None          Austin          Texas   
2    8100 Washington Ave      None      None  Mount Pleasant      Wisconsin   
3              1501 E St      None      None       San Diego     California   
4          62970 18th St      None      None            Bend         Oregon   

  postal_code        country          

In [84]:
df['name'] = df['name'].str.replace(r"[()]", "", regex=True)
print(df)

                                      id  \
0   5128df48-79fc-4f0f-8b52-d06be54d0cec   
1   9c5a66c8-cc13-416f-a5d9-0a769c87d318   
2   34e8c68b-6146-453f-a4b9-1f6cd99a5ada   
3   ef970757-fe42-416f-931d-722451f1f59c   
4   6d14b220-8926-4521-8d19-b98a2d6ec3db   
5   e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2   
6   e432899b-7f58-455f-9c7b-9a6e2130a1e0   
7   9f1852da-c312-42da-9a31-097bac81c4c0   
8   ea4f30c0-bce6-416b-8904-fab4055a7362   
9   1988eb86-f0a2-4674-ba04-02454efa0d31   
10  1ecc330f-6275-42a5-b14e-00adbed62752   
11  7531dbd8-afc9-4b5b-95bc-7ece7f2c0bf3   
12  5ae467af-66dc-4d7f-8839-44228f89b596   
13  4ffda196-dd59-44a5-9eeb-5f7fd4b58f5a   
14  42aa37d5-8384-4ffe-8c81-7c982eff0384   
15  232e8f62-9afc-45f5-b4bc-582c26b5c43b   
16  08f78223-24f8-4b71-b381-ea19a5bd82df   
17  58293321-14ae-49d7-9a7b-08436c9e63a6   
18  e5f3e72a-fee2-4813-82cf-f2e53b439ae6   
19  d81ff708-b5d2-478f-af6a-6d40f5beb9ac   
20  fb94830f-6196-4f59-9189-c9060b778085   
21  0faa0fb2-fffa-416d-9eab-46f6

In [85]:
# higienizando os dados
# Nao foi utilizado o apply.Lambda pois sao poucos registros

df.loc[7, 'name'] = df.loc[7, 'name'].replace('Bend Pub', '').replace('-', '').strip()
df.loc[8, 'name'] = df.loc[8, 'name'].replace('Boise', '').replace('-', '').strip()
df.loc[9, 'name'] = df.loc[9, 'name'].replace('Denver', '').replace('-', '').strip()
df.loc[21, 'name'] = df.loc[21, 'name'].replace('Production Facility', '').replace('-', '').strip()
df.loc[37, 'name'] = df.loc[37, 'name'].replace('llc', '').replace(',', '').strip()

print("\nDepois da transformação:")



Depois da transformação:


In [86]:


# Substituir 'None' (string) e valores None (nulo) por 'Sem informação'
df['address_2'] = df['address_2'].replace([None, 'None'], 'Não informado')
df['address_3'] = df['address_3'].replace([None, 'None'], 'Não informado')
df['longitude'] = df['longitude'].replace([None, 'None'], 'Não informada') 
df['latitude'] = df['latitude'].replace([None, 'None'], 'Sem latitude informada')
df['phone'] = df['phone'].replace([None, 'None'], 'Sem telefone cadastrado')   
df['website_url'] = df['website_url'].replace([None, 'None'], 'Sem e-mail cadastrado')
df['street'] = df['street'].replace([None, 'None'], 'Não informado')  
print("\nDepois da substituição:")
print(df)



Depois da substituição:
                                      id                            name  \
0   5128df48-79fc-4f0f-8b52-d06be54d0cec                  405 Brewing Co   
1   9c5a66c8-cc13-416f-a5d9-0a769c87d318                  512 Brewing Co   
2   34e8c68b-6146-453f-a4b9-1f6cd99a5ada         1 of Us Brewing Company   
3   ef970757-fe42-416f-931d-722451f1f59c            10 Barrel Brewing Co   
4   6d14b220-8926-4521-8d19-b98a2d6ec3db            10 Barrel Brewing Co   
5   e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2            10 Barrel Brewing Co   
6   e432899b-7f58-455f-9c7b-9a6e2130a1e0            10 Barrel Brewing Co   
7   9f1852da-c312-42da-9a31-097bac81c4c0            10 Barrel Brewing Co   
8   ea4f30c0-bce6-416b-8904-fab4055a7362            10 Barrel Brewing Co   
9   1988eb86-f0a2-4674-ba04-02454efa0d31            10 Barrel Brewing Co   
10  1ecc330f-6275-42a5-b14e-00adbed62752  10 Torr Distilling and Brewing   
11  7531dbd8-afc9-4b5b-95bc-7ece7f2c0bf3           10-56 Brewin

In [87]:
import os
import pandas as pd

# Diretórios Bronze e Silver
bronze_dir = './bronze/brewery_data_20241123_151511.parquet'
silver_dir = './silver'
os.makedirs(silver_dir, exist_ok=True)

# Caminho do arquivo no Bronze
bronze_path = "./bronze/brewery_data_20241123_151511.parquet"

# Caminho do arquivo no Silver
silver_file_path = os.path.join(silver_dir, 'breweries_transformed.parquet')

 # Verificar se o arquivo existe no Bronze
if os.path.exists(bronze_path):
    # Carregar os dados do Bronze
#    df = pd.read_parquet(bronze_path)
    

    
    # Salvar os dados transformados no Silver
    df.to_parquet(silver_file_path, index=False)
    print(f"Dados transformados e salvos com sucesso em {silver_file_path}")
else:
    print(f"Arquivo {bronze_file_path} não encontrado no diretório Bronze.")

print(df)

Dados transformados e salvos com sucesso em ./silver\breweries_transformed.parquet
                                      id                            name  \
0   5128df48-79fc-4f0f-8b52-d06be54d0cec                  405 Brewing Co   
1   9c5a66c8-cc13-416f-a5d9-0a769c87d318                  512 Brewing Co   
2   34e8c68b-6146-453f-a4b9-1f6cd99a5ada         1 of Us Brewing Company   
3   ef970757-fe42-416f-931d-722451f1f59c            10 Barrel Brewing Co   
4   6d14b220-8926-4521-8d19-b98a2d6ec3db            10 Barrel Brewing Co   
5   e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2            10 Barrel Brewing Co   
6   e432899b-7f58-455f-9c7b-9a6e2130a1e0            10 Barrel Brewing Co   
7   9f1852da-c312-42da-9a31-097bac81c4c0            10 Barrel Brewing Co   
8   ea4f30c0-bce6-416b-8904-fab4055a7362            10 Barrel Brewing Co   
9   1988eb86-f0a2-4674-ba04-02454efa0d31            10 Barrel Brewing Co   
10  1ecc330f-6275-42a5-b14e-00adbed62752  10 Torr Distilling and Brewing   
11  7

In [88]:
import pandas as pd

silver_file_path = "./silver/breweries_transformed.parquet"

# Ler apenas a partição 'state=california'
df = pd.read_parquet(silver_file_path)

print(df.head(10))

                                     id                     name brewery_type  \
0  5128df48-79fc-4f0f-8b52-d06be54d0cec           405 Brewing Co        micro   
1  9c5a66c8-cc13-416f-a5d9-0a769c87d318           512 Brewing Co        micro   
2  34e8c68b-6146-453f-a4b9-1f6cd99a5ada  1 of Us Brewing Company        micro   
3  ef970757-fe42-416f-931d-722451f1f59c     10 Barrel Brewing Co        large   
4  6d14b220-8926-4521-8d19-b98a2d6ec3db     10 Barrel Brewing Co        large   
5  e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2     10 Barrel Brewing Co        large   
6  e432899b-7f58-455f-9c7b-9a6e2130a1e0     10 Barrel Brewing Co        large   
7  9f1852da-c312-42da-9a31-097bac81c4c0     10 Barrel Brewing Co        large   
8  ea4f30c0-bce6-416b-8904-fab4055a7362     10 Barrel Brewing Co        large   
9  1988eb86-f0a2-4674-ba04-02454efa0d31     10 Barrel Brewing Co        large   

                     address_1      address_2      address_3            city  \
0               1716 Topeka 

In [89]:
import os
import pandas as pd

# Diretórios Silver e Gold
silver_file_path = './silver/breweries_transformed.parquet'
gold_dir = './gold'
os.makedirs(gold_dir, exist_ok=True)

# Caminho do arquivo no Gold
gold_file_path = os.path.join(gold_dir, 'breweries_transformed.parquet')

# Verifique se o arquivo Silver existe
if os.path.exists(silver_file_path):
    try:
        # Carregar os dados do Silver
      #  df_transformed = pd.read_parquet(silver_file_path)

        # Limpeza e transformação dos dados
        # Exemplo 1: Remover duplicatas
        df_transformed = df_transformed.drop_duplicates()

        # Salvar os dados no formato Parquet no diretório Gold
        df_transformed.to_parquet(gold_file_path, index=False)
        print(f"Dados processados e salvos com sucesso em {gold_file_path}")
    except Exception as e:
        print(f"Ocorreu um erro durante o processamento: {e}")
else:
    print(f"Arquivo {silver_file_path} não encontrado.")

print(df)


Ocorreu um erro durante o processamento: name 'df_transformed' is not defined
                                      id                            name  \
0   5128df48-79fc-4f0f-8b52-d06be54d0cec                  405 Brewing Co   
1   9c5a66c8-cc13-416f-a5d9-0a769c87d318                  512 Brewing Co   
2   34e8c68b-6146-453f-a4b9-1f6cd99a5ada         1 of Us Brewing Company   
3   ef970757-fe42-416f-931d-722451f1f59c            10 Barrel Brewing Co   
4   6d14b220-8926-4521-8d19-b98a2d6ec3db            10 Barrel Brewing Co   
5   e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2            10 Barrel Brewing Co   
6   e432899b-7f58-455f-9c7b-9a6e2130a1e0            10 Barrel Brewing Co   
7   9f1852da-c312-42da-9a31-097bac81c4c0            10 Barrel Brewing Co   
8   ea4f30c0-bce6-416b-8904-fab4055a7362            10 Barrel Brewing Co   
9   1988eb86-f0a2-4674-ba04-02454efa0d31            10 Barrel Brewing Co   
10  1ecc330f-6275-42a5-b14e-00adbed62752  10 Torr Distilling and Brewing   
11  7531db

In [90]:
import pandas as pd

gold_file_path = "./gold/breweries_transformed.parquet"

# Ler apenas a partição 'state=california'
df= pd.read_parquet(silver_file_path)

print(df.head())

                                     id                     name brewery_type  \
0  5128df48-79fc-4f0f-8b52-d06be54d0cec           405 Brewing Co        micro   
1  9c5a66c8-cc13-416f-a5d9-0a769c87d318           512 Brewing Co        micro   
2  34e8c68b-6146-453f-a4b9-1f6cd99a5ada  1 of Us Brewing Company        micro   
3  ef970757-fe42-416f-931d-722451f1f59c     10 Barrel Brewing Co        large   
4  6d14b220-8926-4521-8d19-b98a2d6ec3db     10 Barrel Brewing Co        large   

               address_1      address_2      address_3            city  \
0         1716 Topeka St  Não informado  Não informado          Norman   
1  407 Radam Ln Ste F200  Não informado  Não informado          Austin   
2    8100 Washington Ave  Não informado  Não informado  Mount Pleasant   
3              1501 E St  Não informado  Não informado       San Diego   
4          62970 18th St  Não informado  Não informado            Bend   

  state_province postal_code        country           longitude  \
0

In [94]:

save_directory = './'
os.makedirs(save_directory, exist_ok = True) # cria o diretorio se nao existir

#salvar o dataframe GOLD
file_path = os.path.join(save_directory, 'arquivo_final.csv')
df.to_parquet(file_path, index = False)

print(f"Dataframe salvo em '{file_path}'")

Dataframe salvo em './arquivo_final.csv'


In [97]:
#importando a biblioteca 
import glob

#listar os diretorios
directory = './'

#filtrar todos os arquivos no diretorio
files = glob.glob(os.path.join(directory, '*'))

files = [f for f in files if os.path.isfile(f)]

print("Arquivos no diretorio:", files)





Arquivos no diretorio: ['.\\AMDRM_Install.log', '.\\AMD_RyzenMaster.log', '.\\arquivo_final.csv', '.\\bronze_layer.json', '.\\CASE_TESTE.ipynb', '.\\data.csv', '.\\data.parquet', '.\\filtered_data.csv', '.\\NTUSER.DAT', '.\\ntuser.dat.LOG1', '.\\ntuser.dat.LOG2', '.\\NTUSER.DAT{1b23b9b6-ab9f-11ef-8829-74563cf22598}.TM.blf', '.\\NTUSER.DAT{1b23b9b6-ab9f-11ef-8829-74563cf22598}.TMContainer00000000000000000001.regtrans-ms', '.\\NTUSER.DAT{1b23b9b6-ab9f-11ef-8829-74563cf22598}.TMContainer00000000000000000002.regtrans-ms', '.\\ntuser.ini', '.\\Untitled.ipynb']


In [96]:
print(df)

                                      id                            name  \
0   5128df48-79fc-4f0f-8b52-d06be54d0cec                  405 Brewing Co   
1   9c5a66c8-cc13-416f-a5d9-0a769c87d318                  512 Brewing Co   
2   34e8c68b-6146-453f-a4b9-1f6cd99a5ada         1 of Us Brewing Company   
3   ef970757-fe42-416f-931d-722451f1f59c            10 Barrel Brewing Co   
4   6d14b220-8926-4521-8d19-b98a2d6ec3db            10 Barrel Brewing Co   
5   e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2            10 Barrel Brewing Co   
6   e432899b-7f58-455f-9c7b-9a6e2130a1e0            10 Barrel Brewing Co   
7   9f1852da-c312-42da-9a31-097bac81c4c0            10 Barrel Brewing Co   
8   ea4f30c0-bce6-416b-8904-fab4055a7362            10 Barrel Brewing Co   
9   1988eb86-f0a2-4674-ba04-02454efa0d31            10 Barrel Brewing Co   
10  1ecc330f-6275-42a5-b14e-00adbed62752  10 Torr Distilling and Brewing   
11  7531dbd8-afc9-4b5b-95bc-7ece7f2c0bf3           10-56 Brewing Company   
12  5ae467af