# 🟫 Camada Bronze — Análise Exploratória dos dados brutos
Exploração dos dados brutos ingeridos da Open Brewery DB API.

In [1]:
import json
import os
import pandas as pd
from glob import glob

# Directory holding the raw (bronze-layer) JSON dumps; adjust it to match your machine.
BRONZE_PATH = '../data/bronze'

# Collect every raw dump and sort by filename. With a zero-padded timestamp
# suffix (e.g. breweries_raw_20260227_162139.json) this ordering is also
# chronological, so the last entry is the most recent dump.
files = sorted(glob(os.path.join(BRONZE_PATH, 'breweries_raw_*.json')))
print(f'Arquivos encontrados: {len(files)}')
for path in files:
    # Report each file's size in kilobytes as a quick sanity check.
    size_kb = os.path.getsize(path) / 1024
    print(f'  {os.path.basename(path)} — {size_kb:.1f} KB')

Arquivos encontrados: 1
  breweries_raw_20260227_162139.json — 4725.9 KB


In [2]:
# Load the most recent dump: `files` is sorted ascending by filename, so the
# last entry is taken as the newest one.
# Fail with an explicit message instead of a bare IndexError when no dump exists.
if not files:
    raise FileNotFoundError(
        f'Nenhum arquivo breweries_raw_*.json encontrado em {BRONZE_PATH!r}'
    )
latest = files[-1]
with open(latest, 'r', encoding='utf-8') as fh:
    raw_data = json.load(fh)

print(f'Total de registros: {len(raw_data)}')
if raw_data:
    print('\nExemplo de registro bruto:')
    # ensure_ascii=False keeps accented characters readable instead of \uXXXX escapes.
    print(json.dumps(raw_data[0], indent=2, ensure_ascii=False))
else:
    # An empty dump has no first record to show; avoid indexing into it.
    print('\nNenhum registro para exibir.')

Total de registros: 9251

Exemplo de registro bruto:
{
  "id": "5128df48-79fc-4f0f-8b52-d06be54d0cec",
  "name": "(405) Brewing Co",
  "brewery_type": "micro",
  "address_1": "1716 Topeka St",
  "address_2": null,
  "address_3": null,
  "city": "Norman",
  "state_province": "Oklahoma",
  "postal_code": "73069-8224",
  "country": "United States",
  "longitude": -97.46818222,
  "latitude": 35.25738891,
  "phone": "4058160490",
  "website_url": "http://www.405brewing.com",
  "state": "Oklahoma",
  "street": "1716 Topeka St"
}


In [3]:
# Build a DataFrame from the raw records to make exploration easier.
df = pd.DataFrame(raw_data)
print(f'Shape: {df.shape}')
print('\nColunas disponíveis:')
print(df.columns.tolist())

Shape: (9251, 16)

Colunas disponíveis:
['id', 'name', 'brewery_type', 'address_1', 'address_2', 'address_3', 'city', 'state_province', 'postal_code', 'country', 'longitude', 'latitude', 'phone', 'website_url', 'state', 'street']


In [4]:
# Preview the first ten rows (rendered as the cell's output).
df.head(n=10)

Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,longitude,latitude,phone,website_url,state,street
0,5128df48-79fc-4f0f-8b52-d06be54d0cec,(405) Brewing Co,micro,1716 Topeka St,,,Norman,Oklahoma,73069-8224,United States,-97.468182,35.257389,4058160490,http://www.405brewing.com,Oklahoma,1716 Topeka St
1,9c5a66c8-cc13-416f-a5d9-0a769c87d318,(512) Brewing Co,micro,407 Radam Ln Ste F200,,,Austin,Texas,78745-1197,United States,,,5129211545,http://www.512brewing.com,Texas,407 Radam Ln Ste F200
2,34e8c68b-6146-453f-a4b9-1f6cd99a5ada,1 of Us Brewing Company,micro,8100 Washington Ave,,,Mount Pleasant,Wisconsin,53406-3920,United States,-87.883364,42.720108,2624847553,https://www.1ofusbrewing.com,Wisconsin,8100 Washington Ave
3,6d14b220-8926-4521-8d19-b98a2d6ec3db,10 Barrel Brewing Co,large,62970 18th St,,,Bend,Oregon,97701-9847,United States,-121.281706,44.086835,5415851007,http://www.10barrel.com,Oregon,62970 18th St
4,e2e78bd8-80ff-4a61-a65c-3bfbd9d76ce2,10 Barrel Brewing Co,large,1135 NW Galveston Ave Ste B,,,Bend,Oregon,97703-2465,United States,-121.328802,44.057565,5415851007,,Oregon,1135 NW Galveston Ave Ste B
5,e432899b-7f58-455f-9c7b-9a6e2130a1e0,10 Barrel Brewing Co,large,1411 NW Flanders St,,,Portland,Oregon,97209-2620,United States,-122.685506,45.525979,5032241700,http://www.10barrel.com,Oregon,1411 NW Flanders St
6,ef970757-fe42-416f-931d-722451f1f59c,10 Barrel Brewing Co,large,1501 E St,,,San Diego,California,92101-6618,United States,-117.129593,32.714813,6195782311,http://10barrel.com,California,1501 E St
7,9f1852da-c312-42da-9a31-097bac81c4c0,10 Barrel Brewing Co - Bend Pub,large,62950 NE 18th St,,,Bend,Oregon,97701,United States,-121.280954,44.091211,5415851007,,Oregon,62950 NE 18th St
8,ea4f30c0-bce6-416b-8904-fab4055a7362,10 Barrel Brewing Co - Boise,large,826 W Bannock St,,,Boise,Idaho,83702-5857,United States,-116.202929,43.618516,2083445870,http://www.10barrel.com,Idaho,826 W Bannock St
9,1988eb86-f0a2-4674-ba04-02454efa0d31,10 Barrel Brewing Co - Denver,large,2620 Walnut St,,,Denver,Colorado,80205-2231,United States,-104.985366,39.759251,7205738992,,Colorado,2620 Walnut St


In [5]:
# Show each column's dtype, then the null count for every column that has
# at least one missing value, largest count first.
print('=== Tipos de dados ===')
print(df.dtypes)
print('\n=== Valores nulos por coluna ===')
nulls = df.isna().sum()
print(nulls.loc[nulls.gt(0)].sort_values(ascending=False))

=== Tipos de dados ===
id                 object
name               object
brewery_type       object
address_1          object
address_2          object
address_3          object
city               object
state_province     object
postal_code        object
country            object
longitude         float64
latitude          float64
phone              object
website_url        object
state              object
street             object
dtype: object

=== Valores nulos por coluna ===
address_3      9212
address_2      8974
longitude      2290
latitude       2290
website_url    1222
phone           946
address_1       739
street          739
dtype: int64


In [6]:
# Number of breweries per country (value_counts orders by frequency, highest first).
print('=== Breweries por país ===')
print(df['country'].value_counts())

=== Breweries por país ===
United States    8175
Australia         514
Canada            119
South Africa      104
Ireland            70
England            62
South Korea        61
Poland             34
Singapore          33
Austria            15
Portugal           14
Japan              10
Germany            10
Sweden             10
Scotland           10
Italy               4
France              3
Isle of Man         2
Ukraine             1
Name: country, dtype: int64


In [7]:
# Number of breweries per brewery type (value_counts orders by frequency, highest first).
print('=== Breweries por tipo ===')
brewery_type_counts = df['brewery_type'].value_counts()
print(brewery_type_counts)

=== Breweries por tipo ===
micro         4911
brewpub       2620
planning       649
closed         361
regional       230
contract       184
large          113
proprietor      68
taproom         45
bar             37
nano            22
cidery           7
beergarden       3
location         1
Name: brewery_type, dtype: int64


In [8]:
# Percentage of null values per column, highest first; only columns with at
# least one null are printed.
print('=== % de valores nulos por coluna ===')
missing_pct = (
    df.isna()
    .sum()
    .div(len(df))
    .mul(100)
    .sort_values(ascending=False)
)
print(missing_pct.loc[missing_pct.gt(0)].round(2).to_string())

=== % de valores nulos por coluna ===
address_3      99.58
address_2      97.01
longitude      24.75
latitude       24.75
website_url    13.21
phone          10.23
address_1       7.99
street          7.99
