# Ingest Breweries
Sample code for ingesting data from the OpenBreweryDB API.  See https://www.openbrewerydb.org/documentation for API details.

In [0]:
# Get a list of breweries with a page size of 10.
import requests, json
import pandas as pd

# Query-string parameters for the list endpoint; "per_page" sets the page size.
request_params = {"per_page": 10}

# requests has no default timeout, so without one a stalled connection would
# block this cell indefinitely.
api_response = requests.get(
    "https://api.openbrewerydb.org/v1/breweries",
    params=request_params,
    timeout=30,
)

# Stop here on an HTTP 4xx/5xx instead of letting an error payload flow into
# the JSON parsing and DataFrame cells below.
api_response.raise_for_status()

# Show the raw response body (bytes) for a quick visual check.
print(api_response.content)


In [0]:

# Turn the API response into a Spark DataFrame.
# Step 1: decode the raw response body into plain Python objects and echo them.
json_data = json.loads(s=api_response.content)
print(json_data)

# Step 2: go through pandas first, then hand the pandas frame to Spark.
breweries_pdf = pd.DataFrame(data=json_data)
breweries_df = spark.createDataFrame(data=breweries_pdf)

# Step 3: render the resulting Spark DataFrame in the notebook.
display(breweries_df)

In [0]:
# Persist the sample so it can be queried later.
# Make sure the target schema exists first.  The 'bronze' prefix aligns with
# the raw ingestion tier of the medallion architecture.
spark.sql("CREATE SCHEMA IF NOT EXISTS bronze_examples")

# Write breweries_df to the bronze_examples.breweries table, replacing any
# existing contents ("overwrite" mode).
(
    breweries_df.write
    .mode("overwrite")
    .saveAsTable("bronze_examples.breweries")
)

In [0]:
# Find closed breweries.
# page_size is also read by the follow-up cell to decide whether paging is needed.
page_size = 100

# Ask for one page of up to page_size records, filtered to type "closed".
request_params = {
    "per_page": page_size,
    "by_type": "closed",
}

# requests has no default timeout, so without one a stalled connection would
# block this cell indefinitely.
api_response = requests.get(
    "https://api.openbrewerydb.org/v1/breweries",
    params=request_params,
    timeout=30,
)

# Stop here on an HTTP 4xx/5xx instead of trying to parse an error payload
# as brewery data.
api_response.raise_for_status()

# Convert the JSON to a PySpark dataframe.
json_data = json.loads(api_response.content)

# Use Pandas as an interim step to convert to Spark DataFrame.
breweries_closed_pdf = pd.DataFrame(json_data)
breweries_closed_df = spark.createDataFrame(breweries_closed_pdf)
display(breweries_closed_df)

In [0]:
# Did we get all of them?
# A page shorter than page_size is taken to mean nothing was left to fetch;
# a full page is treated as a sign that more results remain on later pages.
closed_count = breweries_closed_df.count()
got_everything = closed_count < page_size
print(
    "Found all closed breweries."
    if got_everything
    else "There are more closed breweries; we'll need to page through the results."
)