In [0]:

# Ingest brewery data from the Open Brewery DB API into pandas, then inspect row counts and column types
import pyspark.sql.functions as F
import pandas as pd
import requests

# Endpoint queried for the brewery list; _get_request sends every GET to this URL.
# The run captured in this notebook returned 50 rows from a single call.
base_url = 'https://api.openbrewerydb.org/breweries'

def _get_request(params=None, timeout=30):
    """Send a GET request to ``base_url`` and return the raw response.

    Args:
        params: Optional query-string parameters forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Defaults to 30.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # requests applies no timeout unless one is passed, so a stalled
    # connection would otherwise block the notebook indefinitely.
    return requests.get(base_url, params=params, timeout=timeout)

def _get_data(params=None):
    """Fetch one API response and return its JSON payload as a DataFrame.

    Args:
        params: Optional query-string parameters forwarded to ``_get_request``.

    Returns:
        A ``pandas.DataFrame`` built from the decoded JSON payload, or an
        empty DataFrame when the payload is empty.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
    """
    response = _get_request(params=params)
    # Fail loudly on an HTTP error instead of parsing an error body as data.
    response.raise_for_status()
    # Named ``payload`` rather than ``json`` to avoid shadowing the stdlib module name.
    payload = response.json()
    return pd.DataFrame(payload) if payload else pd.DataFrame()


# _get_data already returns a DataFrame, so it is used directly.
df = _get_data(params=None)
# info() prints the number of entries, the non-null count per column and the
# dtypes; the earlier bare df.count() was evaluated and discarded, so it
# displayed nothing.
df.info()
# Last expression of the cell: rendered as the cell output.
df.head(4)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              50 non-null     object
 1   name            50 non-null     object
 2   brewery_type    50 non-null     object
 3   address_1       47 non-null     object
 4   address_2       1 non-null      object
 5   address_3       0 non-null      object
 6   city            50 non-null     object
 7   state_province  50 non-null     object
 8   postal_code     50 non-null     object
 9   country         50 non-null     object
 10  longitude       40 non-null     object
 11  latitude        40 non-null     object
 12  phone           45 non-null     object
 13  website_url     39 non-null     object
 14  state           50 non-null     object
 15  street          47 non-null     object
dtypes: object(16)
memory usage: 6.4+ KB


Unnamed: 0,id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,longitude,latitude,phone,website_url,state,street
0,5128df48-79fc-4f0f-8b52-d06be54d0cec,(405) Brewing Co,micro,1716 Topeka St,,,Norman,Oklahoma,73069-8224,United States,-97.46818222,35.25738891,4058160490,http://www.405brewing.com,Oklahoma,1716 Topeka St
1,9c5a66c8-cc13-416f-a5d9-0a769c87d318,(512) Brewing Co,micro,407 Radam Ln Ste F200,,,Austin,Texas,78745-1197,United States,,,5129211545,http://www.512brewing.com,Texas,407 Radam Ln Ste F200
2,34e8c68b-6146-453f-a4b9-1f6cd99a5ada,1 of Us Brewing Company,micro,8100 Washington Ave,,,Mount Pleasant,Wisconsin,53406-3920,United States,-87.88336350209435,42.72010826899558,2624847553,https://www.1ofusbrewing.com,Wisconsin,8100 Washington Ave
3,ef970757-fe42-416f-931d-722451f1f59c,10 Barrel Brewing Co,large,1501 E St,,,San Diego,California,92101-6618,United States,-117.129593,32.714813,6195782311,http://10barrel.com,California,1501 E St


In [0]:
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, FloatType
from pyspark.sql import SparkSession

# Creating spark session to transform pandas into pyspark
spark = SparkSession.builder.appName("Breweries Case").getOrCreate()

# Column names of the staging schema, in the same order as the fields of the
# original hand-written schema. "id" is lower-case so that it matches the
# bronze table column and the MERGE join key without relying on Spark's
# case-insensitive name resolution.
brewery_columns = [
    "id", "name", "brewery_type",
    "address_1", "address_2", "address_3",
    "city", "state_province", "postal_code", "country",
    "longitude", "latitude",
    "phone", "website_url",
    "state", "street",
]

# User-defined schema: every column is loaded as a nullable string.
mySchema = StructType(
    [StructField(column, StringType(), True) for column in brewery_columns]
)

# Enable Arrow-based columnar data transfers
# spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
df_beer = spark.createDataFrame(df, schema=mySchema, verifySchema=False)
df_beer.printSchema()
df_beer.show()
# Register the frame as a temp view so the %sql MERGE cell can read it.
df_beer.createOrReplaceTempView("vw_breweries")

 



root
 |-- Id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- brewery_type: string (nullable = true)
 |-- address_1: string (nullable = true)
 |-- address_2: string (nullable = true)
 |-- address_3: string (nullable = true)
 |-- city: string (nullable = true)
 |-- state_province: string (nullable = true)
 |-- postal_code: string (nullable = true)
 |-- country: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)
 |-- phone: string (nullable = true)
 |-- website_url: string (nullable = true)
 |-- state: string (nullable = true)
 |-- street: string (nullable = true)

+--------------------+--------------------+------------+--------------------+---------+---------+--------------+--------------+-----------+-------------+------------------+-----------------+------------+--------------------+-------------+--------------------+
|                  Id|                name|brewery_type|           address_1|address_2|address_3|

In [0]:

%sql

-- Bronze layer schema; a no-op when brewery_bronze already exists.
CREATE SCHEMA IF NOT EXISTS brewery_bronze
  LOCATION "/FileStore/bronze"

In [0]:
%sql
-- Silver layer schema; a no-op when brewery_silver already exists.
CREATE SCHEMA IF NOT EXISTS brewery_silver
  LOCATION "/FileStore/silver"

In [0]:
%sql

-- Gold layer schema; a no-op when brewery_gold already exists.
CREATE SCHEMA IF NOT EXISTS brewery_gold
  LOCATION "/FileStore/gold"

In [0]:
%sql

-- Bronze Delta table: same columns as the vw_breweries staging view plus a
-- load date. The MERGE in this notebook joins on id and sets updatetime.
-- NOTE(review): this LOCATION is the same path as the brewery_bronze database
-- LOCATION declared earlier in this notebook; confirm the overlap is intended.
CREATE TABLE IF NOT EXISTS brewery_bronze.breweries (
  -- identity
  id             STRING,
  name           STRING,
  brewery_type   STRING,
  -- address
  address_1      STRING,
  address_2      STRING,
  address_3      STRING,
  city           STRING,
  state_province STRING,
  postal_code    STRING,
  country        STRING,
  -- coordinates: FLOAT here, while the staging view carries them as strings
  longitude      FLOAT,
  latitude       FLOAT,
  -- contact
  phone          STRING,
  website_url    STRING,
  state          STRING,
  street         STRING,
  -- load metadata
  updatetime     DATE
)
USING DELTA
LOCATION "/FileStore/bronze"

In [0]:
%sql

-- Silver Delta table. Compared with the bronze table it has no
-- address_1/address_2/address_3 columns and adds a single address column.
-- Nothing in this notebook writes to it.
-- NOTE(review): this LOCATION is the same path as the brewery_silver database
-- LOCATION declared earlier in this notebook; confirm the overlap is intended.
CREATE TABLE IF NOT EXISTS brewery_silver.breweries (
  id             STRING,
  name           STRING,
  brewery_type   STRING,
  city           STRING,
  state_province STRING,
  postal_code    STRING,
  country        STRING,
  longitude      FLOAT,
  latitude       FLOAT,
  phone          STRING,
  website_url    STRING,
  state          STRING,
  street         STRING,
  updatetime     DATE,
  -- presumably a consolidated form of the bronze address_* columns; TODO confirm
  address        STRING
)
USING DELTA
LOCATION "/FileStore/silver"


In [0]:
%sql


-- Gold Delta table with one integer measure per (brewery_type, city).
-- Nothing in this notebook writes to it.
-- NOTE(review): this LOCATION is the same path as the brewery_gold database
-- LOCATION declared earlier in this notebook; confirm the overlap is intended.
CREATE TABLE IF NOT EXISTS brewery_gold.breweries (
  brewery_type STRING,
  city         STRING,
  -- presumably the number of breweries for the type/city pair; TODO confirm
  qtde         INTEGER
)
USING DELTA
LOCATION "/FileStore/gold"

In [0]:
%sql
-- Upsert the staged API rows (temp view vw_breweries) into the bronze table.
-- Rows are matched on id: existing rows are overwritten, new rows inserted.
-- updatetime is a DATE column, so it is stamped with current_date() rather
-- than relying on an implicit TIMESTAMP -> DATE cast.
MERGE INTO brewery_bronze.breweries tgt
USING vw_breweries upd
ON tgt.id = upd.id
WHEN MATCHED THEN
  UPDATE SET
       tgt.name            = upd.name
      ,tgt.brewery_type    = upd.brewery_type
      ,tgt.address_1       = upd.address_1
      ,tgt.address_2       = upd.address_2
      ,tgt.address_3       = upd.address_3
      ,tgt.city            = upd.city
      ,tgt.state_province  = upd.state_province
      ,tgt.postal_code     = upd.postal_code
      ,tgt.country         = upd.country
      ,tgt.longitude       = upd.longitude
      ,tgt.latitude        = upd.latitude
      ,tgt.phone           = upd.phone
      ,tgt.website_url     = upd.website_url
      ,tgt.state           = upd.state
      ,tgt.street          = upd.street
      ,tgt.updatetime      = current_date()
WHEN NOT MATCHED THEN
  INSERT (
       id            ,name
      ,brewery_type  ,address_1
      ,address_2     ,address_3
      ,city          ,state_province
      ,postal_code   ,country
      ,longitude     ,latitude
      ,phone         ,website_url
      ,state         ,street
      ,updatetime
  )
  -- Source columns are qualified with upd. so each value's origin is explicit.
  VALUES (
       upd.id            ,upd.name
      ,upd.brewery_type  ,upd.address_1
      ,upd.address_2     ,upd.address_3
      ,upd.city          ,upd.state_province
      ,upd.postal_code   ,upd.country
      ,upd.longitude     ,upd.latitude
      ,upd.phone         ,upd.website_url
      ,upd.state         ,upd.street
      ,current_date()
  )

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
50,0,0,50


In [0]:
%sql
-- Inspect the bronze table after the MERGE.
SELECT *
FROM brewery_bronze.breweries

id,name,brewery_type,address_1,address_2,address_3,city,state_province,postal_code,country,longitude,latitude,phone,website_url,state,street,updatetime
50521ef7-f543-4c5d-98b1-0d0ee1a2be01,12welve Eyes Brewing,micro,141 E 4th St Ste LL2,,,Saint Paul,Minnesota,55101-1639,United States,,,6514938106.0,http://www.12welveEyes.com,Minnesota,141 E 4th St Ste LL2,2024-10-29
950180bd-29c9-46b3-ad0c-e6f09799ec7f,13 Below Brewery,micro,7391 Forbes Rd,,,Cincinnati,Ohio,45233-1013,United States,-84.706345,39.126396,5139750613.0,http://www.13belowbrewery.com,Ohio,7391 Forbes Rd,2024-10-29
45119c56-345b-4adc-b481-c5cf7bfe98c4,13 Stripes Brewery,brewpub,"250 Mill St, Suite PW3101",,,Taylors,South Carolina,29687,United States,,,8643491430.0,http://www.13StripesBrewery.com,South Carolina,"250 Mill St, Suite PW3101",2024-10-29
936c3d7e-5d54-4459-b72c-117cdda059b4,13 Virtues Brewing Co,brewpub,6410 SE Milwaukie Ave,,,Portland,Oregon,97202-5518,United States,-122.64875,45.476254,5032393831.0,http://www.13virtuesbrewing.com,Oregon,6410 SE Milwaukie Ave,2024-10-29
5c53b314-ebab-4e3e-89be-e4139d9318ae,1323 R & D,micro,1323 Capital Blvd 1323 R and D,,,Raleigh,North Carolina,27603-1117,United States,,,9199775654.0,http://www.1323rnd.com,North Carolina,1323 Capital Blvd 1323 R and D,2024-10-29
4788221a-a03b-458c-9084-4cadd69ade6d,14 Cannons Brewing Company,micro,31125 Via Colinas Ste 907,,,Westlake Village,California,91362-3974,United States,-118.8024,34.15334,8186996165.0,http://14cannons.com,California,31125 Via Colinas Ste 907,2024-10-29
e5f3e72a-fee2-4813-82cf-f2e53b439ae6,12 Acres Brewing Company,micro,Unnamed Street,Clonmore,,Killeshin,Laois,R93 X3X8,Ireland,-6.979344,52.849308,353599107299.0,https://12acresbrewing.ie/,Laois,Unnamed Street,2024-10-29
d81ff708-b5d2-478f-af6a-6d40f5beb9ac,12 Gates Brewing Company,brewpub,80 Earhart Dr Ste 20,,,Williamsville,New York,14221-7804,United States,,,7169066600.0,http://www.12gatesbrewing.com,New York,80 Earhart Dr Ste 20,2024-10-29
fb94830f-6196-4f59-9189-c9060b778085,12 West Brewing Company,micro,3000 E Ray Rd Bldg 6,,,Gilbert,Arizona,85296-7832,United States,,,6023395014.0,http://www.12westbrewing.com,Arizona,3000 E Ray Rd Bldg 6,2024-10-29
0faa0fb2-fffa-416d-9eab-46f67477c8ef,12 West Brewing Company - Production Facility,micro,,,,Mesa,Arizona,85207,United States,-111.58607,33.436188,,,Arizona,,2024-10-29


In [0]:
# Optional: uncomment the call below to run the "2_silver_layer" notebook from here (60-second timeout).
#dbutils.notebook.run("2_silver_layer",60)