Creating gold database in mounted ADLS container bikesharedata

In [0]:
# Create the gold_db database at the given /mnt location; the
# "if not exists" clause makes re-running this cell a no-op.
create_gold_db_sql = 'create database if not exists gold_db location "/mnt/bikesharestorage/bikesharedata/gold"'
spark.sql(create_gold_db_sql)

Table payments_fact

In [0]:
# Build gold_db.payments_fact from bronze_db.payments_bronze and overwrite the
# Delta table with the result.
#   date_id - payment_date rendered as text with the dashes removed
#   amount  - payment amount cast to decimal(10, 2)
payments_fact_df = spark.sql("""
    select
        payment_id,
        rider_id,
        -- cast explicitly so replace() is applied to a string rather than
        -- relying on implicit date-to-string coercion
        replace(cast(payment_date as string), '-', '') as date_id,
        payment_date,
        -- alias spelled out so the column name does not depend on how Spark
        -- auto-names an un-aliased cast expression
        cast(amount as decimal(10, 2)) as amount
    from bronze_db.payments_bronze
""")
(
    payments_fact_df.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('gold_db.payments_fact')
)

In [0]:
# Display rows from gold_db.payments_fact for a quick visual check.
payments_fact_preview = spark.sql(' select * from gold_db.payments_fact')
payments_fact_preview.show()

Out[22]: DataFrame[payment_id: int, rider_id: int, date_id: string, payment_date: date, amount: decimal(10,2)]

Table stations_dimn

In [0]:
# Copy the station attributes (id, name, latitude, longitude) from
# bronze_db.stations_bronze into the Delta table gold_db.stations_dimn,
# overwriting whatever the table held before.
stations_dimn_df = spark.sql("""select station_id, 
name, 
latitude, 
longitude 
from bronze_db.stations_bronze""")
stations_writer = stations_dimn_df.write.format('delta').mode('overwrite')
stations_writer.saveAsTable('gold_db.stations_dimn')

In [0]:
# Display rows from gold_db.stations_dimn for a quick visual check.
stations_dimn_preview = spark.sql('select * from gold_db.stations_dimn')
stations_dimn_preview.show()

Out[15]: DataFrame[station_id: string, name: string, latitude: float, longitude: float]

Table trips_fact

In [0]:
# Build gold_db.trips_fact: one row per trip in bronze_db.trips_bronze that has
# a matching rider in bronze_db.riders_bronze (inner join on rider_id), then
# overwrite the Delta table with the result.
#   trip_duration_in_sec    - seconds between started_at and ended_at
#   rider_age_at_trip_start - rider age in years at started_at, 1 decimal place
trips_fact_df = spark.sql("""
    select
        t.trip_id,
        t.rider_id,
        t.start_station_id,
        t.end_station_id,
        t.started_at as trip_start_time,
        t.ended_at as trip_end_time,
        timestampdiff(second, t.started_at, t.ended_at) as trip_duration_in_sec,
        -- calendar-based age: dividing a day count by 365 ignores leap days
        -- and drifts upward as the rider gets older, so derive years from
        -- whole/fractional months instead
        round(months_between(t.started_at, r.birthday) / 12, 1) as rider_age_at_trip_start
    from bronze_db.trips_bronze as t
    inner join bronze_db.riders_bronze as r
        on t.rider_id = r.rider_id
""")
(
    trips_fact_df.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('gold_db.trips_fact')
)

In [0]:
# Display rows from gold_db.trips_fact for a quick visual check.
trips_fact_preview = spark.sql('select * from  gold_db.trips_fact')
trips_fact_preview.show()

Out[2]: DataFrame[]

Table riders_dimn

In [0]:
# Build gold_db.riders_dimn from bronze_db.riders_bronze, enriched with the
# rideable types found on each rider's trips, and overwrite the Delta table.
#
# The trips table is reduced to distinct (rider_id, rideable_type) pairs before
# joining; joining the raw trips would repeat the identical rider row once per
# trip. A left join keeps riders that have no trips (rideable_type is null for
# them) so the dimension covers every rider in the bronze table.
#
# Resulting grain: one row per rider per distinct rideable_type.
# NOTE(review): rideable_type looks like a trip-level attribute; it is kept here
# only to preserve the existing column set - confirm whether it should move to
# the trips fact so this table can be one row per rider.
riders_dimn_df = spark.sql("""
    with rider_rideable_types as (
        -- one row per (rider, rideable_type) instead of one row per trip
        select distinct
            rider_id,
            rideable_type
        from bronze_db.trips_bronze
    )
    select
        r.rider_id,
        r.first,
        r.last,
        r.address,
        r.birthday,
        r.is_member,
        r.account_start_date,
        r.account_end_date,
        t.rideable_type
    from bronze_db.riders_bronze as r
    left join rider_rideable_types as t
        on t.rider_id = r.rider_id
""")
(
    riders_dimn_df.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('gold_db.riders_dimn')
)

In [0]:
# Display rows from gold_db.riders_dimn for a quick visual check.
riders_dimn_preview = spark.sql('select * from gold_db.riders_dimn')
riders_dimn_preview.show()

Out[21]: DataFrame[rider_id: int, first: string, last: string, address: string, birthday: date, is_member: boolean, account_start_date: date, account_end_date: date, rideable_type: string]

Table date_dimn

In [0]:
# Inclusive bounds of the generated calendar.
beginDate, endDate = '2013-02-01', '2040-12-31'

# Expand the range into one row per day (column calendarDate) and expose it to
# later SQL as the temporary view date_temp.
calendar_days_df = spark.sql(f"""select 
explode(sequence(to_date('{beginDate}'), 
to_date('{endDate}'), interval 1 day)) as calendarDate""")
calendar_days_df.createOrReplaceTempView('date_temp')

In [0]:
# Derive calendar attributes (year, quarter, month, week, day, weekday, names)
# for rows of the date_temp view and overwrite gold_db.date_dimn with them.
#
# The query inner-joins date_temp to bronze_db.payments_bronze on payment_date
# and carries payment_id as payment_date_key, so a row is produced for every
# matching payment and dates without any payment are not written.
# NOTE(review): a date dimension is normally one row per calendar date with a
# unique date_id; confirm this per-payment grain is intended before other fact
# tables join to it on date_id.
date_dimn_df = spark.sql("""SELECT
replace(calendarDate,'-','') as date_id,
p.payment_id as payment_date_key,
calendarDate AS calen_date,
YEAR(calendarDate) AS year,
QUARTER(calendarDate) AS quarter,
MONTH(calendarDate) AS month,
weekofyear(calendarDate) AS week,
DAY(calendarDate) AS day,
WEEKDAY(calendarDate) AS day_of_week,
date_format(calendarDate,'E') as day_name,
date_format(calendarDate,'MMMM') as month_name
from date_temp 
join bronze_db.payments_bronze p
ON calendarDate = p.payment_date
""")
date_dimn_writer = date_dimn_df.write.format('delta').mode('overwrite')
date_dimn_writer.saveAsTable('gold_db.date_dimn')

In [0]:
# Display rows from gold_db.date_dimn for a quick visual check.
date_dimn_preview = spark.sql('select * from gold_db.date_dimn')
date_dimn_preview.show()

Out[24]: DataFrame[date_id: string, payment_date_key: int, calen_date: date, year: int, quarter: int, month: int, week: int, day: int, day_of_week: int, day_name: string, month_name: string]