### Day 3: Transformations for Gold Tables

In [0]:
from pyspark.sql.functions import col, sum as _sum, max as _max

# Use the Silver schema so the unqualified table names below resolve against
# databricks_cat.silver.
spark.sql("USE CATALOG databricks_cat")
spark.sql("USE SCHEMA silver")

df_owid = spark.table("owid_subset_cleaned")
df_vacc = spark.table("vaccinations_by_manufacturer_cleaned")

# Rows rendered per preview; a handful is enough to eyeball columns and types.
PREVIEW_ROWS = 10

# Show sample data (bounded with limit() so the preview really is a sample
# rather than a render of the whole table)
df_owid.limit(PREVIEW_ROWS).display()

# Show sample data from manufacturer data
df_vacc.limit(PREVIEW_ROWS).display()

##### Create Gold-level aggregated tables: daily people vaccinated by country, and daily vaccinations by manufacturer


In [0]:
from pyspark.sql.functions import sum as _sum
# Read the cleaned Silver tables by fully qualified name, so this cell does not
# depend on which catalog/schema the session currently points at.
df_owid = spark.table("databricks_cat.silver.owid_subset_cleaned")
df_vacc = spark.table("databricks_cat.silver.vaccinations_by_manufacturer_cleaned")

# Transform: sum people_vaccinated for each (location, date) pair.
df_tot_vacc = (
    df_owid
    .groupBy("location", "date")
    .agg(_sum("people_vaccinated").alias("people_vaccinated"))
)

# Transform: sum total_vaccinations for each (date, vaccine, location) triple.
df_vacc_by_manufacturer = (
    df_vacc
    .groupBy("date", "vaccine", "location")
    .agg(_sum("total_vaccinations").alias("total_vaccinations"))
)

# Switch the session to the Gold schema. The catalog is selected explicitly so
# that the schema switch does not rely on an earlier cell having run; later
# cells that use unqualified Gold table names keep working.
spark.sql("USE CATALOG databricks_cat")
spark.sql("USE SCHEMA gold")

# Save to Gold tables. Targets are fully qualified so the write can never land
# in a different catalog/schema than intended. Each table is fully replaced
# (mode "overwrite"), and overwriteSchema lets the stored schema change too.
gold_outputs = (
    (df_tot_vacc, "databricks_cat.gold.daily_country_vaccinations"),
    (df_vacc_by_manufacturer, "databricks_cat.gold.vaccinations_by_manufacturer"),
)
for gold_df, gold_table in gold_outputs:
    (
        gold_df.write
        .format("delta")
        .option("overwriteSchema", "true")
        .mode("overwrite")
        .saveAsTable(gold_table)
    )

In [0]:
# Show Gold Table Preview
# Tables are addressed by fully qualified name so the preview does not depend
# on the session's current catalog/schema (set by other cells).
spark.sql("SELECT * FROM databricks_cat.gold.daily_country_vaccinations LIMIT 5").show()
spark.sql("SELECT * FROM databricks_cat.gold.vaccinations_by_manufacturer LIMIT 5").show()

+-------------+----------+-----------------+
|     location|      date|people_vaccinated|
+-------------+----------+-----------------+
|United States|2021-11-04|      2.2325143E8|
|       France|2021-08-21|      4.7987077E7|
|United States|2021-02-11|      4.1179619E7|
|        Italy|2021-08-31|      4.2819383E7|
|United States|2021-03-21|      9.0645717E7|
+-------------+----------+-----------------+

+------------------+--------+------------------+
|           vaccine|location|total_vaccinations|
+------------------+--------+------------------+
|   Pfizer/BioNTech|   Italy|   7.5090024728E10|
|           Novavax|   Italy|         1.16305E7|
|Oxford/AstraZeneca|   Spain|      5.19977338E8|
|           Novavax| Germany|       5.0932135E7|
|   Johnson&Johnson| Germany|     2.184070823E9|
+------------------+--------+------------------+

