### Gold Notebook
This notebook reads the silver tables, combines them into a single DataFrame, and stores the result in the gold table.

In [0]:
# Load the three processed silver tables that are combined into the gold table.
df_visits, df_customers, df_visits_customers = (
    spark.table(table_name)
    for table_name in (
        "silver.avohilmo.visits_processed",
        "silver.avohilmo.customers_processed",
        "silver.avohilmo.visits_customers_processed",
    )
)

In [0]:
from pyspark.sql.functions import coalesce
from pyspark.sql.functions import col

# Columns on which the three silver tables are matched in both outer joins.
join_keys = ["region_name", "profession_name", "service_type_name", "year"]
silver_frames = (df_visits, df_customers, df_visits_customers)


def _first_non_null(column_name):
    """Return the first non-null `column_name` among visits, customers, visits_customers."""
    return coalesce(*(frame[column_name] for frame in silver_frames))


# Full outer joins keep every key combination that appears in any of the tables.
df_joined = (
    df_visits.alias("v")
    .join(df_customers.alias("c"), join_keys, "outer")
    .join(df_visits_customers.alias("r"), join_keys, "outer")
)

# Rename the keys to their gold names and expose each table's `value`
# under a measure-specific column name.
df_gold = df_joined.select(
    _first_non_null("region_name").alias("area"),
    _first_non_null("profession_name").alias("profession"),
    _first_non_null("service_type_name").alias("service_type"),
    _first_non_null("year").alias("year"),
    df_visits["value"].alias("visits"),
    df_customers["value"].alias("customers"),
    df_visits_customers["value"].alias("visits_per_customer"),
)


In [0]:
# Make gold.avohilmo the session default so that unqualified table names
# in the following cells resolve there. The catalog is selected before the database.
session_catalog = spark.catalog
session_catalog.setCurrentCatalog("gold")
session_catalog.setCurrentDatabase("avohilmo")

In [0]:
%sql
-- Create the gold table with explicit column types if it does not exist yet;
-- when the table already exists this statement does nothing.
-- The unqualified name resolves against the current catalog/database
-- selected in the previous cell (gold / avohilmo).
-- NOTE(review): the later overwrite write of df_gold must produce a schema
-- compatible with these types -- confirm against the silver `value` columns.
CREATE TABLE IF NOT EXISTS aggregated_data (
  area STRING,                 -- from region_name
  profession STRING,           -- from profession_name
  service_type STRING,         -- from service_type_name
  year INT,
  visits INT,                  -- `value` of visits_processed
  customers INT,               -- `value` of customers_processed
  visits_per_customer DOUBLE   -- `value` of visits_customers_processed
);

In [0]:
# Replace the contents of the gold table with the freshly combined data.
(
    df_gold.write
    .mode("overwrite")
    .saveAsTable("gold.avohilmo.aggregated_data")
)

### Querying the gold table to verify that the data was written

In [0]:
%sql
-- Show the rows of the gold table to confirm the write succeeded.
SELECT *
FROM gold.avohilmo.aggregated_data

In [0]:
%sql
-- Spot check: total visits for one year / profession / area combination.
SELECT sum(visits)
FROM gold.avohilmo.aggregated_data
WHERE year = 2015
  AND profession = 'Lääkärit'
  AND area = 'Ulkomaat'