In [2]:
from pyspark.sql import SparkSession

In [6]:
# Build (or reuse) the Spark session used by every cell below. The session
# targets the standalone master at spark-master:7077 and sets the executor
# core/memory options listed in the loop.
session_builder = SparkSession.builder.appName("wh-facts-dimensions-tables-iceberg")
session_builder = session_builder.master("spark://spark-master:7077")
for conf_key, conf_value in (
    ("spark.executor.cores", 1),
    ("spark.cores.max", 6),
    ("spark.executor.memory", "512M"),
):
    session_builder = session_builder.config(conf_key, conf_value)
spark = session_builder.getOrCreate()

25/02/14 20:09:34 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [7]:
# Display the session object as this cell's output.
spark

In [8]:
%%sql

-- List the tables in the raw-transactions namespace of the demo catalog.
SHOW TABLES IN demo.SolarX_Raw_Transactions

namespace,tableName,isTemporary
SolarX_Raw_Transactions,home_power_readings,False
SolarX_Raw_Transactions,solar_panel,False
SolarX_Raw_Transactions,solar_panel_readings,False


# Create a new namespace (database) for the warehouse

In [9]:
%%sql

-- Create the warehouse namespace; IF NOT EXISTS makes re-running this cell a no-op.
CREATE DATABASE IF NOT EXISTS SolarX_WH

# Home Power Usage WH

In [10]:
%%sql

-- Inspect the schema of the raw home power readings table before modelling it.
DESCRIBE SolarX_Raw_Transactions.home_power_readings

col_name,data_type,comment
timestamp,timestamp,
15_minutes_interval,int,
min_consumption_wh,float,
max_consumption_wh,float,
,,
# Partitioning,,
Part 0,days(timestamp),
Part 1,15_minutes_interval,


## Home readings dimension and fact tables

### Home dimension

In [6]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.dim_home (if it exists) so the CREATE TABLE
-- that follows starts from scratch. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.dim_home PURGE

25/01/03 01:47:31 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
                                                                                

In [7]:
%%sql

-- Home dimension table.
CREATE TABLE SolarX_WH.dim_home (
    home_key                  SMALLINT   NOT NULL,
    home_id                   SMALLINT   NOT NULL,
    min_consumption_power_wh  FLOAT      NOT NULL,
    max_consumption_power_wh  FLOAT      NOT NULL,
    -- SCD type 2 bookkeeping for min_consumption_power_wh:
    -- validity window of the row plus a marker flag.
    start_date                TIMESTAMP  NOT NULL,
    end_date                  TIMESTAMP,
    current_flag              BOOLEAN    NOT NULL
)
USING iceberg;

In [44]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.dim_home_appliances (if it exists) ahead of
-- its re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.dim_home_appliances PURGE

25/01/03 00:29:59 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [45]:
%%sql

-- Home appliances dimension table; each row points at a home through home_key.
-- The REFERENCES note is documentation only: no constraint is declared.
CREATE TABLE SolarX_WH.dim_home_appliances (
    home_appliance_key        SMALLINT     NOT NULL,
    home_key                  SMALLINT     NOT NULL,  -- REFERENCES dim_home(home_key)
    appliance                 VARCHAR(25)  NOT NULL,
    min_consumption_power_wh  FLOAT        NOT NULL,
    max_consumption_power_wh  FLOAT        NOT NULL,
    usage_time                VARCHAR(50)  NOT NULL
)
USING iceberg;

### Home fact

In [27]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.fact_home_power_readings (if it exists) ahead
-- of its re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.fact_home_power_readings PURGE

In [28]:
%%sql

-- Home power readings fact table, partitioned by the month of date_key.
-- The REFERENCES notes are documentation only: no constraint is declared.
-- NOTE(review): home_power_reading_key is a TIMESTAMP, unlike the other keys
-- in this warehouse; confirm that is intended.
CREATE TABLE SolarX_WH.fact_home_power_readings (
    home_power_reading_key    TIMESTAMP  NOT NULL,
    home_key                  SMALLINT   NOT NULL,  -- REFERENCES dim_home(home_key)
    date_key                  TIMESTAMP  NOT NULL,  -- REFERENCES dim_date(date_key)
    min_consumption_power_wh  FLOAT      NOT NULL,
    max_consumption_power_wh  FLOAT      NOT NULL
)
USING iceberg
PARTITIONED BY (MONTH(date_key))

## Solar panel readings dimension and fact tables

In [52]:
%%sql

-- Inspect the schema of the raw solar panel table before modelling it.
DESCRIBE SolarX_Raw_Transactions.solar_panel

col_name,data_type,comment
id,int,
name,string,
capacity_kwh,float,
intensity_power_rating,float,
temperature_power_rating,float,


In [53]:
%%sql

-- Inspect the schema of the raw solar panel readings table before modelling it.
DESCRIBE SolarX_Raw_Transactions.solar_panel_readings

col_name,data_type,comment
timestamp,timestamp,
15_minutes_interval,int,
panel_id,int,
generation_power_wh,float,
,,
# Partitioning,,
Part 0,days(timestamp),
Part 1,panel_id,
Part 2,15_minutes_interval,


### Solar panel dimension

In [16]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.dim_solar_panel (if it exists) ahead of its
-- re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.dim_solar_panel PURGE

In [17]:
%%sql

-- Solar panel dimension table, versioned as SCD type 2.
CREATE TABLE SolarX_WH.dim_solar_panel(
    solar_panel_key                             INT         NOT NULL,
    solar_panel_id                              SMALLINT    NOT NULL,
    name                                        VARCHAR(20) NOT NULL,
    capacity_kwh                                FLOAT       NOT NULL,
    intensity_power_rating_wh                   FLOAT       NOT NULL,
    temperature_power_rating_c                  FLOAT       NOT NULL,

    -- scd type2: end_date stays nullable, the flag does not
    start_date                                  TIMESTAMP   NOT NULL,
    end_date                                    TIMESTAMP,

    -- NOT NULL (as in dim_home) so a filter on current_flag can never
    -- silently skip a row whose flag was left NULL.
    current_flag                                BOOLEAN     NOT NULL
)
USING iceberg;

### Solar panel fact

In [29]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.fact_solar_panel_power_readings (if it exists)
-- ahead of its re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.fact_solar_panel_power_readings PURGE

In [30]:
%%sql

-- Solar panel power readings fact table, partitioned by the month of date_key
-- and by solar_panel_id.
-- The REFERENCES notes are documentation only: no constraint is declared.
CREATE TABLE SolarX_WH.fact_solar_panel_power_readings(
    -- Declared INT to match the type of dim_solar_panel.solar_panel_key.
    solar_panel_key                 INT           NOT NULL,   -- REFERENCES dim_solar_panel(solar_panel_key)
    date_key                        TIMESTAMP     NOT NULL,   -- REFERENCES dim_date(date_key)

    solar_panel_id                  INT           NOT NULL,
    generation_power_wh             FLOAT         NOT NULL
)

USING iceberg
PARTITIONED BY (MONTH(date_key), solar_panel_id)

## Battery readings dimension and fact tables

In [8]:
%%sql

-- Inspect the schema of the raw battery readings table before modelling it.
DESCRIBE SolarX_Raw_Transactions.battery_readings

25/02/14 20:09:39 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


col_name,data_type,comment
timestamp,timestamp,
15_minutes_interval,int,
battery_name,string,
capacity_kwh,float,
max_charge_speed_w,float,
current_energy_wh,float,
is_charging,float,
status,string,
max_output_w,float,
,,


### Battery dimension

In [9]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.dim_battery (if it exists) ahead of its
-- re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.dim_battery PURGE

In [10]:
%%sql

-- Battery dimension table, versioned as SCD type 2.
CREATE TABLE SolarX_WH.dim_battery(
    battery_key                             INT         NOT NULL,
    battery_id                              SMALLINT    NOT NULL,
    name                                    VARCHAR(15) NOT NULL,
    capacity_kwh                            FLOAT       NOT NULL,
    max_charge_speed_w                      FLOAT       NOT NULL,
    max_output_w                            FLOAT       NOT NULL,

    -- scd type2: end_date stays nullable, the flag does not
    start_date                              TIMESTAMP   NOT NULL,
    end_date                                TIMESTAMP,

    -- NOT NULL (as in dim_home) so a filter on current_flag can never
    -- silently skip a row whose flag was left NULL.
    current_flag                            BOOLEAN     NOT NULL
)
USING iceberg;

### Battery fact

In [11]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.fact_battery_power_readings (if it exists)
-- ahead of its re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.fact_battery_power_readings PURGE

In [1]:
%%sql

-- Battery power readings fact table, partitioned by the month of date_key
-- and by battery_id.
-- The REFERENCES notes are documentation only: no constraint is declared.
CREATE TABLE SolarX_WH.fact_battery_power_readings(
    -- Declared INT to match the type of dim_battery.battery_key.
    battery_key                     INT           NOT NULL,   -- REFERENCES dim_battery(battery_key)
    date_key                        TIMESTAMP     NOT NULL,   -- REFERENCES dim_date(date_key)

    battery_id                      SMALLINT      NOT NULL,
    current_energy_wh               FLOAT         NOT NULL,
    is_charging                     SMALLINT      NOT NULL,
    status                          VARCHAR(15)   NOT NULL
)

USING iceberg
PARTITIONED BY (MONTH(date_key), battery_id)

25/02/14 21:35:52 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


## Date dimension table

In [8]:
%%sql

-- DESTRUCTIVE: removes SolarX_WH.dim_date (if it exists) ahead of its
-- re-creation. PURGE asks for the data to be deleted too.
DROP TABLE IF EXISTS SolarX_WH.dim_date PURGE

In [10]:
%%sql

-- Date dimension table: one row per date_key timestamp, broken out into
-- calendar parts. Partitioned by the month and minute column values.
CREATE TABLE SolarX_WH.dim_date (
    date_key    TIMESTAMP  NOT NULL,
    year        SMALLINT   NOT NULL,
    quarter     SMALLINT   NOT NULL,
    month       SMALLINT   NOT NULL,
    week        SMALLINT   NOT NULL,
    day         SMALLINT   NOT NULL,
    hour        SMALLINT   NOT NULL,
    minute      SMALLINT   NOT NULL,
    is_weekend  BOOLEAN    NOT NULL
)
USING iceberg
PARTITIONED BY (month, minute)

In [5]:
# Stop the Spark session now that all warehouse tables have been defined.
spark.stop()