**Run this notebook only once to create the required tables. The `yahoo_finance` catalog must already exist in Unity Catalog before it is run.**

In [0]:
%sql
-- Make yahoo_finance the current catalog so the schema-qualified names below resolve inside it.
USE CATALOG yahoo_finance;
-- Create the schemas referenced by the table definitions in this cell.
-- IF NOT EXISTS keeps these statements safe to repeat.
CREATE SCHEMA IF NOT EXISTS bronze;
CREATE SCHEMA IF NOT EXISTS silver;
CREATE SCHEMA IF NOT EXISTS gold;
CREATE SCHEMA IF NOT EXISTS processrunlogs;

-- Process run log: one Delta table holding ticker, process name, timestamps and status.
-- NOTE(review): every table in this cell is created with an explicit LOCATION. If these
-- are external tables, DROP TABLE removes only the catalog entry and leaves the files at
-- that path, so the CREATE that follows may attach to existing data. Confirm this is intended.
DROP TABLE IF EXISTS processrunlogs.processrunlog;
CREATE TABLE IF NOT EXISTS processrunlogs.processrunlog (
    id               BIGINT GENERATED ALWAYS AS IDENTITY,  -- value is generated, writers cannot supply it
    ticker           STRING,
    processname      STRING,
    last_loaded_date TIMESTAMP,
    startdate        TIMESTAMP,
    enddate          TIMESTAMP,
    status           STRING
)
USING DELTA
LOCATION 'abfss://processrunlogs@yahoofinancestorage.dfs.core.windows.net/processrunlog';

-- Raw data table: every column, including datetime, prices and volume, is kept as STRING.
DROP TABLE IF EXISTS bronze.finance_bronze;
CREATE TABLE IF NOT EXISTS bronze.finance_bronze (
    ticker   STRING,
    datetime STRING,
    close    STRING COMMENT 'close price',
    high     STRING COMMENT 'high price',
    low      STRING COMMENT 'low price',
    open     STRING COMMENT 'open price',
    `volume` STRING COMMENT 'volume'
)
USING DELTA
LOCATION 'abfss://bronze@yahoofinancestorage.dfs.core.windows.net/finance_bronze';

-- Transformed financial fact table: same measures as the raw table but with
-- proper data types (TIMESTAMP, DOUBLE, BIGINT), plus date_id and inserted_datetime.
DROP TABLE IF EXISTS silver.finance_silver;
CREATE TABLE IF NOT EXISTS silver.finance_silver (
    ticker            STRING,
    date_id           STRING,
    datetime          TIMESTAMP,
    close             DOUBLE COMMENT 'close price',
    high              DOUBLE COMMENT 'high price',
    low               DOUBLE COMMENT 'low price',
    open              DOUBLE COMMENT 'open price',
    `volume`          BIGINT COMMENT 'volume',
    inserted_datetime TIMESTAMP
)
USING DELTA
LOCATION 'abfss://silver@yahoofinancestorage.dfs.core.windows.net/finance_silver';

-- Transformed ticker table (company names) with a generated ticker_id.
DROP TABLE IF EXISTS silver.ticker_silver;
CREATE TABLE IF NOT EXISTS silver.ticker_silver (
    ticker_id         BIGINT GENERATED ALWAYS AS IDENTITY,  -- value is generated, writers cannot supply it
    ticker            STRING,
    inserted_datetime TIMESTAMP
)
USING DELTA
LOCATION 'abfss://silver@yahoofinancestorage.dfs.core.windows.net/ticker_silver';

-- Ticker dimension for reports. ticker_id is a plain BIGINT here, not an identity column.
DROP TABLE IF EXISTS gold.ticker_gold;
CREATE TABLE IF NOT EXISTS gold.ticker_gold (
    ticker_id BIGINT,
    ticker    STRING
)
USING DELTA
LOCATION 'abfss://gold@yahoofinancestorage.dfs.core.windows.net/ticker_gold';

-- Detailed financial fact table for reports, carrying ticker_id and date_id
-- alongside the price and volume measures.
DROP TABLE IF EXISTS gold.finance_gold;
CREATE TABLE IF NOT EXISTS gold.finance_gold (
    ticker_id BIGINT,
    date_id   STRING,
    datetime  TIMESTAMP,
    close     DOUBLE COMMENT 'close price',
    high      DOUBLE COMMENT 'high price',
    low       DOUBLE COMMENT 'low price',
    open      DOUBLE COMMENT 'open price',
    `volume`  BIGINT COMMENT 'volume'
)
USING DELTA
LOCATION 'abfss://gold@yahoofinancestorage.dfs.core.windows.net/finance_gold';

-- Date dimension for reports. Only date is a DATE, the remaining attributes are STRING.
DROP TABLE IF EXISTS gold.date_gold;
CREATE TABLE IF NOT EXISTS gold.date_gold (
    date_id STRING,
    date    DATE,
    week    STRING,
    month   STRING,
    year    STRING
)
USING DELTA
LOCATION 'abfss://gold@yahoofinancestorage.dfs.core.windows.net/date_gold';

-- Daily aggregated financial fact table for reports.
-- Grain columns: ticker_id, year, month, week, date.
DROP TABLE IF EXISTS gold.finance_gold_daily;
CREATE TABLE IF NOT EXISTS gold.finance_gold_daily (
    ticker_id       BIGINT,
    year            STRING,
    month           STRING,
    week            STRING,
    date            DATE,
    min_of_open     DOUBLE,
    max_of_close    DOUBLE,
    max_of_high     DOUBLE,
    min_of_low      DOUBLE,
    avg_of_close    DOUBLE,
    stddev_of_close DOUBLE,
    median_of_close DOUBLE,
    sum_of_volume   BIGINT
)
USING DELTA
LOCATION 'abfss://gold@yahoofinancestorage.dfs.core.windows.net/finance_gold_daily';
-- Weekly aggregated financial fact table for reports.
-- Grain columns: ticker_id, year, month, week.
DROP TABLE IF EXISTS gold.finance_gold_weekly;
CREATE TABLE IF NOT EXISTS gold.finance_gold_weekly (
    ticker_id       BIGINT,
    year            STRING,
    month           STRING,
    week            STRING,
    min_of_open     DOUBLE,
    max_of_close    DOUBLE,
    max_of_high     DOUBLE,
    min_of_low      DOUBLE,
    avg_of_close    DOUBLE,
    stddev_of_close DOUBLE,
    median_of_close DOUBLE,
    sum_of_volume   BIGINT
)
USING DELTA
LOCATION 'abfss://gold@yahoofinancestorage.dfs.core.windows.net/finance_gold_weekly';

-- Monthly aggregated financial fact table for reports.
-- Grain columns: ticker_id, year, month.
DROP TABLE IF EXISTS gold.finance_gold_monthly;
CREATE TABLE IF NOT EXISTS gold.finance_gold_monthly (
    ticker_id       BIGINT,
    year            STRING,
    month           STRING,
    min_of_open     DOUBLE,
    max_of_close    DOUBLE,
    max_of_high     DOUBLE,
    min_of_low      DOUBLE,
    avg_of_close    DOUBLE,
    stddev_of_close DOUBLE,
    median_of_close DOUBLE,
    sum_of_volume   BIGINT
)
USING DELTA
LOCATION 'abfss://gold@yahoofinancestorage.dfs.core.windows.net/finance_gold_monthly';

In [0]:
# Populate the date dimension table (yahoo_finance.gold.date_gold) with one row per calendar day.
import pandas as pd

# Inclusive bounds of the generated calendar.
min_date = '2025-01-01'  # first date for which data is collected
max_date = '2026-12-31'  # last date to generate (original author note: should be scheduled)

# pd.date_range defaults to daily frequency, so this is every day from min_date to max_date.
date_range = pd.date_range(start=min_date, end=max_date)

# All attributes except 'date' are formatted strings, matching the STRING columns of the table.
dim_date = pd.DataFrame(
    {
        'date_id': date_range.strftime('%Y%m%d'),  # e.g. 20250101
        'date': date_range.date,                   # datetime.date objects
        'week': date_range.strftime('%U'),         # week of year, Sunday as first day, 00-53
        'month': date_range.strftime('%B'),        # full month name (locale dependent)
        'year': date_range.strftime('%Y'),
    }
)

# Convert to a Spark DataFrame and replace the current contents of the table.
dim_date_spark = spark.createDataFrame(dim_date)
(
    dim_date_spark.write
    .mode("overwrite")
    .saveAsTable("yahoo_finance.gold.date_gold")
)
