In [0]:
from datetime import datetime
# Capture the moment this run began, both as a datetime object and as a
# 'YYYY-MM-DD HH:MM:SS' string that can be embedded in SQL text.
start_date = datetime.now()
start_date_str = f"{start_date:%Y-%m-%d %H:%M:%S}"

In [0]:
%sql
-- Register tickers that exist in silver.ticker_silver but are not yet present
-- in the gold ticker table.
-- NOT EXISTS is used instead of NOT IN: with NOT IN, a single NULL ticker in
-- ticker_gold makes the predicate evaluate to NULL for every candidate row, so
-- no new ticker would ever be inserted.
-- Rows with a NULL ticker are skipped explicitly; NULL never equals an existing
-- value, so they would otherwise be inserted again on every run.
INSERT INTO yahoo_finance.gold.ticker_gold (ticker_id, ticker)
SELECT ts.ticker_id, ts.ticker
FROM yahoo_finance.silver.ticker_silver ts
WHERE ts.ticker IS NOT NULL
  AND NOT EXISTS (
    SELECT 1
    FROM yahoo_finance.gold.ticker_gold tg
    WHERE tg.ticker = ts.ticker
  );

In [0]:
%sql
-- Incrementally append silver price rows to gold.finance_gold.
-- Per ticker, the high-water mark is the latest last_loaded_date logged by a
-- 'Completed' process_gold_data run; tickers without such a log entry fall
-- back to '2025-01-01'. Only rows strictly newer than the mark are appended.
INSERT INTO yahoo_finance.gold.finance_gold
  (ticker_id, date_id, datetime, close, high, low, open, volume)
SELECT
  dim.ticker_id,
  src.date_id,
  src.datetime,
  src.close,
  src.high,
  src.low,
  src.open,
  src.`volume`
FROM yahoo_finance.silver.finance_silver AS src
-- Left join: a silver row whose ticker has no match in ticker_gold is still
-- loaded, with a NULL ticker_id.
LEFT JOIN yahoo_finance.gold.ticker_gold AS dim
  ON src.ticker = dim.ticker
LEFT JOIN (
  SELECT
    run_log.ticker,
    MAX(run_log.last_loaded_date) AS last_loaded_date
  FROM yahoo_finance.processrunlogs.processrunlog AS run_log
  WHERE run_log.processname = 'process_gold_data'
    AND run_log.status = 'Completed'
  GROUP BY run_log.ticker
) AS hwm
  ON src.ticker = hwm.ticker
WHERE src.datetime > nvl(hwm.last_loaded_date, '2025-01-01');

In [0]:
%sql
-- Rebuild the daily aggregate table from gold.finance_gold: one row per
-- ticker_id and calendar date (year, month, week and date come from date_gold).
-- A single INSERT OVERWRITE replaces the former TRUNCATE + INSERT pair, so a
-- failing aggregation query no longer leaves the table empty.
-- NOTE(review): assumes a Delta table (overwrite committed atomically) and the
-- default static partition overwrite mode -- confirm.
INSERT OVERWRITE TABLE yahoo_finance.gold.finance_gold_daily
  (ticker_id, year, month, week, date, min_of_open, max_of_close, max_of_high, min_of_low, avg_of_close, stddev_of_close, median_of_close, sum_of_volume)
SELECT
  fg.ticker_id,
  dg.year,
  dg.month,
  dg.week,
  dg.date,
  MIN(fg.open) AS min_of_open,
  MAX(fg.close) AS max_of_close,
  MAX(fg.high) AS max_of_high,
  MIN(fg.low) AS min_of_low,
  AVG(fg.close) AS avg_of_close,
  stddev(fg.close) AS stddev_of_close,
  median(fg.close) AS median_of_close,
  sum(fg.volume) AS sum_of_volume
FROM
  yahoo_finance.gold.finance_gold fg
  INNER JOIN yahoo_finance.gold.date_gold dg ON fg.date_id = dg.date_id
GROUP BY
  fg.ticker_id,
  dg.year,
  dg.month,
  dg.week,
  dg.date;

In [0]:
%sql
-- Rebuild the weekly aggregate table from gold.finance_gold: one row per
-- ticker_id and (year, month, week) combination taken from date_gold.
-- A single INSERT OVERWRITE replaces the former TRUNCATE + INSERT pair, so a
-- failing aggregation query no longer leaves the table empty.
-- NOTE(review): assumes a Delta table (overwrite committed atomically) and the
-- default static partition overwrite mode -- confirm.
INSERT OVERWRITE TABLE yahoo_finance.gold.finance_gold_weekly
  (ticker_id, year, month, week, min_of_open, max_of_close, max_of_high, min_of_low, avg_of_close, stddev_of_close, median_of_close, sum_of_volume)
SELECT
  fg.ticker_id,
  dg.year,
  dg.month,
  dg.week,
  MIN(fg.open) AS min_of_open,
  MAX(fg.close) AS max_of_close,
  MAX(fg.high) AS max_of_high,
  MIN(fg.low) AS min_of_low,
  AVG(fg.close) AS avg_of_close,
  stddev(fg.close) AS stddev_of_close,
  median(fg.close) AS median_of_close,
  sum(fg.volume) AS sum_of_volume
FROM
  yahoo_finance.gold.finance_gold fg
  INNER JOIN yahoo_finance.gold.date_gold dg ON fg.date_id = dg.date_id
GROUP BY
  fg.ticker_id,
  dg.year,
  dg.month,
  dg.week;

In [0]:
%sql
-- Rebuild the monthly aggregate table from gold.finance_gold: one row per
-- ticker_id and (year, month) combination taken from date_gold.
-- A single INSERT OVERWRITE replaces the former TRUNCATE + INSERT pair, so a
-- failing aggregation query no longer leaves the table empty.
-- NOTE(review): assumes a Delta table (overwrite committed atomically) and the
-- default static partition overwrite mode -- confirm.
INSERT OVERWRITE TABLE yahoo_finance.gold.finance_gold_monthly
  (ticker_id, year, month, min_of_open, max_of_close, max_of_high, min_of_low, avg_of_close, stddev_of_close, median_of_close, sum_of_volume)
SELECT
  fg.ticker_id,
  dg.year,
  dg.month,
  MIN(fg.open) AS min_of_open,
  MAX(fg.close) AS max_of_close,
  MAX(fg.high) AS max_of_high,
  MIN(fg.low) AS min_of_low,
  AVG(fg.close) AS avg_of_close,
  stddev(fg.close) AS stddev_of_close,
  median(fg.close) AS median_of_close,
  sum(fg.volume) AS sum_of_volume
FROM
  yahoo_finance.gold.finance_gold fg
  INNER JOIN yahoo_finance.gold.date_gold dg ON fg.date_id = dg.date_id
GROUP BY
  fg.ticker_id,
  dg.year,
  dg.month;

In [0]:
# SQL counting the 'Completed' log rows written by process_gold_data; it is run
# once before and once after the log insert below.
completed_count_sql = """
SELECT COUNT(*) as cnt
FROM yahoo_finance.processrunlogs.processrunlog
WHERE processname = 'process_gold_data' AND status = 'Completed'
"""


def _count_completed_log_rows():
    """Return the current number of 'Completed' process_gold_data log rows."""
    count_rows = spark.sql(completed_count_sql).collect()
    return count_rows[0]['cnt']


# Snapshot the log size so we can tell afterwards whether the insert added rows.
before_count = _count_completed_log_rows()

# Write one log row per ticker that has finance_gold rows newer than its last
# logged last_loaded_date ('2025-01-01' when the ticker has none). startdate is
# the value of start_date_str; enddate is the time of the insert.
query_insert = f"""
INSERT INTO yahoo_finance.processrunlogs.processrunlog 
(ticker, processname, last_loaded_date, startdate, enddate, status)
SELECT 
    g.ticker, 
    'process_gold_data' AS processname, 
    max(f_g.datetime) AS last_loaded_date, 
    '{start_date_str}', 
    current_timestamp(), 
    'Completed'
FROM yahoo_finance.gold.finance_gold f_g
INNER JOIN yahoo_finance.gold.ticker_gold g 
    ON f_g.ticker_id = g.ticker_id
LEFT OUTER JOIN (
    SELECT l.ticker, MAX(l.last_loaded_date) AS last_loaded_date
    FROM yahoo_finance.processrunlogs.processrunlog l 
    WHERE l.processname = 'process_gold_data' 
      AND l.status = 'Completed' 
      AND l.last_loaded_date IS NOT NULL
    GROUP BY l.ticker
) l2
ON g.ticker = l2.ticker
WHERE f_g.Datetime > nvl(l2.last_loaded_date, '2025-01-01')
GROUP BY g.ticker
"""
spark.sql(query_insert)

# Re-count after the insert.
after_count = _count_completed_log_rows()

# An unchanged count means no per-ticker row was logged; record the run anyway
# with a placeholder row whose ticker and last_loaded_date are NULL.
if after_count == before_count:
    query_null = f"""
    INSERT INTO yahoo_finance.processrunlogs.processrunlog 
    (ticker, processname, last_loaded_date, startdate, enddate, status)
    VALUES (NULL, 'process_gold_data', NULL, '{start_date_str}', current_timestamp(), 'Completed')
    """
    spark.sql(query_null)