In [0]:
from datetime import datetime
from datetime import timedelta

In [0]:
# Check whether a table is already registered in the metastore
def table_exists(schema_name, database_name, table_name):
    """Report whether ``database_name.table_name`` exists in a catalog.

    Despite its name, ``schema_name`` is handed to
    ``spark.catalog.setCurrentCatalog`` and therefore acts as the catalog
    name. The session's current catalog remains switched to it after the
    call returns.

    Args:
        schema_name: Catalog to make current before the lookup.
        database_name: Database (schema) that should contain the table.
        table_name: Name of the table to look for.

    Returns:
        Whatever ``spark.catalog.tableExists`` reports for
        ``'<database_name>.<table_name>'``.
    """
    catalog = spark.catalog
    catalog.setCurrentCatalog(schema_name)

    qualified_name = '{}.{}'.format(database_name, table_name)
    return catalog.tableExists(qualified_name)


In [0]:
def _partition_bound(table_name, latest=True):
    """Return the newest (or oldest) ``ref_date`` partition value of a table.

    Args:
        table_name: Fully qualified table name passed to ``SHOW PARTITIONS``.
        latest: When True return the greatest ``ref_date``; otherwise the
            smallest one.

    Returns:
        The ``ref_date`` value of the first row after sorting.

    Raises:
        ValueError: If ``SHOW PARTITIONS`` returns no rows.
    """
    rows = (
        spark.sql(f'SHOW PARTITIONS {table_name}')
        .orderBy('ref_date', ascending=not latest)
        .limit(1)
        .collect()
    )
    if not rows:
        raise ValueError(f'{table_name} has no partitions to read ref_date from')
    return rows[0][0]


# Decide which ref_date range has to be (re)computed:
#   - gold table exists  -> incremental load starting the day after its newest partition
#   - gold table missing -> full load starting at the oldest silver partition
gold_exists = table_exists('gold', 'nytimes', 'daily_trending_topics')

# The newest silver partition is the upper bound in both branches, so it is
# looked up once instead of once per branch.
end_date = _partition_bound('silver.nytimes.top_stories')

if gold_exists:
    last_updated = _partition_bound('gold.nytimes.daily_trending_topics')

    if last_updated == end_date:
        up_to_date_message = 'Table is already up to date'
        print(up_to_date_message)
        # dbutils.notebook.exit takes the exit value as an argument.
        dbutils.notebook.exit(up_to_date_message)

    # last_updated is parsed as a 'YYYY-MM-DD' string; resume on the following day.
    start_date = datetime.strptime(last_updated, '%Y-%m-%d').date() + timedelta(days=1)
    print(f'Updating table with data from {start_date} to {end_date}')
else:
    start_date = _partition_bound('silver.nytimes.top_stories', latest=False)
    print(f'Creating table with data from {start_date} to {end_date}')


In [0]:
# Count stories per (ref_date, topic) for every silver partition from
# start_date onwards. explode() emits one row per element of ds_topics
# (presumably an array column - confirm against the silver schema).
#
# start_date is interpolated inside single quotes so SQL receives a string
# literal such as '2024-01-15'. Without the quotes the value is parsed as the
# integer expression 2024 - 1 - 15 and the filter no longer compares dates.
df_upload = spark.sql('''
  WITH topics AS (
    SELECT
      ref_date,
      explode(ds_topics) AS topic
    FROM silver.nytimes.top_stories
    WHERE ref_date >= '{start_date}'
  )

  SELECT
    ref_date,
    topic,
    COUNT(*) AS stories
  FROM topics
  GROUP BY ref_date, topic
  ORDER BY ref_date DESC, stories DESC
'''.format(start_date = start_date))

In [0]:
# Write the aggregated topics to the gold Delta table, partitioned by ref_date.
# mode('overwrite') combined with replaceWhere restricts the overwrite to rows
# matching the predicate, so partitions older than start_date are kept.
# The date is quoted so the predicate compares against a string literal
# ('2024-01-15') instead of the integer expression 2024 - 1 - 15.
(df_upload.write
    .partitionBy('ref_date')
    .format("delta")
    .mode('overwrite')
    .option('replaceWhere', f"ref_date >= '{start_date}'")
    .saveAsTable("gold.nytimes.daily_trending_topics")
)

In [0]:
%sql

-- List the partitions currently present in gold.nytimes.daily_trending_topics.
SHOW PARTITIONS gold.nytimes.daily_trending_topics;