## DLT Pipelines - Bronze to Silver 

In [0]:
import dlt
from pyspark.sql.functions import *

In [0]:
# Cast dimension built from the bronze ott_cast table.
# Rows with a NULL cast_name fail the expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('cast_name_expectation','cast_name is not NULL')
def dim_cast():
    bronze_cast = spark.read.table('ott_catalog.bronze.ott_cast')

    cast_name = col('cast').alias('cast_name')
    # Missing descriptive attributes are replaced with the literal 'NA'.
    primary_profession = coalesce(col('primary_profession'), lit('NA')).alias('primary_profession')
    known_for_title = coalesce(col('known_for_title'), lit('NA')).alias('known_for_title')

    # De-duplicate on the full projected row.
    return bronze_cast.select(cast_name, primary_profession, known_for_title).distinct()


In [0]:
# Category dimension built from the bronze ott_category table.
# Rows with a NULL category_name fail the expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('category_name_expectations','category_name is not NULL')
def dim_category():
    bronze_category = spark.read.table('ott_catalog.bronze.ott_category')

    category_name = col('listed_in').alias('category_name')
    # A missing category_type falls back to the literal 'Genre'.
    category_type = coalesce(col('category_type'), lit('Genre')).alias('category_type')

    # De-duplicate on the full projected row.
    return bronze_category.select(category_name, category_type).distinct()

In [0]:
# Country dimension built from the bronze ott_countries table.
# Rows with a NULL country_name fail the expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('country_name_expectations','country_name is not NULL')
def dim_countries():
    bronze_countries = spark.read.table('ott_catalog.bronze.ott_countries')

    country_name = col('country').alias('country_name')
    # Missing descriptive attributes are replaced with the literal 'NA'.
    continent = coalesce(col('continent'), lit('NA')).alias('continent')
    market_type = coalesce(col('market_type'), lit('NA')).alias('market_type')

    # De-duplicate on the full projected row.
    return bronze_countries.select(country_name, continent, market_type).distinct()

In [0]:
# Director dimension built from the bronze ott_directors table.
# Rows with a NULL director_name fail the expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('director_name_expectations','director_name is not NULL')
def dim_directors():
    bronze_directors = spark.read.table('ott_catalog.bronze.ott_directors')

    director_name = col('director').alias('director_name')
    # Missing descriptive attributes are replaced with the literal 'NA'.
    directing_style = coalesce(col('directing_style'), lit('NA')).alias('directing_style')
    active_years = coalesce(col('active_years'), lit('NA')).alias('active_years')

    # De-duplicate on the full projected row.
    return bronze_directors.select(director_name, directing_style, active_years).distinct()

In [0]:
# Title dimension built from the bronze ott_titles table.
# Rows with a NULL title_name or a NULL show_id fail an expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('title_name_expectations','title_name is not NULL')
@dlt.expect_or_drop('show_id_expectations','show_id is not NULL')
def dim_titles():
    bronze_titles = spark.read.table('ott_catalog.bronze.ott_titles')

    # Output columns, in the order they appear in the resulting table.
    projection = [
        col('title').alias('title_name'),
        # NULL durations default to 0 before being cast to int.
        coalesce(col('duration_minutes'), lit(0)).cast('int').alias('duration_minutes'),
        coalesce(col('duration_seasons'), lit(0)).cast('int').alias('duration_seasons'),
        # Missing text attributes are replaced with the literal 'NA'.
        coalesce(col('type'), lit('NA')).alias('type'),
        col('date_added'),
        col('release_year').cast('int'),
        col('rating'),
        coalesce(col('description'), lit('NA')).alias('description'),
        col('show_id'),
    ]

    # De-duplicate on the full projected row.
    return bronze_titles.select(*projection).distinct()

In [0]:
# Title snapshot fact built from the bronze ott_title_snapshot table.
# Rows with a NULL show_id or a NULL snapshot_date fail an expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('show_id_expectation','show_id is not NULL')
@dlt.expect_or_drop('snapshot_date_expectation','snapshot_date is not NULL')
def fact_title_snapshot():
    bronze_snapshot = spark.read.table('ott_catalog.bronze.ott_title_snapshot')

    # Output columns, in the order they appear in the resulting table.
    projection = [
        col('show_id'),
        col('snapshot_date'),
        # NULL durations default to 0 before being cast to int.
        coalesce(col('duration_minutes'), lit(0)).cast('int').alias('duration_minutes'),
        coalesce(col('duration_seasons'), lit(0)).cast('int').alias('duration_seasons'),
        # NULL flags default to the literal 'N'.
        coalesce(col('is_movie_flag'), lit('N')).alias('is_movie_flag'),
        coalesce(col('is_series_flag'), lit('N')).alias('is_series_flag'),
        coalesce(col('is_new_release_flag'), lit('N')).alias('is_new_release_flag'),
    ]

    # De-duplicate on the full projected row.
    return bronze_snapshot.select(*projection).distinct()


## Bridge Tables - Silver


In [0]:
# Bridge table pairing each cast_name with a show_id, read from the bronze ott_cast table.
# Rows with a NULL show_id or a NULL cast_name fail an expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('show_id_expectation','show_id is not NULL')
@dlt.expect_or_drop('cast_name_expectation','cast_name is not NULL')
def show_cast_rel():
    return (
        spark.read.table('ott_catalog.bronze.ott_cast')
        .select(col('cast').alias('cast_name'), col('show_id'))
        .distinct()  # keep one row per (cast_name, show_id) pair
    )

In [0]:
# Bridge table pairing each category_name with a show_id, read from the bronze ott_category table.
# Rows with a NULL show_id or a NULL category_name fail an expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('show_id_expectation','show_id is not NULL')
@dlt.expect_or_drop('category_name_expectation','category_name is not NULL')
def show_category_rel():
    return (
        spark.read.table('ott_catalog.bronze.ott_category')
        .select(col('listed_in').alias('category_name'), col('show_id'))
        .distinct()  # keep one row per (category_name, show_id) pair
    )

In [0]:
# Bridge table pairing each country_name with a show_id, read from the bronze ott_countries table.
# Rows with a NULL show_id or a NULL country_name fail an expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('show_id_expectation','show_id is not NULL')
@dlt.expect_or_drop('countries_name_expectation','country_name is not NULL')
def show_countries_rel():
    return (
        spark.read.table('ott_catalog.bronze.ott_countries')
        .select(col('country').alias('country_name'), col('show_id'))
        .distinct()  # keep one row per (country_name, show_id) pair
    )

In [0]:
# Bridge table pairing each director_name with a show_id, read from the bronze ott_directors table.
# Rows with a NULL show_id or a NULL director_name fail an expectation and are dropped.
@dlt.table()
@dlt.expect_or_drop('show_id_expectation','show_id is not NULL')
@dlt.expect_or_drop('directors_name_expectation','director_name is not NULL')
def show_directors_rel():
    return (
        spark.read.table('ott_catalog.bronze.ott_directors')
        .select(col('director').alias('director_name'), col('show_id'))
        .distinct()  # keep one row per (director_name, show_id) pair
    )