In [0]:
%run "./01_config"

In [0]:
class SetupHelper():
    """Create, validate and tear down the Delta tables of the application database.

    The class relies on three names supplied by the notebook environment and
    not defined in this file: ``Config`` (path and database settings),
    ``spark`` (the active Spark session) and ``dbutils`` (file-system helper).

    Typical use: ``SetupHelper(env).setup()`` followed by ``validate()``;
    ``cleanup()`` drops the database and deletes the table directories.
    """

    _NOT_INITIALIZED_MSG = "Application database is not defined. Cannot create table in default database."

    # Single source of truth for every table handled by this class.
    # table name -> (instance attribute holding the base path,
    #                directory below that path,
    #                column definitions in DDL order)
    # Insertion order is the creation/validation order.
    _TABLES = {
        "calories_min_bz": ("bronze", "calories_min", (
            "user_id long",
            "activity_minute timestamp",
            "calories double",
            "date date",
            "timeKey string",
            "load_time timestamp",
            "source_file string",
        )),
        "heartrate_sec_bz": ("bronze", "heartrate_sec", (
            "user_id long",
            "time timestamp",
            "value long",
            "date date",
            "timeKey string",
            "load_time timestamp",
            "source_file string",
        )),
        "intensities_min_bz": ("bronze", "intensities_min", (
            "user_id long",
            "activity_minute timestamp",
            "intensity long",
            "date date",
            "timeKey string",
            "load_time timestamp",
            "source_file string",
        )),
        "METs_min_bz": ("bronze", "METs_min", (
            "user_id long",
            "activity_minute timestamp",
            "mets long",
            "date date",
            "timeKey string",
            "load_time timestamp",
            "source_file string",
        )),
        "sleep_min_bz": ("bronze", "sleep_min", (
            "user_id long",
            "activity_minute timestamp",
            "value long",
            "log_id long",
            "date date",
            "timeKey string",
            "load_time timestamp",
            "source_file string",
        )),
        "steps_min_bz": ("bronze", "steps_min", (
            "user_id long",
            "activity_minute timestamp",
            "steps long",
            "date date",
            "timeKey string",
            "load_time timestamp",
            "source_file string",
        )),
        "weight_daily_bz": ("bronze", "weight_daily", (
            "user_id long",
            "date date",
            "weight_kg double",
            "weight_pounds double",
            "fat double",
            "bmi double",
            "is_manual_report boolean",
            "log_id long",
            "activity_minute timestamp",
            "load_time timestamp",
            "source_file string",
        )),
        "calories_daily_sl": ("silver", "calories_daily", (
            "user_id long",
            "daily_calories double",
            "date date",
        )),
        "heartrate_min_sl": ("silver", "heartrate_min", (
            "user_id long",
            "activity_minute timestamp",
            "avg_heartrate double",
            "max_heartrate double",
            "date date",
            "timeKey string",
        )),
        "heartrate_daily_sl": ("silver", "heartrate_daily", (
            "user_id long",
            "avg_heartrate double",
            "max_heartrate double",
            "date date",
        )),
        "intensities_daily_sl": ("silver", "intensities_daily", (
            "user_id long",
            "sedentary_minutes double",
            "lightly_active_minutes double",
            "fairly_active_minutes double",
            "very_active_minutes double",
            "date date",
        )),
        "sleep_daily_sl": ("silver", "sleep_daily", (
            "user_id long",
            "total_minutes_in_bed double",
            "asleep_minutes double",
            # NOTE(review): spelling kept as-is; the column name is part of
            # the table schema and may be referenced by other notebooks.
            "Restless_minuts double",
            "awake_minutes double",
            "log_id long",
            "date date",
        )),
        "steps_daily_sl": ("silver", "steps_daily", (
            "user_id long",
            "total_steps long",
            "date date",
        )),
        "activity_daily_gold": ("gold", "activity_daily", (
            "user_id long",
            "date date",
            "total_steps long",
            "total_calories long",
            "very_active_minutes long",
            "fairly_active_minutes long",
            "lightly_active_minutes long",
            "sedentary_minutes long",
            "avg_heartrate double",
            "max_heartrate double",
            "asleep_minutes long",
            "total_minutes_in_bed long",
        )),
        "date_lookup": ("initial", "date_lookup", (
            "date date",
            "week int",
            "year int",
            "month int",
            "dayofweek int",
            "dayofmonth int",
            "dayofyear int",
            "week_part string",
        )),
        "user_list": ("initial", "user_list", (
            "user_id long",
        )),
        "date_list": ("initial", "date_list", (
            "date date",
        )),
    }

    def __init__(self, env):
        """Derive storage paths and catalog/database names for ``env``.

        Args:
            env: Environment label inserted into the catalog name
                (``fitbit_{env}_catalog``).
        """
        conf = Config()
        # Paths are built by plain concatenation, so the Config values are
        # presumably expected to end with a path separator - TODO confirm.
        self.landing_zone = conf.landing + 'landing_zone'
        self.checkpoint_base = conf.checkpoint + 'checkpoints'
        self.initial = conf.medallion + "initial"
        self.bronze = conf.medallion + "bronze"
        self.silver = conf.medallion + "silver"
        self.gold = conf.medallion + "gold"
        self.catalog = f"fitbit_{env}_catalog"
        self.db_name = conf.db_name
        # Flipped to True by create_db(); table creation is refused until then.
        self.initialized = False

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _table_ddl(self, table_name):
        """Return the CREATE TABLE statement for a table listed in ``_TABLES``."""
        layer, directory, columns = self._TABLES[table_name]
        base_path = getattr(self, layer)
        column_list = ",\n    ".join(columns)
        return (
            f"CREATE TABLE IF NOT EXISTS {self.catalog}.{self.db_name}.{table_name}(\n"
            f"    {column_list}\n"
            f")\n"
            f"USING DELTA\n"
            f"LOCATION '{base_path}/{directory}/'"
        )

    def _create_table(self, table_name):
        """Create one registered table, refusing to run before create_db().

        Raises:
            ReferenceError: if the database has not been created/selected yet.
        """
        if not self.initialized:
            raise ReferenceError(self._NOT_INITIALIZED_MSG)
        print(f"Creating {table_name} table...", end='')
        spark.sql(self._table_ddl(table_name))
        print("Done")

    def _database_exists(self):
        """Return True when exactly one database named ``db_name`` is listed in the catalog."""
        matches = spark.sql(f"SHOW DATABASES IN {self.catalog}") \
            .filter(f"databaseName == '{self.db_name}'") \
            .count()
        return matches == 1

    # ------------------------------------------------------------------
    # Database and table creation
    # ------------------------------------------------------------------
    def create_db(self):
        """Create the database if needed, make it current and mark the helper initialized."""
        print(f"Creating the database {self.catalog}.{self.db_name}...", end='')
        spark.sql(f"CREATE DATABASE IF NOT EXISTS {self.catalog}.{self.db_name}")
        spark.sql(f"USE {self.catalog}.{self.db_name}")
        self.initialized = True
        print("Done")

    def create_calories_min_bz(self):
        """Create the ``calories_min_bz`` table under the bronze path."""
        self._create_table("calories_min_bz")

    def create_heartrate_sec_bz(self):
        """Create the ``heartrate_sec_bz`` table under the bronze path."""
        self._create_table("heartrate_sec_bz")

    def create_intensities_min_bz(self):
        """Create the ``intensities_min_bz`` table under the bronze path."""
        self._create_table("intensities_min_bz")

    def create_METs_min_bz(self):
        """Create the ``METs_min_bz`` table under the bronze path."""
        self._create_table("METs_min_bz")

    def create_sleep_min_bz(self):
        """Create the ``sleep_min_bz`` table under the bronze path."""
        self._create_table("sleep_min_bz")

    def create_steps_min_bz(self):
        """Create the ``steps_min_bz`` table under the bronze path."""
        self._create_table("steps_min_bz")

    def create_weight_daily_bz(self):
        """Create the ``weight_daily_bz`` table under the bronze path."""
        self._create_table("weight_daily_bz")

    def create_calories_daily_sl(self):
        """Create the ``calories_daily_sl`` table under the silver path."""
        self._create_table("calories_daily_sl")

    def create_heartrate_min_sl(self):
        """Create the ``heartrate_min_sl`` table under the silver path."""
        self._create_table("heartrate_min_sl")

    def create_heartrate_daily_sl(self):
        """Create the ``heartrate_daily_sl`` table under the silver path."""
        self._create_table("heartrate_daily_sl")

    def create_intensities_daily_sl(self):
        """Create the ``intensities_daily_sl`` table under the silver path."""
        self._create_table("intensities_daily_sl")

    def create_sleep_daily_sl(self):
        """Create the ``sleep_daily_sl`` table under the silver path."""
        self._create_table("sleep_daily_sl")

    def create_steps_daily_sl(self):
        """Create the ``steps_daily_sl`` table under the silver path."""
        self._create_table("steps_daily_sl")

    def create_activity_daily_gold(self):
        """Create the ``activity_daily_gold`` table under the gold path."""
        self._create_table("activity_daily_gold")

    def create_date_lookup(self):
        """Create the ``date_lookup`` table under the initial path."""
        self._create_table("date_lookup")

    def create_user_list(self):
        """Create the ``user_list`` table under the initial path."""
        self._create_table("user_list")

    def create_date_list(self):
        """Create the ``date_list`` table under the initial path."""
        self._create_table("date_list")

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------
    def setup(self):
        """Create the database and then every registered table, printing the elapsed seconds."""
        import time
        start = int(time.time())
        print("\nStarting setup ...")
        self.create_db()
        for table_name in self._TABLES:
            # Dispatch through the public create_<table> method so that an
            # overridden creator is still honoured.
            getattr(self, f"create_{table_name}")()
        print(f"Setup completed in {int(time.time()) - start} seconds")

    def validate(self):
        """Check that the database and every table created by setup() exist.

        Raises:
            AssertionError: if the database or any registered table is missing.
        """
        import time
        start = int(time.time())
        print("\nStarting setup validation ...")
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O.
        if not self._database_exists():
            raise AssertionError(f"The database '{self.catalog}.{self.db_name}' is missing")
        print(f"Found database {self.catalog}.{self.db_name}: Success")
        for table_name in self._TABLES:
            # Tables are looked up by lower-cased name (the original check
            # used 'mets_min_bz' for METs_min_bz) - presumably because the
            # metastore reports lower-case names; confirm.
            self.assert_table(table_name.lower())
        print(f"Setup validation completed in {int(time.time()) - start} seconds")

    def assert_table(self, table_name):
        """Verify that exactly one non-temporary table ``table_name`` exists in the database.

        Args:
            table_name: Name as reported in the ``tableName`` column of SHOW TABLES.

        Raises:
            AssertionError: if the table is not found.
        """
        matches = spark.sql(f"SHOW TABLES IN {self.catalog}.{self.db_name}") \
            .filter(f"isTemporary == false and tableName == '{table_name}'") \
            .count()
        if matches != 1:
            raise AssertionError(f"The table {table_name} is missing")
        print(f"Found {table_name} table in {self.catalog}.{self.db_name}: Success")

    def cleanup(self):
        """Drop the database (if present) and recursively delete checkpoint and table directories.

        The landing zone is not deleted here.
        """
        if self._database_exists():
            print(f"Dropping the database {self.catalog}.{self.db_name}...", end='')
            spark.sql(f"DROP DATABASE {self.catalog}.{self.db_name} CASCADE")
            print("Done")
        doomed_paths = (
            self.checkpoint_base,
            self.initial,
            self.bronze,
            self.silver,
            self.gold,
        )
        for path in doomed_paths:
            print(f"Deleting {path}...", end='')
            dbutils.fs.rm(path, True)  # second argument: delete recursively
            print("Done")