In [0]:
%run "../utils/configuration"

In [0]:
%run "../utils/encryption_helper"

Read data

In [0]:
# Load the raw (bronze-layer) inputs through the Delta reader.
expedia_df = (
    spark.read
    .format("delta")
    .table("bronze.expedia_raw")
)
hotel_weather_df = (
    spark.read
    .format("delta")
    .table("bronze.hotel_weather_raw")
)

Decrypt PII fields

In [0]:
# EncryptionHelper and common_pii_fields are not defined in this notebook;
# presumably they come from the %run'd utils notebooks (confirm there).
encryption_helper = EncryptionHelper(dbutils)
try:
    # Subscript lookup (same as the encryption cell) so a missing dataset key
    # raises KeyError here instead of silently passing None as the field list.
    expedia_df = encryption_helper.decrypt_dataframe(expedia_df, common_pii_fields["expedia"])
    hotel_weather_df = encryption_helper.decrypt_dataframe(hotel_weather_df, common_pii_fields["hotel_weather"])
except Exception as e:
    # Log for the notebook output, then re-raise so the run fails visibly.
    print(f'Error during decryption of raw data: {e}')
    raise

Apply transformations

In [0]:
%run "../utils/transformation_helper"

In [0]:
# TransformationHelper is not defined in this notebook; presumably it comes
# from the %run'd transformation_helper notebook (confirm there).
transformation_helper = TransformationHelper()

# --- Expedia: timestamp casts, then de-duplication keyed on 'id' ---
expedia_timestamp_fields = ['date_time', 'srch_ci', 'srch_co']
expedia_df = transformation_helper.cast_to_timestamp(
    expedia_df, expedia_timestamp_fields
)
expedia_df = transformation_helper.drop_duplicates(expedia_df, 'id')

# --- Hotel weather: type casts, string/null clean-up, de-duplication ---
hotel_weather_string_fields = ['address', 'city', 'country', 'geoHash', 'name']
hotel_weather_df = transformation_helper.cast_to_timestamp(
    hotel_weather_df, ['wthr_date']
)
hotel_weather_df = transformation_helper.clean_string_types(
    hotel_weather_df, hotel_weather_string_fields
)
hotel_weather_df = transformation_helper.cast_to_numeric(hotel_weather_df, 'id')
hotel_weather_df = transformation_helper.clean_null_values(
    hotel_weather_df, ['avg_tmpr_c', 'avg_tmpr_f']
)
hotel_weather_df = transformation_helper.drop_duplicates(hotel_weather_df, 'id')



Encrypt PII fields

In [0]:
try:
    # Re-encrypt each dataset's PII columns before persisting; the field lists
    # are looked up per dataset key in common_pii_fields.
    expedia_pii_fields = common_pii_fields['expedia']
    expedia_df = encryption_helper.encrypt_dataframe(expedia_df, expedia_pii_fields)

    hotel_weather_pii_fields = common_pii_fields['hotel_weather']
    hotel_weather_df = encryption_helper.encrypt_dataframe(hotel_weather_df, hotel_weather_pii_fields)
except Exception as err:
    # Report in the notebook output, then propagate the original exception.
    print(f'Error during encryption: {err}')
    raise

Write data

In [0]:
# Persist both processed DataFrames as Delta tables in the silver schema,
# replacing any existing table contents (mode="overwrite").
# Note: the expedia table name previously carried a trailing space, which is
# not a valid/consistent identifier; it is now "silver.expedia_processed".
expedia_df.write.mode("overwrite").format("delta").saveAsTable("silver.expedia_processed")
hotel_weather_df.write.mode("overwrite").format("delta").saveAsTable("silver.hotel_weather_processed")