In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

In [0]:
# Spark session for this job; getOrCreate() hands back an already-active
# session when there is one instead of building a second.
spark = (
    SparkSession.builder
    .appName("GoldLayerCreation")
    .getOrCreate()
)

In [0]:
# Load the four silver-layer Delta tables that feed the gold table.
# The paths are listed in the same order as the names they are unpacked into.
silver_sellers, silver_buyers, silver_users, silver_countries = (
    spark.read.format("delta").load(silver_path)
    for silver_path in (
        "/mnt/delta/tables/silver/sellers",
        "/mnt/delta/tables/silver/buyers",
        "/mnt/delta/tables/silver/users",
        "/mnt/delta/tables/silver/countries",
    )
)

In [0]:
# Build the gold-layer "one big table": join users with the countries, buyers
# and sellers tables on `country`, then project every column under a
# table-specific prefix so that same-named columns stay distinguishable.
#
# The join and the projection form a single expression, so this cell never
# reads its own previous result and can be re-run safely on a live kernel.
#
# NOTE(review): all three joins are full outer joins on `country`, so rows
# without a matching user are kept as well. The sellers table also exposes a
# `sex` column; if it holds several rows per country, each user row will be
# repeated once per seller row — confirm this is intended.
comprehensive_user_table = (
    silver_users
    .join(silver_countries, ["country"], "outer")
    .join(silver_buyers, ["country"], "outer")
    .join(silver_sellers, ["country"], "outer")
    .select(

        # From silver layer countries table
        silver_countries["sellers"].alias("countries_sellers"),
        silver_countries["topsellers"].alias("countries_topsellers"),
        silver_countries["topsellerratio"].alias("countries_topsellerratio"),
        silver_countries["femalesellersratio"].alias("countries_femalesellersratio"),
        silver_countries["topfemalesellersratio"].alias("countries_topfemalesellersratio"),
        silver_countries["femalesellers"].alias("countries_femalesellers"),
        silver_countries["malesellers"].alias("countries_malesellers"),
        silver_countries["topfemalesellers"].alias("countries_topfemalesellers"),
        silver_countries["topmalesellers"].alias("countries_topmalesellers"),
        silver_countries["bestsoldratio"].alias("countries_bestsoldratio"),
        silver_countries["toptotalproductssold"].alias("countries_toptotalproductssold"),
        silver_countries["totalproductssold"].alias("countries_totalproductssold"),
        silver_countries["toptotalproductslisted"].alias("countries_toptotalproductslisted"),
        silver_countries["totalproductslisted"].alias("countries_totalproductslisted"),
        silver_countries["topmeanproductssold"].alias("countries_topmeanproductssold"),
        silver_countries["topmeanproductslisted"].alias("countries_topmeanproductslisted"),
        silver_countries["meanproductssold"].alias("countries_meanproductssold"),
        silver_countries["meanproductslisted"].alias("countries_meanproductslisted"),
        silver_countries["meanofflinedays"].alias("countries_meanofflinedays"),
        silver_countries["topmeanofflinedays"].alias("countries_topmeanofflinedays"),
        silver_countries["meanfollowers"].alias("countries_meanfollowers"),
        silver_countries["meanfollowing"].alias("countries_meanfollowing"),
        silver_countries["topmeanfollowers"].alias("countries_topmeanfollowers"),
        silver_countries["topmeanfollowing"].alias("countries_topmeanfollowing"),
        silver_countries["performance_indicator"].alias("countries_performance_indicator"),
        silver_countries["high_performance"].alias("countries_high_performance"),
        silver_countries["activity_level"].alias("countries_activity_level"),

        # From silver layer users table
        silver_users["identifierHash"].alias("users_identifierHash"),
        silver_users["type"].alias("users_type"),
        silver_users["country"].alias("users_country"),
        silver_users["language"].alias("users_language"),
        silver_users["socialnbfollowers"].alias("users_socialnbfollowers"),
        silver_users["socialnbfollows"].alias("users_socialnbfollows"),
        silver_users["socialProductsLiked"].alias("users_socialProductsLiked"),
        silver_users["productsListed"].alias("users_productsListed"),
        silver_users["productsSold"].alias("users_productsSold"),
        silver_users["productspassrate"].alias("users_productspassrate"),
        silver_users["productsWished"].alias("users_productsWished"),
        silver_users["productsBought"].alias("users_productsBought"),
        silver_users["gender"].alias("users_gender"),
        silver_users["civilityGenderId"].alias("users_civilityGenderId"),
        silver_users["civilityTitle"].alias("users_civilityTitle"),
        silver_users["hasanyapp"].alias("users_hasanyapp"),
        silver_users["hasandroidapp"].alias("users_hasandroidapp"),
        silver_users["hasiosapp"].alias("users_hasiosapp"),
        silver_users["hasprofilepicture"].alias("users_hasprofilepicture"),
        silver_users["dayssincelastlogin"].alias("users_dayssincelastlogin"),
        silver_users["seniority"].alias("users_seniority"),
        silver_users["seniorityasmonths"].alias("users_seniorityasmonths"),
        silver_users["seniorityasyears"].alias("users_seniorityasyears"),
        silver_users["countrycode"].alias("users_countrycode"),
        silver_users["language_full"].alias("users_language_full"),
        silver_users["civilitytitle_clean"].alias("users_civilitytitle_clean"),
        silver_users["years_since_last_login"].alias("users_years_since_last_login"),
        silver_users["account_age_years"].alias("users_account_age_years"),
        silver_users["account_age_group"].alias("users_account_age_group"),
        silver_users["current_year"].alias("users_current_year"),

        # From silver layer sellers table
        silver_sellers['country'].alias('sellers_country'),
        # The sellers `sex` column is published as `sellers_gender`.
        silver_sellers['sex'].alias('sellers_gender'),
        silver_sellers['meanproductslisted'].alias('sellers_meanproductslisted'),
        silver_sellers['meansellerpassrate'].alias('sellers_meansellerpassrate'),
        silver_sellers['totalproductssold'].alias('sellers_totalproductssold'),
        silver_sellers['totalproductslisted'].alias('sellers_totalproductslisted'),
        silver_sellers['meanproductsbought'].alias('sellers_meanproductsbought'),
        silver_sellers['meanproductswished'].alias('sellers_meanproductswished'),
        silver_sellers['meanproductsliked'].alias('sellers_meanproductsliked'),
        silver_sellers['totalbought'].alias('sellers_totalbought'),
        silver_sellers['totalwished'].alias('sellers_totalwished'),
        silver_sellers['totalproductsliked'].alias('sellers_totalproductsliked'),
        silver_sellers['meanfollowers'].alias('sellers_meanfollowers'),
        silver_sellers['meanfollows'].alias('sellers_meanfollows'),
        silver_sellers['percentofappusers'].alias('sellers_percentofappusers'),
        silver_sellers['percentofiosusers'].alias('sellers_percentofiosusers'),
        silver_sellers['meanseniority'].alias('sellers_meanseniority'),
        silver_sellers['mean_products_listed'].alias('sellers_mean_products_listed'),

        # From silver layer buyers table
        silver_buyers['country'].alias('buyers_country'),
        silver_buyers['topbuyers'].alias('buyers_top'),
        silver_buyers['topbuyerratio'].alias('buyers_topratio'),
        silver_buyers['femalebuyers'].alias('buyers_femalebuyers'),
        silver_buyers['malebuyers'].alias('buyers_malebuyers'),
        silver_buyers['topfemalebuyers'].alias('buyers_topfemale'),
        silver_buyers['topmalebuyers'].alias('buyers_topmale'),
        silver_buyers['femalebuyersratio'].alias('buyers_femaleratio'),
        silver_buyers['topfemalebuyersratio'].alias('buyers_topfemaleratio'),
        silver_buyers['boughtperwishlistratio'].alias('buyers_boughtperwishlistratio'),
        silver_buyers['boughtperlikeratio'].alias('buyers_boughtperlikeratio'),
        silver_buyers['topboughtperwishlistratio'].alias('buyers_topperwishlistratio'),
        silver_buyers['topboughtperlikeratio'].alias('buyers_topperlikeratio'),
        silver_buyers['totalproductsbought'].alias('buyers_totalproducts'),
        silver_buyers['totalproductswished'].alias('buyers_totalproductswished'),
        silver_buyers['totalproductsliked'].alias('buyers_totalproductsliked'),
        silver_buyers['toptotalproductsbought'].alias('buyers_toptotalproducts'),
        silver_buyers['toptotalproductswished'].alias('buyers_toptotalproductswished'),
        silver_buyers['toptotalproductsliked'].alias('buyers_toptotalproductsliked'),
        silver_buyers['meanproductsbought'].alias('buyers_meanproducts'),
        silver_buyers['meanproductswished'].alias('buyers_meanproductswished'),
        silver_buyers['meanproductsliked'].alias('buyers_meanproductsliked'),
        silver_buyers['topmeanproductsbought'].alias('buyers_topmeanproducts'),
        silver_buyers['topmeanproductswished'].alias('buyers_topmeanproductswished'),
        silver_buyers['topmeanproductsliked'].alias('buyers_topmeanproductsliked'),
        silver_buyers['meanofflinedays'].alias('buyers_meanofflinedays'),
        silver_buyers['topmeanofflinedays'].alias('buyers_topmeanofflinedays'),
        silver_buyers['meanfollowers'].alias('buyers_meanfollowers'),
        silver_buyers['meanfollowing'].alias('buyers_meanfollowing'),
        silver_buyers['topmeanfollowers'].alias('buyers_topmeanfollowers'),
        silver_buyers['topmeanfollowing'].alias('buyers_topmeanfollowing'),
        # NOTE(review): `female_to_make_ratio` is kept exactly as spelled in the
        # silver table; presumably it means "female to male" — confirm upstream.
        silver_buyers['female_to_make_ratio'].alias('buyers_female_to_make_ratio'),
        silver_buyers['wishlist_to_purchase_ratio'].alias('buyers_wishlist_to_purchase_ratio'),
        silver_buyers['high_engagement'].alias('buyers_high_engagement'),
    )
)




In [0]:
# Persist the comprehensive user table to the gold layer in Delta format;
# "overwrite" mode replaces the data already stored at that path.
(
    comprehensive_user_table.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/delta/tables/gold/")
)

In [0]:
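#move user data from 'to_process' to 'processed' in azure data lake
#SECURITY: a storage account key was previously pasted on the next line in plain text.
#It has been redacted here and should be rotated; read it from a secret scope instead of hardcoding it, e.g.:
#spark.conf.set("fs.azure.account.key.ecomadlsalyce.dfs.core.windows.net", dbutils.secrets.get(scope="<secret-scope>", key="<secret-name>"))
#dbutils.fs.ls("abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net")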


[FileInfo(path='abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/buyers-raw-2/', name='buyers-raw-2/', size=0, modificationTime=1738256118000),
 FileInfo(path='abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/countries-raw-2/', name='countries-raw-2/', size=0, modificationTime=1738256132000),
 FileInfo(path='abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/processed-users-raw-2/', name='processed-users-raw-2/', size=0, modificationTime=1738604918000),
 FileInfo(path='abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/sellers-raw-2/', name='sellers-raw-2/', size=0, modificationTime=1738256125000),
 FileInfo(path='abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/to-process-users-raw-2/', name='to-process-users-raw-2/', size=0, modificationTime=1738256110000)]

In [0]:
# Move the user files out of the "to-process" folder into the "processed"
# folder of the landing zone, including everything nested below it.
# NOTE(review): the source folder itself is moved, so a second run will
# presumably fail once it no longer exists — confirm the intended behaviour.
dbutils.fs.mv(
    'abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/to-process-users-raw-2/',
    'abfss://landing-zone-2@ecomadlsalyce.dfs.core.windows.net/processed-users-raw-2/',
    recurse=True,
)

True

In [0]:

%sql
-- Publish the gold-layer Delta data as a table for analytics.
CREATE DATABASE IF NOT EXISTS comp_user;
USE comp_user;

-- Temporary view over the Delta files stored at the gold path.
CREATE OR REPLACE TEMP VIEW comprehensive_user_view AS
SELECT *
FROM delta.`/mnt/delta/tables/gold`;

-- Copy the view's rows into a Delta table in the comp_user database.
CREATE OR REPLACE TABLE comprehensive_user_data
USING DELTA AS
SELECT *
FROM comprehensive_user_view;

num_affected_rows,num_inserted_rows
