<h2 align="center">Silver Layer</h2>
The data from the bronze layer is brought into the silver layer.

**Input data:** Staging tables: stg_orders, stg_products, stg_customers

**Transformations:** 
1. Customer Name and Phone Number in the stg_customers table are cleansed and enriched.
2. A new column 'rounded_price' is derived for both the stg_orders and stg_products data; together with the product ID it acts as a composite key for join operations.
3. The Price column from stg_orders is cast to the double data type.
4. A new column 'Order_year' is extracted from Order_Date in the stg_orders data.

**Output data:** Refined and enriched tables: ref_orders, ref_products, ref_customers.


In [0]:
%run /Users/vishnuas1987@gmail.com/PEI_Case_Study/Functions/PEI_transformation_functions

In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.functions import col, lit, round, split, to_date, transform, when
from pyspark.sql.types import DateType

In [0]:
# Build the silver (refined) layer: read the staging tables, apply the
# cleansing/enrichment transformations and persist the results as Delta tables
# ref_customers, ref_products and ref_orders.
try:

    # Load the staging tables produced by the previous layer.
    df_products = spark.table("stg_products")
    df_customers = spark.table("stg_customers")
    df_orders = spark.table("stg_orders")

    # Customers: clean Customer_Name, enrich it using the email column and clean
    # the phone number. The three helpers come from the notebook pulled in via
    # %run (not visible here), so their exact rules must be checked there.
    df_customers_trans = (
        df_customers
        .transform(remove_non_alphabet, 'Customer_Name')
        .transform(enrich_name, 'Customer_Name', 'email')
        .transform(clean_phone_number, 'phone')
    )

    # Products: add rounded_price (unit price rounded to a whole number) so it
    # can be used together with product_id as a composite join key.
    df_product_trans = df_products.withColumn(
        "rounded_price", round(col("Price_per_product"))
    )

    # Orders:
    #   1. cast Price to double
    #   2. derive Order_year as the third "/"-separated part of Order_Date
    #   3. derive rounded_price as round(Price / Quantity) so an order line matches
    #      a single product row instead of duplicating product data on join
    #   4. replace a null rounded_price (e.g. null Price or Quantity) with 0
    df_orders_trans = (
        df_orders
        .withColumn("Price", col("Price").cast("double"))
        .withColumn("Order_year", split(col("Order_Date"), "/")[2])
        .withColumn("rounded_price", round(col("Price") / col("Quantity")))
        .withColumn(
            "rounded_price",
            when(col("rounded_price").isNull(), 0).otherwise(col("rounded_price")),
        )
    )

    # Persist to the refined/silver layer. "overwrite" makes the cell re-runnable:
    # the default save mode raises as soon as the table already exists.
    df_customers_trans.write.format("delta").mode("overwrite").saveAsTable("ref_customers")
    df_product_trans.write.format("delta").mode("overwrite").saveAsTable("ref_products")
    df_orders_trans.write.format("delta").mode("overwrite").saveAsTable("ref_orders")

except Exception as e:
    print("Error occurred while creating silver layer", str(e))
    # Re-raise so the notebook/job run is marked as failed instead of silently
    # continuing with missing or stale refined tables.
    raise

In [0]:
%sql
-- Preview the refined orders table.
SELECT *
FROM ref_orders;

Customer_ID,Discount,Order_Date,Order_ID,Price,Product_ID,Profit,Quantity,Row_ID,Ship_Date,Ship_Mode,Order_year,rounded_price
JK-15370,0.3,21/8/2016,CA-2016-122581,573.174,FUR-CH-10002961,63.686,7,1,25/8/2016,Standard Class,2016,82.0
BD-11320,0.0,23/9/2017,CA-2017-117485,291.96,TEC-AC-10004659,102.186,4,2,29/9/2017,Standard Class,2017,73.0
LB-16795,0.7,6/10/2016,US-2016-157490,17.0,OFF-BI-10002824,-14.92,4,3,7/10/2016,First Class,2016,4.0
KB-16315,0.2,2/7/2015,CA-2015-111703,15.552,OFF-PA-10003349,5.6376,3,4,9/7/2015,Standard Class,2015,5.0
DO-13435,0.2,3/10/2014,CA-2014-108903,142.488,TEC-AC-10003023,-3.0,3,5,3/10/2014,Same Day,2014,47.0
CB-12025,0.0,27/11/2016,CA-2016-117583,79.95,OFF-BI-10004233,38.376,5,6,30/11/2016,First Class,2016,16.0
SM-20005,0.0,10/12/2014,CA-2014-148488,11.0,OFF-PA-10004470,5.2256,2,7,15/12/2014,Standard Class,2014,6.0
RD-19480,0.0,1/12/2016,CA-2016-136434,17.31,FUR-FU-10001196,5.193,3,8,7/12/2016,Standard Class,2016,6.0
JM-16195,0.0,30/4/2014,CA-2014-160094,826.0,OFF-ST-10000585,214.0,5,9,2/5/2014,First Class,2014,165.0
SC-20230,0.0,3/8/2017,CA-2017-141747,16.06,OFF-ST-10003996,4.1756,1,10,8/8/2017,Second Class,2017,16.0


In [0]:
%sql
-- Preview the refined products table.
SELECT *
FROM ref_products;

Product_ID,Category,Sub_Category,Product_Name,State,Price_per_product,rounded_price
FUR-CH-10002961,Furniture,Chairs,"Leather Task Chair, Black",New York,81.882,82.0
TEC-AC-10004659,Technology,Accessories,Imation Secure+ Hardware Encrypted USB 2.0 Flash Drive; 16GB,Oklahoma,72.99,73.0
OFF-BI-10002824,Office Supplies,Binders,Recycled Easel Ring Binders,Colorado,4.25,4.0
OFF-PA-10003349,Office Supplies,Paper,Xerox 1957,Florida,5.184,5.0
TEC-AC-10003023,Technology,Accessories,Logitech G105 Gaming Keyboard,Ohio,47.496,47.0
OFF-BI-10004233,Office Supplies,Binders,"GBC Pre-Punched Binding Paper, Plastic, White, 8-1/2"" x 11""",New Jersey,15.99,16.0
OFF-PA-10004470,Office Supplies,Paper,"Adams Write n' Stick Phone Message Book, 11"" X 5 1/4"", 200 Messages",New York,5.5,6.0
FUR-FU-10001196,Furniture,Furnishings,DAX Cubicle Frames - 8x10,Indiana,5.77,6.0
OFF-ST-10000585,Office Supplies,Storage,Economy Rollaway Files,Kentucky,165.2,165.0
OFF-ST-10003996,Office Supplies,Storage,"Letter/Legal File Tote with Clear Snap-On Lid, Black Granite",Washington,16.06,16.0


In [0]:
%sql
-- Preview the refined customers table.
SELECT *
FROM ref_customers;

Customer_ID,Customer_Name,email,phone,address,Segment,Country,City,State,Postal_Code,Region,name_filled,_cleaned_phone
LW-16990,Lindsay Williams,angelahoward623@gmail.com,853-660-7372x805,"58474 Lopez Port New Chadtown, IL 55760",Corporate,United States,Monroe,North Carolina,28110,South,False,+1(853)660-7372x805
YS-21880,Yana Sorensen,tylerjones637@gmail.com,912-445-7716x34658,"5856 Nicole Crest Apt. 004 North Christineland, PW 84482",Corporate,United States,Los Angeles,California,90036,West,False,+1(912)445-7716x34658
EH-14125,Eugene Hildebrand,karenjacobs432@gmail.com,#ERROR!,"589 Stacey Prairie Suite 695 East Justin, DC 94097",Home Office,United States,Philadelphia,Pennsylvania,19140,East,False,Invalid phone number
BT-11305,Beth Thompson,audreyrichardson371@gmail.com,-3305,"589 Wilson Club North Lisaside, DE 98079",Home Office,United States,Superior,Wisconsin,54880,Central,False,Invalid phone number
EB-13750,Edward Becker,thomashickman353@gmail.com,(300)852-8375x7146,"58996 Anthony Trafficway Suite 095 Port Melissa, MT 84143",Corporate,United States,Plantation,Florida,33317,South,False,+1(300)852-8375x7146
KH-16630,Ken Heidel,randyelliott568@gmail.com,709-181-6245x66499,"59023 Boyd Forest Apt. 266 Port Davidmouth, CT 34959",Corporate,United States,Clinton,Maryland,20735,East,False,+1(709)181-6245x66499
EM-14095,Eudokia Martin,markstewart528@gmail.com,5087731130,"594 Cheyenne Heights Apt. 511 Rileyland, IL 73967",Corporate,United States,Springfield,Virginia,22153,South,False,+1(508)773-1130
MH-17290,Marc Harrigan,anthonysmith783@gmail.com,#ERROR!,"5949 Nancy Road Lake Todd, MD 59856",Home Office,United States,Los Angeles,California,90036,West,False,Invalid phone number
AR-10510,Andrew Roberts,janiceroman109@gmail.com,(185)406-7142x057,"5953 Gill Centers Masontown, MI 39024",Consumer,United States,Philadelphia,Pennsylvania,19134,East,False,+1(185)406-7142x057
TC-21295,Toby Carlisle,dianaherring411@gmail.com,#ERROR!,"5955 Jennifer Lakes Apt. 153 South Kevin, MP 42733",Consumer,United States,San Diego,California,92024,West,False,Invalid phone number
