# Bronze to Silver Layer

In [None]:
from pyspark.sql import functions as F
from pyspark.sql.functions import *

## Creating Access to ADLS 

In [None]:
# Storage account that hosts the bronze and silver containers used in this notebook.
storage_account_name = "awdataprojectbasit"

# Never paste the account key into the notebook: anyone who can read the notebook
# (or its version history) could use it. Fetch it from a Databricks secret scope instead.
# NOTE(review): the scope and key names below are placeholders — replace them with
# the secret scope / key that actually stores this storage account's access key.
storage_account_key = dbutils.secrets.get(scope="<your-secret-scope>", key="<your-secret-key-name>")
container_name = "<your-container-name>"
file_path = "<your-file-path>"  # e.g., "folder/data.csv"

# Register the key with Spark so abfss:// paths on this account can be accessed.
spark.conf.set(f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net", storage_account_key)


### Data Loading using dbutils

In [None]:
# List the entries at the root of the bronze container.
dbutils.fs.ls("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/")

[FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/calendar/', name='calendar/', size=0, modificationTime=1748349088000),
 FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/customers/', name='customers/', size=0, modificationTime=1748349108000),
 FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/product-categories/', name='product-categories/', size=0, modificationTime=1748350083000),
 FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/product-subcategories/', name='product-subcategories/', size=0, modificationTime=1748349125000),
 FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/products/', name='products/', size=0, modificationTime=1748349159000),
 FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/returns/', name='returns/', size=0, modificationTime=1748349176000),
 FileInfo(path='abfss://bronze@awdataprojectbasit.dfs.core.windows.net/sales-2015/', name='sales-2015/

### Creating Dataframes

In [None]:
# Load the bronze calendar folder as CSV; the first row is the header and
# column types are inferred from the data.
df_calender = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/calendar/")
)

In [None]:
# Render the calendar DataFrame as a notebook table.
display(df_calender)

Date
2015-01-01
2015-01-02
2015-01-03
2015-01-04
2015-01-05
2015-01-06
2015-01-07
2015-01-08
2015-01-09
2015-01-10


In [None]:
# Load the bronze customers folder as CSV with a header row and inferred column types.
df_customers = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/customers/")
)

In [None]:
# Print the inferred column names and types of the customers DataFrame.
df_customers.printSchema()

root
 |-- CustomerKey: integer (nullable = true)
 |-- Prefix: string (nullable = true)
 |-- FirstName: string (nullable = true)
 |-- LastName: string (nullable = true)
 |-- BirthDate: date (nullable = true)
 |-- MaritalStatus: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- EmailAddress: string (nullable = true)
 |-- AnnualIncome: string (nullable = true)
 |-- TotalChildren: integer (nullable = true)
 |-- EducationLevel: string (nullable = true)
 |-- Occupation: string (nullable = true)
 |-- HomeOwner: string (nullable = true)



In [None]:
# Load the bronze product-categories folder as CSV with a header row and inferred column types.
df_product_categories = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/product-categories/")
)

In [None]:
# Print the inferred column names and types of the product categories DataFrame.
df_product_categories.printSchema()

root
 |-- ProductCategoryKey: integer (nullable = true)
 |-- CategoryName: string (nullable = true)



In [None]:
# Load the bronze product-subcategories folder as CSV with a header row and inferred column types.
df_product_subcategories = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/product-subcategories/")
)

In [None]:
# Render the product subcategories DataFrame as a notebook table.
display(df_product_subcategories)

ProductSubcategoryKey,SubcategoryName,ProductCategoryKey
1,Mountain Bikes,1
2,Road Bikes,1
3,Touring Bikes,1
4,Handlebars,2
5,Bottom Brackets,2
6,Brakes,2
7,Chains,2
8,Cranksets,2
9,Derailleurs,2
10,Forks,2


In [None]:
# Load the bronze products folder as CSV with a header row and inferred column types.
df_products = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/products/")
)

In [None]:
# Number of rows in the products DataFrame.
df_products.count()

293

In [None]:
# Print the first 10 product rows as a text table.
df_products.show(10)

+----------+---------------------+----------+--------------------+--------------------+--------------------+------------+-----------+------------+-----------+------------+
|ProductKey|ProductSubcategoryKey|ProductSKU|         ProductName|           ModelName|  ProductDescription|ProductColor|ProductSize|ProductStyle|ProductCost|ProductPrice|
+----------+---------------------+----------+--------------------+--------------------+--------------------+------------+-----------+------------+-----------+------------+
|       214|                   31| HL-U509-R|Sport-100 Helmet,...|           Sport-100|Universal fit, we...|         Red|          0|           0|    13.0863|       34.99|
|       215|                   31|   HL-U509|Sport-100 Helmet,...|           Sport-100|Universal fit, we...|       Black|          0|           0|    12.0278|     33.6442|
|       218|                   23| SO-B909-M|Mountain Bike Soc...| Mountain Bike Socks|Combination of na...|       White|          M|       

In [None]:
# Load the bronze returns folder as CSV with a header row and inferred column types.
df_returns = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/returns/")
)

In [None]:
# Print the first 10 return rows as a text table.
df_returns.show(10)
     

+----------+------------+----------+--------------+
|ReturnDate|TerritoryKey|ProductKey|ReturnQuantity|
+----------+------------+----------+--------------+
|2015-01-18|           9|       312|             1|
|2015-01-18|          10|       310|             1|
|2015-01-21|           8|       346|             1|
|2015-01-22|           4|       311|             1|
|2015-02-02|           6|       312|             1|
|2015-02-15|           1|       312|             1|
|2015-02-19|           9|       311|             1|
|2015-02-24|           8|       314|             1|
|2015-03-08|           8|       350|             1|
|2015-03-13|           9|       350|             1|
+----------+------------+----------+--------------+
only showing top 10 rows



In [None]:
# Number of rows in the returns DataFrame.
df_returns.count()

1809

In [None]:
# Load the bronze sales-2015 folder as CSV with a header row and inferred column types.
df_sales_2015 = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/sales-2015/")
)

In [None]:
# Number of rows in the 2015 sales DataFrame.
df_sales_2015.count()

2630

In [None]:
# Print the first 10 rows of 2015 sales as a text table.
df_sales_2015.show(10)

+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
| OrderDate| StockDate|OrderNumber|ProductKey|CustomerKey|TerritoryKey|OrderLineItem|OrderQuantity|
+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
|2015-01-01|2001-09-21|    SO45080|       332|      14657|           1|            1|            1|
|2015-01-01|2001-12-05|    SO45079|       312|      29255|           4|            1|            1|
|2015-01-01|2001-10-29|    SO45082|       350|      11455|           9|            1|            1|
|2015-01-01|2001-11-16|    SO45081|       338|      26782|           6|            1|            1|
|2015-01-02|2001-12-15|    SO45083|       312|      14947|          10|            1|            1|
|2015-01-02|2001-10-12|    SO45084|       310|      29143|           4|            1|            1|
|2015-01-02|2001-12-18|    SO45086|       314|      18747|           9|            1|            1|


In [None]:
# Load the bronze sales-2016 folder as CSV with a header row and inferred column types.
df_sales_2016 = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/sales-2016/")
)

In [None]:
# Number of rows in the 2016 sales DataFrame.
df_sales_2016.count()

23935

In [None]:
# Print the first 10 rows of 2016 sales as a text table.
df_sales_2016.show(10)

+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
| OrderDate| StockDate|OrderNumber|ProductKey|CustomerKey|TerritoryKey|OrderLineItem|OrderQuantity|
+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
|2016-01-01|2002-10-17|    SO48797|       385|      14335|           1|            1|            1|
|2016-01-01|2002-09-30|    SO48802|       383|      24923|           9|            1|            1|
|2016-01-01|2002-11-29|    SO48801|       326|      15493|           1|            1|            1|
|2016-01-01|2002-11-16|    SO48799|       352|      26708|           4|            1|            1|
|2016-01-01|2002-12-16|    SO48798|       369|      23332|           9|            1|            1|
|2016-01-01|2002-12-02|    SO48800|       342|      15491|           5|            1|            1|
|2016-01-01|2002-10-19|    SO48795|       375|      16538|           8|            1|            1|


In [None]:
# Load the bronze sales-2017 folder as CSV with a header row and inferred column types.
df_sales_2017 = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/sales-2017/")
)

In [None]:
# Number of rows in the 2017 sales DataFrame.
df_sales_2017.count()

29481

In [None]:
# Print the first 10 rows of 2017 sales as a text table.
df_sales_2017.show(10)

+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
| OrderDate| StockDate|OrderNumber|ProductKey|CustomerKey|TerritoryKey|OrderLineItem|OrderQuantity|
+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
|2017-01-01|2003-12-13|    SO61285|       529|      23791|           1|            2|            2|
|2017-01-01|2003-09-24|    SO61285|       214|      23791|           1|            3|            1|
|2017-01-01|2003-09-04|    SO61285|       540|      23791|           1|            1|            1|
|2017-01-01|2003-09-28|    SO61301|       529|      16747|           1|            2|            2|
|2017-01-01|2003-10-21|    SO61301|       377|      16747|           1|            1|            1|
|2017-01-01|2003-10-23|    SO61301|       540|      16747|           1|            3|            1|
|2017-01-01|2003-09-04|    SO61269|       215|      11792|           4|            1|            1|


In [None]:
# Load the bronze territories folder as CSV with a header row and inferred column types.
df_territories = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", True)
    .load("abfss://bronze@awdataprojectbasit.dfs.core.windows.net/territories/")
)

In [None]:
# Number of rows in the territories DataFrame.
df_territories.count()

10

In [None]:
# Print the inferred column names and types of the territories DataFrame.
df_territories.printSchema()

root
 |-- SalesTerritoryKey: integer (nullable = true)
 |-- Region: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- Continent: string (nullable = true)



In [None]:
# Print up to 10 territory rows as a text table.
df_territories.show(10)

+-----------------+--------------+--------------+-------------+
|SalesTerritoryKey|        Region|       Country|    Continent|
+-----------------+--------------+--------------+-------------+
|                1|     Northwest| United States|North America|
|                2|     Northeast| United States|North America|
|                3|       Central| United States|North America|
|                4|     Southwest| United States|North America|
|                5|     Southeast| United States|North America|
|                6|        Canada|        Canada|North America|
|                7|        France|        France|       Europe|
|                8|       Germany|       Germany|       Europe|
|                9|     Australia|     Australia|      Pacific|
|               10|United Kingdom|United Kingdom|       Europe|
+-----------------+--------------+--------------+-------------+



## Transformations

#### Calendar

In [None]:
# Split the calendar date into separate day-of-month, month and year columns,
# then print a preview of the result.
df_calender = (
    df_calender
    .withColumn("day", dayofmonth("Date"))
    .withColumn("month", month("Date"))
    .withColumn("year", year("Date"))
)
df_calender.show()

+----------+---+-----+----+
|      Date|day|month|year|
+----------+---+-----+----+
|2015-01-01|  1|    1|2015|
|2015-01-02|  2|    1|2015|
|2015-01-03|  3|    1|2015|
|2015-01-04|  4|    1|2015|
|2015-01-05|  5|    1|2015|
|2015-01-06|  6|    1|2015|
|2015-01-07|  7|    1|2015|
|2015-01-08|  8|    1|2015|
|2015-01-09|  9|    1|2015|
|2015-01-10| 10|    1|2015|
|2015-01-11| 11|    1|2015|
|2015-01-12| 12|    1|2015|
|2015-01-13| 13|    1|2015|
|2015-01-14| 14|    1|2015|
|2015-01-15| 15|    1|2015|
|2015-01-16| 16|    1|2015|
|2015-01-17| 17|    1|2015|
|2015-01-18| 18|    1|2015|
|2015-01-19| 19|    1|2015|
|2015-01-20| 20|    1|2015|
+----------+---+-----+----+
only showing top 20 rows



In [None]:
# Print the calendar schema, including the derived day / month / year columns.
df_calender.printSchema()

root
 |-- Date: date (nullable = true)
 |-- day: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- year: integer (nullable = true)



In [None]:
# Persist the enriched calendar to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every row each time the notebook is re-run.
(
    df_calender.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/Calender/")
    .save()
)

#### Customer 

In [None]:
# Print the first 10 customer rows as a text table.
df_customers.show(10)

+-----------+------+---------+--------+----------+-------------+------+--------------------+------------+-------------+--------------+------------+---------+
|CustomerKey|Prefix|FirstName|LastName| BirthDate|MaritalStatus|Gender|        EmailAddress|AnnualIncome|TotalChildren|EducationLevel|  Occupation|HomeOwner|
+-----------+------+---------+--------+----------+-------------+------+--------------------+------------+-------------+--------------+------------+---------+
|      11000|   MR.|      JON|    YANG|1966-04-08|            M|     M|jon24@adventure-w...|    $90,000 |            2|     Bachelors|Professional|        Y|
|      11001|   MR.|   EUGENE|   HUANG|1965-05-14|            S|     M|eugene10@adventur...|    $60,000 |            3|     Bachelors|Professional|        N|
|      11002|   MR.|    RUBEN|  TORRES|1965-08-12|            M|     M|ruben35@adventure...|    $60,000 |            3|     Bachelors|Professional|        Y|
|      11003|   MS.|  CHRISTY|     ZHU|1968-02-15|  

In [None]:
# Build a single display name by joining prefix, first name and last name with spaces.
df_customers = df_customers.withColumn(
    'fullName',
    concat_ws(' ', col('Prefix'), col('FirstName'), col('LastName')),
)

In [None]:
# Print the first 10 customer rows, now including the fullName column.
df_customers.show(10)


+-----------+------+---------+--------+----------+-------------+------+--------------------+------------+-------------+--------------+------------+---------+--------------------+
|CustomerKey|Prefix|FirstName|LastName| BirthDate|MaritalStatus|Gender|        EmailAddress|AnnualIncome|TotalChildren|EducationLevel|  Occupation|HomeOwner|            fullName|
+-----------+------+---------+--------+----------+-------------+------+--------------------+------------+-------------+--------------+------------+---------+--------------------+
|      11000|   MR.|      JON|    YANG|1966-04-08|            M|     M|jon24@adventure-w...|    $90,000 |            2|     Bachelors|Professional|        Y|        MR. JON YANG|
|      11001|   MR.|   EUGENE|   HUANG|1965-05-14|            S|     M|eugene10@adventur...|    $60,000 |            3|     Bachelors|Professional|        N|    MR. EUGENE HUANG|
|      11002|   MR.|    RUBEN|  TORRES|1965-08-12|            M|     M|ruben35@adventure...|    $60,000 |

In [None]:
# Persist the customers (with fullName) to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every customer each time the notebook is re-run.
(
    df_customers.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/Customers/")
    .save()
)

In [None]:
# Render the product subcategories DataFrame as a notebook table.
df_product_subcategories.display()

ProductSubcategoryKey,SubcategoryName,ProductCategoryKey
1,Mountain Bikes,1
2,Road Bikes,1
3,Touring Bikes,1
4,Handlebars,2
5,Bottom Brackets,2
6,Brakes,2
7,Chains,2
8,Cranksets,2
9,Derailleurs,2
10,Forks,2


In [None]:
# Persist the product categories to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every category each time the notebook is re-run.
(
    df_product_categories.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/product-categories/")
    .save()
)

In [None]:
# Print the inferred column names and types of the products DataFrame.
df_products.printSchema()

root
 |-- ProductKey: integer (nullable = true)
 |-- ProductSubcategoryKey: integer (nullable = true)
 |-- ProductSKU: string (nullable = true)
 |-- ProductName: string (nullable = true)
 |-- ModelName: string (nullable = true)
 |-- ProductDescription: string (nullable = true)
 |-- ProductColor: string (nullable = true)
 |-- ProductSize: string (nullable = true)
 |-- ProductStyle: string (nullable = true)
 |-- ProductCost: double (nullable = true)
 |-- ProductPrice: double (nullable = true)



In [None]:
# Print the first 10 product rows as a text table.
df_products.show(10)

+----------+---------------------+----------+--------------------+--------------------+--------------------+------------+-----------+------------+-----------+------------+
|ProductKey|ProductSubcategoryKey|ProductSKU|         ProductName|           ModelName|  ProductDescription|ProductColor|ProductSize|ProductStyle|ProductCost|ProductPrice|
+----------+---------------------+----------+--------------------+--------------------+--------------------+------------+-----------+------------+-----------+------------+
|       214|                   31| HL-U509-R|Sport-100 Helmet,...|           Sport-100|Universal fit, we...|         Red|          0|           0|    13.0863|       34.99|
|       215|                   31|   HL-U509|Sport-100 Helmet,...|           Sport-100|Universal fit, we...|       Black|          0|           0|    12.0278|     33.6442|
|       218|                   23| SO-B909-M|Mountain Bike Soc...| Mountain Bike Socks|Combination of na...|       White|          M|       

In [None]:
# Add the per-unit profit (price minus cost) and that profit as a percentage of cost.
# The chain is wrapped in parentheses instead of ending on a dangling "\" line
# continuation, which only parsed because a whitespace-only line happened to follow it.
# NOTE(review): "Profit Percentage" contains a space; some Spark/Delta writers reject
# such column names — confirm the target runtime accepts it before relying on the write.
df_products = (
    df_products
    .withColumn("Profit", col("ProductPrice") - col("ProductCost"))
    .withColumn("Profit Percentage", (col("Profit") / col("ProductCost")) * 100)
)

In [None]:
# Keep the part of the SKU that precedes the first "-" (e.g. "HL-U509-R" -> "HL").
df_products = df_products.withColumn(
    "SKU_Prefix",
    split("ProductSKU", "-").getItem(0),
)

In [None]:
# Render the products DataFrame, including the derived columns, as a notebook table.
df_products.display()

ProductKey,ProductSubcategoryKey,ProductSKU,ProductName,ModelName,ProductDescription,ProductColor,ProductSize,ProductStyle,ProductCost,ProductPrice,Profit,Profit Percentage,SKU_Prefix
214,31,HL-U509-R,"Sport-100 Helmet, Red",Sport-100,"Universal fit, well-vented, lightweight , snap-on visor.",Red,0,0,13.0863,34.99,21.9037,167.37886186316987,HL
215,31,HL-U509,"Sport-100 Helmet, Black",Sport-100,"Universal fit, well-vented, lightweight , snap-on visor.",Black,0,0,12.0278,33.6442,21.6164,179.72031460449958,HL
218,23,SO-B909-M,"Mountain Bike Socks, M",Mountain Bike Socks,Combination of natural and synthetic fibers stays dry and provides just the right cushioning.,White,M,U,3.3963,9.5,6.1037,179.71616170538528,SO
219,23,SO-B909-L,"Mountain Bike Socks, L",Mountain Bike Socks,Combination of natural and synthetic fibers stays dry and provides just the right cushioning.,White,L,U,3.3963,9.5,6.1037,179.71616170538528,SO
220,31,HL-U509-B,"Sport-100 Helmet, Blue",Sport-100,"Universal fit, well-vented, lightweight , snap-on visor.",Blue,0,0,12.0278,33.6442,21.6164,179.72031460449958,HL
223,19,CA-1098,AWC Logo Cap,Cycling Cap,Traditional style with a flip-up brim; one-size fits all.,Multi,0,U,5.7052,8.6442,2.939,51.51440790857463,CA
226,21,LJ-0192-S,"Long-Sleeve Logo Jersey, S",Long-Sleeve Logo Jersey,Unisex long-sleeve AWC logo microfiber cycling jersey,Multi,S,U,31.7244,48.0673,16.342900000000004,51.51523748282081,LJ
229,21,LJ-0192-M,"Long-Sleeve Logo Jersey, M",Long-Sleeve Logo Jersey,Unisex long-sleeve AWC logo microfiber cycling jersey,Multi,M,U,31.7244,48.0673,16.342900000000004,51.51523748282081,LJ
232,21,LJ-0192-L,"Long-Sleeve Logo Jersey, L",Long-Sleeve Logo Jersey,Unisex long-sleeve AWC logo microfiber cycling jersey,Multi,L,U,31.7244,48.0673,16.342900000000004,51.51523748282081,LJ
235,21,LJ-0192-X,"Long-Sleeve Logo Jersey, XL",Long-Sleeve Logo Jersey,Unisex long-sleeve AWC logo microfiber cycling jersey,Multi,XL,U,31.7244,48.0673,16.342900000000004,51.51523748282081,LJ


In [None]:
# Persist the enriched products to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every product each time the notebook is re-run.
(
    df_products.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/products/")
    .save()
)

In [None]:
# Persist the product subcategories to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every subcategory each time the notebook is re-run.
(
    df_product_subcategories.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/product-subcategories/")
    .save()
)

In [None]:
# Render the returns DataFrame as a notebook table.
df_returns.display()

ReturnDate,TerritoryKey,ProductKey,ReturnQuantity
2015-01-18,9,312,1
2015-01-18,10,310,1
2015-01-21,8,346,1
2015-01-22,4,311,1
2015-02-02,6,312,1
2015-02-15,1,312,1
2015-02-19,9,311,1
2015-02-24,8,314,1
2015-03-08,8,350,1
2015-03-13,9,350,1


In [None]:
# Persist the returns to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every return each time the notebook is re-run.
(
    df_returns.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/returns/")
    .save()
)

In [None]:
# Persist the territories to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: the whole bronze folder is re-read on
# every run, so 'append' would duplicate every territory each time the notebook is re-run.
(
    df_territories.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/territories/")
    .save()
)

#### Sales Data

In [None]:
# Print a preview of the 2015 sales rows as a text table.
df_sales_2015.show()

+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
| OrderDate| StockDate|OrderNumber|ProductKey|CustomerKey|TerritoryKey|OrderLineItem|OrderQuantity|
+----------+----------+-----------+----------+-----------+------------+-------------+-------------+
|2015-01-01|2001-09-21|    SO45080|       332|      14657|           1|            1|            1|
|2015-01-01|2001-12-05|    SO45079|       312|      29255|           4|            1|            1|
|2015-01-01|2001-10-29|    SO45082|       350|      11455|           9|            1|            1|
|2015-01-01|2001-11-16|    SO45081|       338|      26782|           6|            1|            1|
|2015-01-02|2001-12-15|    SO45083|       312|      14947|          10|            1|            1|
|2015-01-02|2001-10-12|    SO45084|       310|      29143|           4|            1|            1|
|2015-01-02|2001-12-18|    SO45086|       314|      18747|           9|            1|            1|


In [None]:
# Stack the three yearly sales extracts into a single DataFrame.
# unionByName matches columns by name, so a yearly file whose columns arrive in a
# different order cannot silently place values under the wrong header
# (plain union() resolves columns purely by position).
df_sales = df_sales_2015.unionByName(df_sales_2016).unionByName(df_sales_2017)

In [None]:
# Print the column names and types of the combined sales DataFrame.
df_sales.printSchema()

root
 |-- OrderDate: date (nullable = true)
 |-- StockDate: date (nullable = true)
 |-- OrderNumber: string (nullable = true)
 |-- ProductKey: integer (nullable = true)
 |-- CustomerKey: integer (nullable = true)
 |-- TerritoryKey: integer (nullable = true)
 |-- OrderLineItem: integer (nullable = true)
 |-- OrderQuantity: integer (nullable = true)



In [None]:
from pyspark.sql.functions import *

In [None]:
# Tag every sale with the calendar year of its order date.
df_sales = df_sales.withColumn(
    "Year",
    year('OrderDate'),
)

In [None]:
# Print the sales schema, now including the derived Year column.
df_sales.printSchema()

root
 |-- OrderDate: date (nullable = true)
 |-- StockDate: date (nullable = true)
 |-- OrderNumber: string (nullable = true)
 |-- ProductKey: integer (nullable = true)
 |-- CustomerKey: integer (nullable = true)
 |-- TerritoryKey: integer (nullable = true)
 |-- OrderLineItem: integer (nullable = true)
 |-- OrderQuantity: integer (nullable = true)
 |-- Year: integer (nullable = true)



In [None]:
# Total units ordered per product over the combined sales DataFrame.
# F.sum is referenced explicitly: a bare `sum` only resolves to the Spark aggregate
# because the wildcard pyspark import shadows Python's builtin sum(); if that import
# has not run, the builtin would be called on the column name and fail.
product_sales = df_sales.groupBy("ProductKey").agg(
    F.sum("OrderQuantity").alias("TotalQuantityOrdered")
)

In [None]:
# Preview the rows whose OrderLineItem x OrderQuantity product differs from 1.
# NOTE(review): OrderLineItem looks like a line sequence number within an order
# rather than a quantity, so this product may not be a meaningful quantity — confirm intent.
(
    df_sales
    .withColumn("TotalLineQuantity", col('OrderLineItem') * col('OrderQuantity'))
    .filter(col('TotalLineQuantity') != 1)
    .show()
)

+----------+----------+-----------+----------+-----------+------------+-------------+-------------+----+-----------------+
| OrderDate| StockDate|OrderNumber|ProductKey|CustomerKey|TerritoryKey|OrderLineItem|OrderQuantity|Year|TotalLineQuantity|
+----------+----------+-----------+----------+-----------+------------+-------------+-------------+----+-----------------+
|2016-07-01|2003-03-08|    SO51176|       479|      18239|           9|            2|            1|2016|                2|
|2016-07-01|2003-04-06|    SO51179|       529|      22430|           7|            3|            2|2016|                6|
|2016-07-01|2003-04-27|    SO51179|       486|      22430|           7|            4|            1|2016|                4|
|2016-07-01|2003-04-06|    SO51179|       540|      22430|           7|            2|            1|2016|                2|
|2016-07-01|2003-04-06|    SO51185|       541|      27767|           8|            2|            3|2016|                6|
|2016-07-01|2003

In [None]:
# Store the OrderLineItem x OrderQuantity product as the TotalLineQuantity column.
# NOTE(review): OrderLineItem looks like a line sequence number within an order
# rather than a quantity, so this product may not be a meaningful quantity — confirm
# intent before downstream consumers rely on this column.
df_sales = df_sales.withColumn(
    "TotalLineQuantity",
    col('OrderLineItem') * col('OrderQuantity'),
)

In [None]:
# Persist the combined, enriched sales to the silver container as Parquet.
# 'overwrite' keeps this cell idempotent: df_sales is rebuilt from the full yearly
# bronze folders on every run, so 'append' would duplicate every sale each time
# the notebook is re-run.
(
    df_sales.write
    .format('parquet')
    .mode('overwrite')
    .option('path', "abfss://silver@awdataprojectbasit.dfs.core.windows.net/sales/")
    .save()
)

#### Sales Analysis

In [None]:
# Number of distinct orders placed on each order date.
# countDistinct is used because one order spans several rows (one per line item,
# all sharing the same OrderNumber); a plain count('OrderNumber') would count
# line items rather than orders.
(
    df_sales
    .groupBy('OrderDate')
    .agg(countDistinct('OrderNumber').alias('total_order'))
    .display()
)

OrderDate,total_order
2015-03-09,6
2015-05-19,6
2015-03-06,9
2015-04-09,5
2015-09-02,7
2015-12-22,12
2015-05-10,3
2015-09-28,2
2015-03-12,5
2015-03-16,5


Databricks visualization. Run in Databricks to view.

In [None]:
# Render the product categories DataFrame as a notebook table.
df_product_categories.display()

ProductCategoryKey,CategoryName
1,Bikes
2,Components
3,Clothing
4,Accessories


Databricks visualization. Run in Databricks to view.

In [None]:
# Render the territories DataFrame as a notebook table.
df_territories.display()

SalesTerritoryKey,Region,Country,Continent
1,Northwest,United States,North America
2,Northeast,United States,North America
3,Central,United States,North America
4,Southwest,United States,North America
5,Southeast,United States,North America
6,Canada,Canada,North America
7,France,France,Europe
8,Germany,Germany,Europe
9,Australia,Australia,Pacific
10,United Kingdom,United Kingdom,Europe


Databricks visualization. Run in Databricks to view.