In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

### **Data Reading**

In [0]:
# Load the bronze-layer orders dataset (stored as parquet) into a Spark DataFrame.
df = spark.read.parquet(
    "abfss://bronze@dlsdatabrickseteteamea.dfs.core.windows.net/orders"
)

In [0]:
# Render the freshly loaded orders for a first visual inspection.
df.display()

In [0]:
# Show the column names and data types of the loaded DataFrame.
df.printSchema()

In [0]:
# Strip the leading underscore from the rescued-data column name,
# then print the schema to confirm the new name.
df = df.withColumnRenamed(existing="_rescued_data", new="rescued_data")
df.printSchema()

In [0]:
# Remove the rescued-data column (renamed in the previous cell) from the DataFrame.
df = df.drop("rescued_data")

In [0]:
# Render the DataFrame after the rescued-data column has been dropped.
df.display()

**Transform with Functions**

In [0]:
# Replace order_date with its timestamp-typed equivalent.
order_ts = to_timestamp(col("order_date"))
df = df.withColumn("order_date", order_ts)
df.display()

In [0]:
# Derive a "year" column from the order_date timestamp.
order_year = year(col("order_date"))
df = df.withColumn("year", order_year)
df.display()

**Transform with Window Functions**

In [0]:
# Add "flag": dense_rank() of each order within its year, largest total_amount first.
per_year_by_amount = Window.partitionBy("year").orderBy(desc("total_amount"))
df1 = df.withColumn("flag", dense_rank().over(per_year_by_amount))
df1.display()

In [0]:
# Add "rank_flag": rank() of each order within its year, largest total_amount first.
per_year_by_amount = Window.partitionBy("year").orderBy(desc("total_amount"))
df1 = df1.withColumn("rank_flag", rank().over(per_year_by_amount))
df1.display()

In [0]:
# Add "row_flag": row_number() of each order within its year, largest total_amount first.
per_year_by_amount = Window.partitionBy("year").orderBy(desc("total_amount"))
df1 = df1.withColumn("row_flag", row_number().over(per_year_by_amount))
df1.display()

### **Classes - OOP**

In [0]:
def _ranking_window(partition_col, order_col):
    """Build the window spec shared by every ranking method.

    Rows are partitioned by ``partition_col`` and ordered by ``order_col``
    descending, so position 1 is the largest ``order_col`` value in a partition.
    """
    return Window.partitionBy(partition_col).orderBy(desc(order_col))


class my_class_windows:
    """Adds ranking columns to a DataFrame using Spark window functions.

    Each method takes a DataFrame and returns the result of
    ``df.withColumn(...)`` with one additional ranking column. With the
    default arguments the rows are partitioned by ``year`` and ordered by
    ``total_amount`` descending.

    The methods share their names with the Spark functions they wrap. Inside
    a method body the bare names ``dense_rank`` / ``rank`` / ``row_number``
    still resolve to the imported Spark functions, because class attributes
    are not visible as bare names from within a method.
    """

    def dense_rank(self, df, partition_col="year", order_col="total_amount",
                   output_col="flag"):
        """Return ``df`` with a ``dense_rank()`` column added.

        Tied rows share a rank and no rank values are skipped after a tie.

        Args:
            df: DataFrame containing ``partition_col`` and ``order_col``.
            partition_col: Column that defines the window partitions.
            order_col: Column ordered descending inside each partition.
            output_col: Name of the column that receives the rank.

        Returns:
            The DataFrame produced by ``df.withColumn(output_col, ...)``.
        """
        window_spec = _ranking_window(partition_col, order_col)
        return df.withColumn(output_col, dense_rank().over(window_spec))

    def rank(self, df, partition_col="year", order_col="total_amount",
             output_col="rank_flag"):
        """Return ``df`` with a ``rank()`` column added.

        Tied rows share a rank and the following rank values are skipped.

        Args:
            df: DataFrame containing ``partition_col`` and ``order_col``.
            partition_col: Column that defines the window partitions.
            order_col: Column ordered descending inside each partition.
            output_col: Name of the column that receives the rank.

        Returns:
            The DataFrame produced by ``df.withColumn(output_col, ...)``.
        """
        window_spec = _ranking_window(partition_col, order_col)
        return df.withColumn(output_col, rank().over(window_spec))

    def row_number(self, df, partition_col="year", order_col="total_amount",
                   output_col="row_flag"):
        """Return ``df`` with a ``row_number()`` column added.

        Every row in a partition gets a distinct sequential number; the
        relative order of rows with equal ``order_col`` values is not fixed
        by this window spec.

        Args:
            df: DataFrame containing ``partition_col`` and ``order_col``.
            partition_col: Column that defines the window partitions.
            order_col: Column ordered descending inside each partition.
            output_col: Name of the column that receives the row number.

        Returns:
            The DataFrame produced by ``df.withColumn(output_col, ...)``.
        """
        window_spec = _ranking_window(partition_col, order_col)
        return df.withColumn(output_col, row_number().over(window_spec))

In [0]:
# Bind a second name to the same DataFrame object (no copy is made);
# df_new is the input handed to the class-based ranking methods below.
df_new = df

In [0]:
# Render the DataFrame that will be passed to the ranking methods.
df_new.display()

**New object from class**

In [0]:
# Instantiate the ranking helper; it is constructed without arguments.
obj = my_class_windows()

In [0]:
# Apply the three ranking methods in sequence, each adding its own column
# to the output of the previous one.
df_result = df_new
for add_ranking_column in (obj.dense_rank, obj.rank, obj.row_number):
    df_result = add_ranking_column(df_result)
df_result.display()

### **Data Writing**

In [0]:
# Row count of the DataFrame that the next cell writes to the silver layer.
row_count = df.count()
print(row_count)

In [0]:
# Persist the transformed orders as Delta files in the silver container,
# replacing whatever was written there before.
# NOTE(review): this saves `df` (which carries the added "year" column), not
# the ranked `df_result` built above — confirm that is intended.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .save("abfss://silver@dlsdatabrickseteteamea.dfs.core.windows.net/orders")
)

### **Create the Delta Table from the External Delta Files**

In [0]:
%sql
-- Define the catalog table databricks_cata.silver.orders_silver over the
-- Delta files at the silver storage path (the path the write cell saves to).
-- IF NOT EXISTS leaves an already-defined table untouched, so the cell can be re-run.
CREATE TABLE IF NOT EXISTS databricks_cata.silver.orders_silver
USING DELTA
LOCATION 'abfss://silver@dlsdatabrickseteteamea.dfs.core.windows.net/orders'