**Store IDs, keys, paths**

In [None]:
# Identifiers and storage locations consumed by the cells below.
# application_id and secret are passed to Spark as the OAuth2 client id / client secret,
# and directory_id is interpolated into the login.microsoftonline.com endpoint URL.
# NOTE(review): these values look like placeholders — replace before running.
application_id = "application-id"
directory_id = "directory-id"
# NOTE(review): the client secret is stored in plain text in the notebook. Consider
# loading it from a secret store (e.g. a Databricks secret scope) instead.
secret = "secret"
# Root URIs of the bronze container (read from) and silver container (written to).
bronze_path = "abfss://bronze@storage.dfs.core.windows.net"
silver_path = "abfss://silver@storage.dfs.core.windows.net"

**Configuration**

In [None]:
# Configure the ABFS driver to authenticate against the storage account with
# OAuth 2.0 client credentials (application id + secret).
# Host of the storage account; must match the host used in bronze_path / silver_path.
storage_host = "storage.dfs.core.windows.net"

oauth_settings = {
    f"fs.azure.account.auth.type.{storage_host}": "OAuth",
    f"fs.azure.account.oauth.provider.type.{storage_host}":
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    f"fs.azure.account.oauth2.client.id.{storage_host}": application_id,
    f"fs.azure.account.oauth2.client.secret.{storage_host}": secret,
    # The token provider needs the tenant's token endpoint, i.e. the URL must end in
    # "/oauth2/token"; the bare tenant URL is not a token endpoint.
    f"fs.azure.account.oauth2.client.endpoint.{storage_host}":
        f"https://login.microsoftonline.com/{directory_id}/oauth2/token",
}

for conf_key, conf_value in oauth_settings.items():
    spark.conf.set(conf_key, conf_value)

**List Bronze Directory**

In [None]:
# List the entries at the root of the bronze container; as the last expression in the
# cell, the returned listing is shown as the cell output.
dbutils.fs.ls(bronze_path)

# Read Data

**Trip Type**

In [None]:
# Load the trip-type CSV from the bronze layer, treating the first row as the header
# and letting Spark infer the column types.
trip_type_df = (
    spark.read
    .format("csv")
    .options(inferSchema=True, header=True)
    .load(f"{bronze_path}/trip_type")
)
trip_type_df.display()

**Trip Zone**

In [None]:
# Load the trip-zone CSV from the bronze layer, treating the first row as the header
# and letting Spark infer the column types.
trip_zone_df = (
    spark.read
    .format("csv")
    .options(inferSchema=True, header=True)
    .load(f"{bronze_path}/trip_zone")
)
trip_zone_df.display()

**Trip Data**

In [None]:
# The schema classes are imported here because no earlier cell brings them into scope;
# without this the cell fails with NameError on a fresh kernel.
from pyspark.sql.types import (
    DoubleType,
    LongType,
    StringType,
    StructField,
    StructType,
    TimestampType,
)

# Explicit schema applied when reading the trip Parquet files (all columns nullable).
my_schema = StructType([
    StructField("VendorID", LongType(), True),
    StructField("lpep_pickup_datetime", TimestampType(), True),
    StructField("lpep_dropoff_datetime", TimestampType(), True),
    StructField("store_and_fwd_flag", StringType(), True),
    StructField("RatecodeID", LongType(), True),
    StructField("PULocationID", LongType(), True),
    StructField("DOLocationID", LongType(), True),
    StructField("passenger_count", LongType(), True),
    StructField("trip_distance", DoubleType(), True),
    StructField("fare_amount", DoubleType(), True),
    StructField("extra", DoubleType(), True),
    StructField("mta_tax", DoubleType(), True),
    StructField("tip_amount", DoubleType(), True),
    StructField("tolls_amount", DoubleType(), True),
    StructField("ehail_fee", DoubleType(), True),
    StructField("improvement_surcharge", DoubleType(), True),
    StructField("total_amount", DoubleType(), True),
    StructField("payment_type", LongType(), True),
    StructField("trip_type", LongType(), True),
    StructField("congestion_surcharge", DoubleType(), True),
])

# Read every Parquet file found under trips_data, descending into sub-folders.
# The "header" option is a CSV option and has no meaning for Parquet, so it is omitted.
trip_df = (
    spark.read
    .format("parquet")
    .schema(my_schema)
    .option("recursiveFileLookup", True)
    .load(f"{bronze_path}/trips_data")
)
trip_df.display()

# Transforming Data

**Trip Type**

In [None]:
# Rename the generic "description" column so its origin stays clear in later use.
trip_type_df = trip_type_df.withColumnRenamed("description", "trip_description")

# Persist to the silver layer. The frame always holds the full bronze dataset, so the
# target is overwritten: appending would add a duplicate copy of every row each time
# the notebook is re-run.
(
    trip_type_df.write
    .format("parquet")
    .mode("overwrite")
    .option("path", f"{silver_path}/trip_type")
    .save()
)
trip_type_df.display()

**Trip Zone**

In [None]:
# Imported here because no earlier cell brings these functions into scope; without
# this the cell fails with NameError on a fresh kernel.
from pyspark.sql.functions import col, split

# Split "Zone" on "/" once and expose the first two parts as separate columns.
zone_parts = split(col("Zone"), "/")
trip_zone_df = (
    trip_zone_df
    .withColumn("Zone_1", zone_parts[0])
    .withColumn("Zone_2", zone_parts[1])
)

# Persist to the silver layer. The frame always holds the full bronze dataset, so the
# target is overwritten: appending would add a duplicate copy of every row each time
# the notebook is re-run.
(
    trip_zone_df.write
    .format("parquet")
    .mode("overwrite")
    .option("path", f"{silver_path}/trip_zone")
    .save()
)
trip_zone_df.display()

**Trip Data**

In [None]:
# Imported here because no earlier cell brings these functions into scope; without
# this the cell fails with NameError on a fresh kernel.
from pyspark.sql.functions import month, to_date, year

pickup_ts = "lpep_pickup_datetime"

# Derive date parts from the pickup timestamp, then keep only the needed columns.
# The derived columns are listed in the projection (after the original six, whose order
# is unchanged); previously the select dropped them right after they were computed.
# NOTE: the select removes lpep_pickup_datetime, so this cell cannot be re-run without
# first re-running the cell that loads trip_df.
trip_df = (
    trip_df
    .withColumn("trip_date", to_date(pickup_ts))
    .withColumn("trip_year", year(pickup_ts))
    .withColumn("trip_month", month(pickup_ts))
    .select(
        "VendorID",
        "PULocationID",
        "DOLocationID",
        "trip_distance",
        "fare_amount",
        "total_amount",
        "trip_date",
        "trip_year",
        "trip_month",
    )
)
trip_df.display()

**Write Trip Data**

In [None]:
# Persist the transformed trips to the silver layer. trip_df is built from every file
# under the bronze trips folder, so the target is overwritten: appending would add a
# duplicate copy of the whole dataset each time the notebook is re-run.
(
    trip_df.write
    .format("parquet")
    .mode("overwrite")
    .option("path", f"{silver_path}/trips")
    .save()
)
display(trip_df)