### Ingest payments.csv file

In [0]:
%run "../_src/config"

In [0]:
%run "../_src/bronze_functions"

##### Step 1 - Read the CSV file using the Spark DataFrame reader

In [0]:
# Column names for the payments data, listed in the same order as the
# fields of the schema that is passed to read_files alongside them.
col_names = [
    "payment_id",
    "date",
    "amount",
    "rider_id",
]

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, DateType, StringType, DoubleType

In [0]:
# Explicit schema for the payments data: one StructField per column, in the
# same order as col_names, so types are declared rather than inferred.
# NOTE(review): payment_id is declared non-nullable; whether that is actually
# enforced depends on how read_files applies the schema - TODO confirm.
# NOTE(review): rider_id is typed as a string although the displayed values
# look numeric - confirm this matches the key type used by the rider data.
payment_schema = StructType(
    [
        StructField("payment_id", IntegerType(), nullable=False),
        StructField("date", DateType(), nullable=True),
        StructField("amount", DoubleType(), nullable=True),
        StructField("rider_id", StringType(), nullable=True),
    ]
)

In [0]:
# Load the payments CSV through the read_files helper (not defined in this
# notebook; presumably provided by the %run of bronze_functions), passing the
# column names and the explicit schema declared in the cells above.
# NOTE(review): the displayed output further down starts at payment_id 2 -
# confirm the helper is not consuming the first data row as a header line.
df = read_files('landingzone', 'payments', 'csv', col_names, payment_schema)

reading file from: /mnt/bikesharedlake/landingzone/payments.csv
file successfully read from:/mnt/bikesharedlake/landingzone/payments.csv


##### Step 2 - Add ingestion date to the dataframe

In [0]:
# Ask the df_add_columns helper to add a timestamp column; the displayed
# output shows the result as an ingestion_date column.
# NOTE(review): df is rebound to the helper's result, so re-running this cell
# on its own feeds the already-augmented DataFrame back in - confirm the
# helper tolerates that, or re-run from the read cell.
df = df_add_columns(df, add_timestamp=True)

In [0]:
# Preview the DataFrame to check the parsed columns and the added
# ingestion_date column before writing anything out.
display(df)

payment_id,date,amount,rider_id,ingestion_date
2,2019-06-01,9.0,1000,2023-06-18T14:01:38.032+0000
3,2019-07-01,9.0,1000,2023-06-18T14:01:38.032+0000
4,2019-08-01,9.0,1000,2023-06-18T14:01:38.032+0000
5,2019-09-01,9.0,1000,2023-06-18T14:01:38.032+0000
6,2019-10-01,9.0,1000,2023-06-18T14:01:38.032+0000
7,2019-11-01,9.0,1000,2023-06-18T14:01:38.032+0000
8,2019-12-01,9.0,1000,2023-06-18T14:01:38.032+0000
9,2020-01-01,9.0,1000,2023-06-18T14:01:38.032+0000
10,2020-02-01,9.0,1000,2023-06-18T14:01:38.032+0000
11,2020-03-01,9.0,1000,2023-06-18T14:01:38.032+0000


##### Step 3 - Write data to bronze as parquet

In [0]:
# Persist the DataFrame to the bronze layer via the write_parquet_table
# helper (defined outside this notebook); per the cell's log output the data
# lands under the bronze/payment folder.
write_parquet_table(df, 'bronze', 'payment')

writing bronze file: /mnt/bikesharedlake/bronze/payment
Table successfully written: /mnt/bikesharedlake/bronze/payment


##### Step 4 - Read the bronze parquet data and write it to silver as a Delta table

In [0]:
# Read the bronze payment data back as the input for the silver load.
# Parquet files carry their own schema, so the reader needs no CSV-style
# options such as "header".
# bronze_folder_path is not defined in this notebook; presumably it is set by
# the %run of the config notebook - TODO confirm.
payment_df = spark.read.parquet(f"{bronze_folder_path}/payment")

In [0]:
# Save the bronze payment data as the Delta table silver.payment; overwrite
# mode replaces whatever the table held from a previous run.
(
    payment_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("silver.payment")
)

In [0]:
%sql
-- Sanity check: query the silver.payment table to inspect what the write above produced.
SELECT * FROM silver.payment;

payment_id,date,amount,rider_id,ingestion_date
2,2019-06-01,9.0,1000,2023-06-18T06:34:59.973+0000
3,2019-07-01,9.0,1000,2023-06-18T06:34:59.973+0000
4,2019-08-01,9.0,1000,2023-06-18T06:34:59.973+0000
5,2019-09-01,9.0,1000,2023-06-18T06:34:59.973+0000
6,2019-10-01,9.0,1000,2023-06-18T06:34:59.973+0000
7,2019-11-01,9.0,1000,2023-06-18T06:34:59.973+0000
8,2019-12-01,9.0,1000,2023-06-18T06:34:59.973+0000
9,2020-01-01,9.0,1000,2023-06-18T06:34:59.973+0000
10,2020-02-01,9.0,1000,2023-06-18T06:34:59.973+0000
11,2020-03-01,9.0,1000,2023-06-18T06:34:59.973+0000
