### Setting Up the Notebook

In [0]:
%pip uninstall -y databricks_helpers 
%pip install git+https://github.com/data-derp/databricks_helpers#egg=databricks_helpers 

In [0]:
# Identifier for this exercise; it is passed to DataDerpDatabricksHelpers when the helper is created.
exercise_name = "final_day_presentation"

In [0]:
from databricks_helpers.databricks_helpers import DataDerpDatabricksHelpers

# Build the course helper from the Databricks-provided `dbutils` object and the exercise name.
helpers = DataDerpDatabricksHelpers(dbutils, exercise_name)

# NOTE(review): presumably identifies the user running the notebook — confirm against databricks_helpers.
current_user = helpers.current_user()
# Base directory used by later cells for the input CSV and the bronze output.
working_directory = helpers.working_directory()

print(f"Your current working directory is: {working_directory}")

In [0]:
# Build the path to the Swiggy CSV inside the working directory (nothing is read yet).
file_path = working_directory + "/Swiggy_dataset.csv"

# Show the resolved path so it can be checked before loading.
display(file_path)

### Defining the Schema and Creating the DataFrame from the Dataset

In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.functions import col
from pyspark.sql.types import *

def create_dataframe(filepath: str) -> DataFrame:
    """Load the Swiggy CSV at ``filepath`` into a DataFrame with an explicit schema.

    The schema is fixed (17 nullable columns) rather than inferred, so column
    types are the same on every run. The file is read with a header row,
    a comma delimiter and a backslash escape character.

    Args:
        filepath: Location of the CSV file to load.

    Returns:
        A DataFrame whose columns follow the schema declared below.
    """
    # (column name, Spark type) pairs, in the order they are declared in the schema.
    column_types = [
        ("Type", StringType()),
        ("ID", IntegerType()),
        ("Name", StringType()),
        ("UUID", StringType()),
        ("City", StringType()),
        ("Area", StringType()),
        ("Avg_rating", DoubleType()),
        ("Total_ratings", StringType()),
        ("Cuisine", StringType()),
        ("Cost_for_two", StringType()),
        ("Delivery_time", IntegerType()),
        ("Min_Delivery_time", IntegerType()),
        ("Max_Delivery_time", IntegerType()),
        ("Address", StringType()),
        ("Locality", StringType()),
        ("Unserviceable", BooleanType()),
        ("Vegetarian", BooleanType()),
    ]
    # Every column is declared nullable.
    swiggy_schema = StructType(
        [StructField(name, dtype, True) for name, dtype in column_types]
    )

    csv_reader = (
        spark.read.format("csv")
        .option("header", True)
        .option("delimiter", ",")
        .option("escape", "\\")
        .schema(swiggy_schema)
    )
    return csv_reader.load(filepath)

# Load the CSV with the explicit schema, then print the schema and render the DataFrame for inspection.
swiggy_df = create_dataframe(file_path)
swiggy_df.printSchema()
display(swiggy_df)

**Optional**: Uncomment and run the cell below to inspect the distinct values of the rating and cost columns.

In [0]:
# swiggy_df.select("Avg_rating","Total_ratings","Cost_for_two").distinct().show()

### Writing the DataFrame to the Bronze Output Directory

In [0]:
# Bronze output location, under the working directory.
output_dir = working_directory + "/bronze"

# Write as Parquet; "overwrite" mode replaces any existing data at output_dir, so this cell can be re-run.
swiggy_df.write.mode("overwrite").parquet(output_dir)