## Azure Databricks: Load the data using Spark SQL

By Selman Karaosmanoglu

In [0]:
from pyspark.sql import DataFrame

In [0]:
# Base names of the datasets to load; refresh_bronze_table appends "_bronze" to each.
dataframe_names = [
    "payment",
    "rider",
    "station",
    "trip",
]

In [0]:
# User-defined functions

def drop_table(spark_session, table_name):
    """Remove ``table_name`` via ``DROP TABLE IF EXISTS`` and report the outcome.

    The statement is executed through ``spark_session.sql``. A status line is
    printed on success; any exception is caught and printed instead of being
    propagated, so the function always returns ``None``.
    """
    try:
        drop_statement = f"DROP TABLE IF EXISTS {table_name}"
        spark_session.sql(drop_statement)
        print(f"Table {table_name} dropped successfully.")
    except Exception as err:
        print(f"An error occurred while dropping the table {table_name}: {err}")


In [0]:
def create_table(spark_session, table_name, file_path) -> "DataFrame | None":
    """Create a Delta table over ``file_path`` if it does not already exist.

    Runs ``CREATE TABLE IF NOT EXISTS ... USING DELTA LOCATION ...`` through
    ``spark_session`` and then loads the table back with the same session.

    Args:
        spark_session: Session used both to run the SQL and to read the table.
        table_name: Name of the table to create.
        file_path: Storage location the table is created on.

    Returns:
        The table as a DataFrame, or ``None`` if any step raised. Errors are
        printed rather than re-raised.
    """
    try:
        create_table_query = f"""
            CREATE TABLE IF NOT EXISTS {table_name}
            USING DELTA
            LOCATION '{file_path}'
        """
        spark_session.sql(create_table_query)
        print(f"Table {table_name} created successfully from the location {file_path}.")
        # Read through the session that was passed in, not the notebook-global
        # `spark`, so the function honours whichever session the caller supplies.
        return spark_session.table(table_name)
    except Exception as e:
        print(f"An error occurred while creating the table {table_name}: {e}")
        return None

In [0]:
def refresh_bronze_table(name):
    """Drop and re-create the ``<name>_bronze`` table.

    The table is dropped with ``drop_table`` and re-created with
    ``create_table`` at ``/delta/<name>_bronze``, both using the global
    ``spark`` session. Returns whatever ``create_table`` returns.
    """
    table = f"{name}_bronze"
    drop_table(spark, table)
    location = f"/delta/{table}"
    return create_table(spark, table, location)


In [0]:
# Refresh the bronze table for every dataset name and show the resulting DataFrames
[refresh_bronze_table(dataset_name) for dataset_name in dataframe_names]

Table payment_bronze dropped successfully.
Table payment_bronze created successfully from the location /delta/payment_bronze.
Table rider_bronze dropped successfully.
Table rider_bronze created successfully from the location /delta/rider_bronze.
Table station_bronze dropped successfully.
Table station_bronze created successfully from the location /delta/station_bronze.
Table trip_bronze dropped successfully.
Table trip_bronze created successfully from the location /delta/trip_bronze.


[DataFrame[payment_id: int, date: date, amount: double, rider_id: int],
 DataFrame[rider_id: int, first_name: string, last_name: string, address: string, birthdate: date, account_start_date: date, account_end_date: date, is_member: boolean],
 DataFrame[station_id: string, name: string, latitude: double, longitude: double],
 DataFrame[trip_id: string, rideable_type: string, started_at: timestamp, ended_at: timestamp, start_station_id: string, end_station_id: string, rider_id: int]]