## Access dataframe using SQL
- Create temporary views on dataframes
- Access the view from a SQL cell
- Access the view from a Python cell

In [0]:
%run "../includes/configurations"

In [0]:
# Read the race_results parquet data from the presentation folder into a DataFrame.
race_results_path = f"{presentation_folder_path}/race_results"
race_results_df = spark.read.parquet(race_results_path)

In [0]:
# Register a temporary view on top of this DataFrame so it can be queried with SQL.
# The view is valid only for the current Spark session.
#
# createTempView("v_race_results") would register the same view, but prefer
# createOrReplaceTempView: it replaces an existing view of the same name, so this
# cell can be re-run without first dropping the view.
race_results_df.createOrReplaceTempView("v_race_results")

In [0]:
%sql
-- Rows of the session temp view for race year 2020 where the driver nationality is Russian
SELECT *
FROM v_race_results
WHERE race_year = 2020
  AND driver_nationality = 'Russian'

In [0]:
# spark.sql() returns the query result as a DataFrame. Running the query from a
# Python cell lets us build the SQL text from Python variables (parameters).

p_race_year = 2019
race_year_query = f"SELECT * FROM v_race_results WHERE race_year = {p_race_year}"
race_results_p_df = spark.sql(race_year_query)

# Show only the first 10 rows rather than the whole result.
race_results_p_df.limit(10).display()

## Global Temporary View
- Create global temporary views on dataframes
- Access the view from a SQL cell
- Access the view from a Python cell
- Access the view from another notebook

In [0]:
# Register a global temporary view so the data can be used from other notebooks as well.
# Global temp views are addressed through the global_temp database,
# e.g. global_temp.gv_race_results.
# NOTE(review): sharing presumably requires the other notebook to run on the same
# cluster / Spark application — confirm.
race_results_df.createOrReplaceGlobalTempView("gv_race_results")

In [0]:
%sql
-- List what is registered in global_temp, the database through which gv_race_results is accessed
SHOW TABLES IN global_temp;

In [0]:
%sql
-- Rows for race year 2020, read through the global temporary view
SELECT *
FROM global_temp.gv_race_results
WHERE race_year = 2020;

In [0]:
# Sum points per race_year / race_name / driver / nationality / team, keeping only
# rows with position <= 10, read from the global temp view and sorted by the summed
# points from highest to lowest.
# NOTE(review): race_name is part of the GROUP BY, so the result has one row per
# race/driver/team combination rather than one per season — confirm this is the
# intended grain for "standings". The aggregate is also unaliased, so its column
# is named after the expression sum(points).
driver_standings_sql = """
                        SELECT race_year, race_name, driver_name, driver_nationality, team, sum(points)
                        FROM global_temp.gv_race_results
                        WHERE position <= 10
                        GROUP BY race_year, race_name, driver_name, driver_nationality, team
                        ORDER BY sum(points) DESC"""
driver_standings = spark.sql(driver_standings_sql)

In [0]:
# Show the aggregated driver_standings DataFrame in the notebook output.
driver_standings.display()