# The SparkSession

In [2]:
# Show the details of the SparkSession.
# NOTE(review): `spark` is not created in any of the visible cells; presumably
# the notebook runtime provides it — confirm before running elsewhere.
# As the last expression of the cell, its repr is what the notebook renders.
spark

In [3]:
# Build a single-column DataFrame from spark.range(1000) and name its
# column "number" via toDF.
myRange = (
    spark
    .range(1000)
    .toDF("number")
)

# Action: show() executes the plan and prints the first 5 rows.
myRange.show(n=5)

In [4]:
# Transformation: describe a new DataFrame holding only the rows of myRange
# whose "number" value is even.
divisBy2 = (
    myRange
    .where("number % 2 = 0")
)

# Action: run the filter and print the first 5 matching rows.
divisBy2.show(n=5)

# An End-to-End Example

In [6]:
# Load the 2015 flight summary CSV into a DataFrame. The "header" option
# treats the first line as column names; the "inferSchema" option asks Spark
# to work out the column types from the data instead of declaring them here.
flightData2015 = (
    spark.read
    .option("inferSchema", "true")
    .option("header", "true")
    .csv("/databricks-datasets/definitive-guide/data/flight-data/csv/2015-summary.csv")
)

# Action: fetch the first 3 rows; as the cell's last expression the result
# is displayed by the notebook.
flightData2015.take(3)

In [7]:
# sort() is a transformation; explain() prints the plan Spark would use to
# order the rows by the "count" column.
(
    flightData2015
    .sort("count")
    .explain()
)

In [8]:
# Register the DataFrame as a temporary view named "flight_data_2015" so that
# it can be referenced by name in the spark.sql() queries in the cells below.
# This creates a view, not a table; an existing view with the same name is
# replaced, which keeps the cell re-runnable.
flightData2015.createOrReplaceTempView("flight_data_2015")

In [9]:
# Execute a command in the SQL way: count the rows per destination country in
# the "flight_data_2015" temporary view. spark.sql() returns a DataFrame.
sqlWay = spark.sql("""
SELECT DEST_COUNTRY_NAME, count(1)
FROM flight_data_2015
GROUP BY DEST_COUNTRY_NAME
""")
# Print the plan for the SQL version so it can be compared with the plan of
# the DataFrame version in the next cell.
sqlWay.explain()

In [10]:
# The same per-destination row count, written with DataFrame methods
# instead of a SQL string.
dataFrameWay = (
    flightData2015
    .groupBy("DEST_COUNTRY_NAME")
    .count()
)

# Print the plan for the DataFrame version.
dataFrameWay.explain()

We get the same explain plan: the SQL query and the DataFrame code compile to the same underlying plan!

In [12]:
# Execute another action on the DataFrame: find the largest value in the
# "count" column.
#
# The functions module is imported under the alias F instead of importing
# `max` by name, because `from pyspark.sql.functions import max` rebinds the
# name `max` and hides Python's built-in max() in every later cell.
from pyspark.sql import functions as F

# take(1) is the action; it returns a list holding the single aggregated Row.
flightData2015.select(F.max("count")).take(1)

In [13]:
# Execute a more complex command in the SQL way: total the "count" column per
# destination country, order the totals from largest to smallest, and keep
# only the top 5 destinations.
maxSql = spark.sql("""
SELECT DEST_COUNTRY_NAME, sum(count) as destination_total
FROM flight_data_2015
GROUP BY DEST_COUNTRY_NAME
ORDER BY sum(count) DESC
LIMIT 5
""")

# Action: run the query and print the resulting rows.
maxSql.show()

In [14]:
# The same top-5 destinations query, written with DataFrame methods.
from pyspark.sql.functions import desc

(
    flightData2015
    .groupBy("DEST_COUNTRY_NAME")
    .sum("count")
    # Rename the aggregate column so it matches the alias used in the SQL version.
    .withColumnRenamed("sum(count)", "destination_total")
    .sort(desc("destination_total"))
    .limit(5)
    .show()
)

In [15]:
# Print the plan for the top-5 destinations pipeline. The chain repeats the
# aggregation from the previous cell but ends in explain() instead of show();
# `desc` comes from the import in that cell.
(
    flightData2015
    .groupBy("DEST_COUNTRY_NAME")
    .sum("count")
    .withColumnRenamed("sum(count)", "destination_total")
    .sort(desc("destination_total"))
    .limit(5)
    .explain()
)