# 01 INTRODUCTION

- The spark session object
- Create a dataframe from a python list including schema information
- Print the schema of a dataframe
- Write a dataframe to **Spark's warehouse directory** as a managed table
- Create a dataframe by executing an SQL query with spark.sql()

In [1]:
# 1. Obtain the Spark session (the entry point used by the cells below)

from pyspark.sql import SparkSession

# getOrCreate() hands back the already-running session when one exists,
# otherwise it starts a new one.
spark = (
    SparkSession.builder
    .getOrCreate()
)
spark.version  # last expression of the cell, so the version string is displayed

'4.0.1'

In [2]:
# 2. Create a dataframe from a python list and a schema

from datetime import date

# Schema given as a DDL-style string: "<column> <type>" pairs separated by commas.
students_schema = 'id long, name string, enrolment_date date, gpa double'

# One tuple per row; values are listed in the same order as the schema columns.
students_data = [(1, "Nikos Zikos", date(2025, 9, 1), 7.7),
                 (2, "Maria Pappa", date(2025, 9, 2), 6.7),
                 (3, "Petros Kokkinos", date(2025, 9, 1), 7.6)]

# Build the dataframe from the rows and the schema string, then display it.
students_df = spark.createDataFrame(students_data, students_schema)
students_df.show()

+---+---------------+--------------+---+
| id|           name|enrolment_date|gpa|
+---+---------------+--------------+---+
|  1|    Nikos Zikos|    2025-09-01|7.7|
|  2|    Maria Pappa|    2025-09-02|6.7|
|  3|Petros Kokkinos|    2025-09-01|7.6|
+---+---------------+--------------+---+



In [3]:
# 3. Print the schema of the dataframe
# (column names, types and nullability, displayed as a tree)

students_df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- enrolment_date: date (nullable = true)
 |-- gpa: double (nullable = true)



In [4]:
# 4. Persist the dataframe as the table "students" in the default format (parquet)

# "overwrite" replaces any existing "students" table, so the cell can be re-run safely.
students_df.write.saveAsTable("students", mode="overwrite")

In [5]:
# 5. Read the table "students" back into a new dataframe

# Row order of a table read is not guaranteed; it may differ from the insertion order.
students_df2 = spark.read.table("students")
students_df2.show()

+---+---------------+--------------+---+
| id|           name|enrolment_date|gpa|
+---+---------------+--------------+---+
|  3|Petros Kokkinos|    2025-09-01|7.6|
|  1|    Nikos Zikos|    2025-09-01|7.7|
|  2|    Maria Pappa|    2025-09-02|6.7|
+---+---------------+--------------+---+



In [6]:
# 6. Create a dataframe from the result of an SQL query

students_df3 = spark.sql("select * from students")
students_df3.show()

+---+---------------+--------------+---+
| id|           name|enrolment_date|gpa|
+---+---------------+--------------+---+
|  3|Petros Kokkinos|    2025-09-01|7.6|
|  1|    Nikos Zikos|    2025-09-01|7.7|
|  2|    Maria Pappa|    2025-09-02|6.7|
+---+---------------+--------------+---+

