In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, TimestampType

file_location = "/FileStore/bookings.csv"

# Load the CSV file into a DataFrame
df = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load(file_location)

df.show()
df.printSchema()

# Define schema for the bookings table
schema = StructType([
    StructField("bookid", IntegerType(), True),
    StructField("facid", IntegerType(), True),
    StructField("memid", IntegerType(), True),
    StructField("starttime", TimestampType(), True),
    StructField("slots", IntegerType(), True)
])

# Read data from CSV file into DataFrame with predefined schema
df = spark.read.format("csv").option("header", "true").schema(schema).load(file_location)

# Drop the table if it already exists
spark.sql("DROP TABLE IF EXISTS bookings")

# Write data from DataFrame into managed table
df.write.saveAsTable("bookings")

# Verify the table creation
spark.sql("SELECT * FROM bookings").show()


+------+-----+-----+-------------------+-----+
|bookid|facid|memid|          starttime|slots|
+------+-----+-----+-------------------+-----+
|     0|    3|    1|2012-07-03 11:00:00|    2|
|     1|    4|    1|2012-07-03 08:00:00|    2|
|     2|    6|    0|2012-07-03 18:00:00|    2|
|     3|    7|    1|2012-07-03 19:00:00|    2|
|     4|    8|    1|2012-07-03 10:00:00|    1|
|     5|    8|    1|2012-07-03 15:00:00|    1|
|     6|    0|    2|2012-07-04 09:00:00|    3|
|     7|    0|    2|2012-07-04 15:00:00|    3|
|     8|    4|    3|2012-07-04 13:30:00|    2|
|     9|    4|    0|2012-07-04 15:00:00|    2|
|    10|    4|    0|2012-07-04 17:30:00|    2|
|    11|    6|    0|2012-07-04 12:30:00|    2|
|    12|    6|    0|2012-07-04 14:00:00|    2|
|    13|    6|    1|2012-07-04 15:30:00|    2|
|    14|    7|    2|2012-07-04 14:00:00|    2|
|    15|    8|    2|2012-07-04 12:00:00|    1|
|    16|    8|    3|2012-07-04 18:00:00|    1|
|    17|    1|    0|2012-07-05 17:30:00|    3|
|    18|    2

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType

# Define the schema for the members table
schema = StructType([
    StructField("memid", IntegerType(), True),
    StructField("surname", StringType(), True),
    StructField("firstname", StringType(), True),
    StructField("address", StringType(), True),
    StructField("zipcode", StringType(), True),
    StructField("telephone", StringType(), True),
    StructField("recommendedby", IntegerType(), True),
    StructField("joindate", DateType(), True)
])

file_location = "/FileStore/facilities.csv"

df = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load(file_location)

df.show()
df.printSchema()

# Drop the table if it already exists
spark.sql("DROP TABLE IF EXISTS members")

# Write data from DataFrame into a managed table
df.write.saveAsTable("members")

# Verify the data loaded into the managed table
spark.sql("SELECT * FROM members").show()


+-----+---------------+----------+---------+-------------+------------------+
|facid|           name|membercost|guestcost|initialoutlay|monthlymaintenance|
+-----+---------------+----------+---------+-------------+------------------+
|    0| Tennis Court 1|       5.0|     25.0|        10000|               200|
|    1| Tennis Court 2|       5.0|     25.0|         8000|               200|
|    2|Badminton Court|       0.0|     15.5|         4000|                50|
|    3|   Table Tennis|       0.0|      5.0|          320|                10|
|    4| Massage Room 1|      35.0|     80.0|         4000|              3000|
|    5| Massage Room 2|      35.0|     80.0|         4000|              3000|
|    6|   Squash Court|       3.5|     17.5|         5000|                80|
|    7|  Snooker Table|       0.0|      5.0|          450|                15|
|    8|     Pool Table|       0.0|      5.0|          400|                15|
+-----+---------------+----------+---------+-------------+------

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType

# Define the schema for the facilities table
schema = StructType([
    StructField("facid", IntegerType(), True),
    StructField("name", StringType(), True),
    StructField("membercost", DoubleType(), True),
    StructField("guestcost", DoubleType(), True),
    StructField("initialoutlay", DoubleType(), True),
    StructField("monthlymaintenance", DoubleType(), True)
])

file_location = "/FileStore/members.csv"

df = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load(file_location)

df.show()
df.printSchema()

# Drop the table if it already exists
spark.sql("DROP TABLE IF EXISTS facilities")

# Write data from DataFrame into a managed table
df.write.saveAsTable("facilities")

# Verify the data loaded into the managed table
spark.sql("SELECT * FROM facilities").show()



+-----+---------+---------+--------------------+-------+--------------+-------------+-------------------+
|memid|  surname|firstname|             address|zipcode|     telephone|recommendedby|           joindate|
+-----+---------+---------+--------------------+-------+--------------+-------------+-------------------+
|    0|    GUEST|    GUEST|               GUEST|      0|(000) 000-0000|         null|2012-07-01 00:00:00|
|    1|    Smith|   Darren|8 Bloomsbury Clos...|   4321|  555-555-5555|         null|2012-07-02 12:02:05|
|    2|    Smith|    Tracy|8 Bloomsbury Clos...|   4321|  555-555-5555|         null|2012-07-02 12:08:23|
|    3|   Rownam|      Tim|23 Highway Way, B...|  23423|(844) 693-0723|         null|2012-07-03 09:32:15|
|    4| Joplette|   Janice|20 Crossing Road,...|    234|(833) 942-4710|            1|2012-07-03 10:25:05|
|    5|  Butters|   Gerald|1065 Huntingdon A...|  56754|(844) 078-4130|            1|2012-07-09 10:44:09|
|    6|    Tracy|   Burton|3 Tunisia Drive, ..