In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Explicit schemas for the four bikeshare CSV extracts. Columns are listed in
# file order; every field is nullable (the StructType.add default).
# NOTE(review): FloatType is 32-bit single precision - confirm that is precise
# enough for payment amounts and station coordinates.

# riders.csv
riderSchema = (
    StructType()
    .add("rider_id", IntegerType())
    .add("first", StringType())
    .add("last", StringType())
    .add("address", StringType())
    .add("birthday", DateType())
    .add("account_start_date", DateType())
    .add("account_end_date", DateType())
    .add("is_member", BooleanType())
)

# payments.csv
paymentSchema = (
    StructType()
    .add("payment_id", IntegerType())
    .add("date", DateType())
    .add("amount", FloatType())
    .add("rider_id", IntegerType())
)

# stations.csv
stationSchema = (
    StructType()
    .add("station_id", StringType())
    .add("name", StringType())
    .add("latitude", FloatType())
    .add("longitude", FloatType())
)

# trips.csv
tripSchema = (
    StructType()
    .add("trip_id", StringType())
    .add("rideable_type", StringType())
    .add("start_at", TimestampType())
    .add("ended_at", TimestampType())
    .add("start_station_id", StringType())
    .add("end_station_id", StringType())
    .add("rider_id", IntegerType())
)
# Ingest each raw CSV extract into its own Delta directory.
# One data-driven loop replaces four copy-pasted read/write stanzas, so adding
# a dataset only needs a new entry in DATASET_SCHEMAS.
SOURCE_DIR = "/FileStore/bikeshare_source"  # where <dataset>.csv files are read from
DELTA_DIR = "/delta"                        # where <dataset> Delta directories are written

# dataset name -> schema applied when reading <SOURCE_DIR>/<dataset>.csv
DATASET_SCHEMAS = {
    "riders": riderSchema,
    "payments": paymentSchema,
    "stations": stationSchema,
    "trips": tripSchema,
}

for dataset_name, dataset_schema in DATASET_SCHEMAS.items():
    # header=False: the first line of each file is read as data, and column
    # names/types come from the explicit schema instead of being inferred.
    df = spark.read.load(
        f"{SOURCE_DIR}/{dataset_name}.csv",
        format="csv",
        schema=dataset_schema,
        header=False,
    )
    # mode("overwrite") replaces any previous output, so this cell can be re-run.
    df.write.format("delta").mode("overwrite").save(f"{DELTA_DIR}/{dataset_name}")



In [0]:
%sql

-- Recreate the four bikeshare tables as Delta tables.
-- Each table is dropped first (if present), so this cell can be re-run.
-- NOTE(review): these statements create the tables empty; nothing visible in
-- this notebook loads the data under /delta/* into them - confirm intent.

DROP TABLE IF EXISTS riders;
CREATE TABLE riders (
  rider_id            INTEGER,
  first               VARCHAR(50),
  last                VARCHAR(50),
  address             VARCHAR(100),
  birthday            DATE,
  account_start_date  DATE,
  account_end_date    DATE,
  is_member           BOOLEAN
)
USING DELTA;

DROP TABLE IF EXISTS payments;
CREATE TABLE payments (
  payment_id  INTEGER,
  date        DATE,
  amount      FLOAT,
  rider_id    INTEGER
)
USING DELTA;

DROP TABLE IF EXISTS stations;
CREATE TABLE stations (
  station_id  VARCHAR(50),
  name        VARCHAR(75),
  latitude    FLOAT,
  longitude   FLOAT
)
USING DELTA;

DROP TABLE IF EXISTS trips;
CREATE TABLE trips (
  trip_id           VARCHAR(50),
  rideable_type     VARCHAR(75),
  start_at          TIMESTAMP,
  ended_at          TIMESTAMP,
  start_station_id  VARCHAR(50),
  end_station_id    VARCHAR(50),
  rider_id          INTEGER
)
USING DELTA;



In [0]:
# Publish the riders Delta files under /delta/riders as the table `riders123`.
df = spark.read.format("delta").load("/delta/riders")

# Use "overwrite" rather than "append": with append, every re-run of this cell
# (e.g. Restart & Run All) added a second copy of every rider row to the table.
# NOTE(review): the target is `riders123`, not the `riders` table created by
# the SQL cell, which therefore stays empty - confirm which name is intended.
df.write.format("delta").mode("overwrite").saveAsTable("riders123")