In [0]:
from pyspark.sql import SparkSession

**Defining the schema**

In [0]:
from pyspark.sql.types import StructField, StructType, IntegerType, StringType

# Explicit schema applied to the incoming records: a nullable string column
# 'Sentence' followed by a nullable integer column 'Sentiment'.
sentence_field = StructField(name='Sentence', dataType=StringType(), nullable=True)
sentiment_field = StructField(name='Sentiment', dataType=IntegerType(), nullable=True)

schema = StructType([sentence_field, sentiment_field])


**Defining paths for source, target and checkpoints**

In [0]:
# Single root for every project artifact; change it here to relocate the
# whole pipeline instead of editing each path separately.
project_root = '/Volumes/workspace/default/my_volume/Sentiment_Project'

# Directory that Auto Loader watches for incoming files (trailing slash kept).
source_path = f'{project_root}/input_files/'
# Storage location of the bronze Delta table.
bronze_table_path = f'{project_root}/bronze_tweets'
# Streaming checkpoint directory for the bronze write.
checkpoints_path = f'{project_root}/checkpoints/bronze'

**Reading the data with Auto Loader to simulate live streaming**

In [0]:
# Reader options: ingest CSV files through the 'cloudFiles' source and treat
# the first line of each file as a header row.
autoloader_options = {
    'cloudFiles.format': 'csv',
    'header': 'true',
}

# Streaming DataFrame over source_path using the explicit schema defined above.
streaming_df = (
    spark.readStream
    .format('cloudFiles')
    .options(**autoloader_options)
    .schema(schema)
    .load(source_path)
)

**Writing the data to the bronze Delta table**

In [0]:
# Append the streamed rows to the bronze Delta table. The availableNow trigger
# processes everything currently available and then stops the query.
bronze_query = (
    streaming_df.writeStream
    .format('delta')
    .outputMode('append')
    .option('checkpointLocation', checkpoints_path)
    .trigger(availableNow=True)
    .start(bronze_table_path)
)

# start() returns immediately; block until the run has finished so that later
# cells (e.g. under "Run All") read the fully written table rather than a
# missing or partially written one. Any streaming failure is raised here.
bronze_query.awaitTermination()

In [0]:
# Sanity check: load the bronze Delta table as a batch DataFrame, report its
# row count, and show a 10-row preview.
delta_reader = spark.read.format('delta')
bronze_df = delta_reader.load(bronze_table_path)

print(f"Total records: {bronze_df.count()}")

bronze_preview = bronze_df.limit(10)
display(bronze_preview)