In [31]:
from pyspark.sql.functions import from_json, col, to_timestamp
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql import SparkSession

In [32]:
def write_to_cassandra(target_df, batch_id):
    """Append one micro-batch to the Cassandra table ``spark_db.customer_search``.

    Written to be used as a ``foreachBatch`` callback: the batch is saved
    through the Cassandra data source and then printed with ``show()``.

    Args:
        target_df: DataFrame holding the rows of the current micro-batch.
        batch_id: Identifier of the micro-batch. Accepted to match the
            ``foreachBatch`` callback signature; not used here.
    """
    # Two actions (save + show) consume the same batch. Persisting it keeps
    # the second action from recomputing the batch from its source.
    target_df.persist()
    try:
        target_df.write \
            .format("org.apache.spark.sql.cassandra") \
            .option("keyspace", "spark_db") \
            .option("table", "customer_search") \
            .mode("append") \
            .save()
        target_df.show()
    finally:
        # Release the cached batch even if the write or the display fails.
        target_df.unpersist()

In [33]:
# Build (or reuse) the Spark session for this notebook: a local master with
# three threads, two shuffle partitions, and the Cassandra connector wired in.
spark = (
    SparkSession.builder
    .master("local[3]")
    .appName("Stream Table Join Demo")
    # Streaming / SQL tuning
    .config("spark.streaming.stopGracefullyOnShutdown", "true")
    .config("spark.sql.shuffle.partitions", 2)
    # Cassandra connection endpoint
    .config("spark.cassandra.connection.host", "localhost")
    .config("spark.cassandra.connection.port", "9042")
    # Cassandra connector SQL extensions and the catalog registered as "lh"
    .config("spark.sql.extensions", "com.datastax.spark.connector.CassandraSparkExtensions")
    .config("spark.sql.catalog.lh", "com.datastax.spark.connector.datasource.CassandraCatalog")
    .getOrCreate()
)

In [34]:
# Schema applied to the JSON payload of each message. Every field is declared
# as a string here; `search_date` is converted to a timestamp further down.
search_schema = StructType([
    StructField(field_name, StringType())
    for field_name in (
        "id",
        "customer_id",
        "customer_name",
        "product_searched",
        "search_date",
        "country_name",
        "state",
    )
])

In [35]:
# Streaming source: subscribe to the "product-customer-qty" topic on the local
# broker, starting from the earliest offsets and with failOnDataLoss disabled.
kafka_source_df = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "localhost:9092")
    .option("subscribe", "product-customer-qty")
    .option("startingOffsets", "earliest")
    .option("failOnDataLoss", False)
    .load()
)

In [36]:
# Cast the Kafka `value` column to a string and parse it as JSON using
# search_schema; the parsed struct is exposed under the name "value".
value_df = kafka_source_df.select(
    from_json(col("value").cast("string"), search_schema).alias("value")
)

In [37]:
# Flatten the parsed struct into top-level columns, then replace the string
# `search_date` with a timestamp parsed as "yyyy-MM-dd HH:mm:ss".
prod_customer_df = (
    value_df
    .select("value.*")
    .withColumn(
        "search_date",
        to_timestamp(col("search_date"), format="yyyy-MM-dd HH:mm:ss"),
    )
)

In [38]:
# Print the flattened schema; `search_date` should now be reported as a timestamp.
prod_customer_df.printSchema()

root
 |-- id: string (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- customer_name: string (nullable = true)
 |-- product_searched: string (nullable = true)
 |-- search_date: timestamp (nullable = true)
 |-- country_name: string (nullable = true)
 |-- state: string (nullable = true)



In [39]:
# Explicit, ordered list of the columns handed to the sinks.
output_df = prod_customer_df.select(
    "id",
    "customer_id",
    "customer_name",
    "product_searched",
    "search_date",
    "country_name",
    "state",
)
# Disabled integer casts, kept for reference:
#     .withColumn("idNum", col("customer_id").cast(IntegerType())) \
#     .withColumn("id",    col("id").cast(IntegerType()))

In [40]:
# Debug sink: echo every micro-batch to the console (append mode, untruncated).
# A checkpoint directory records the progress of exactly one streaming query,
# so this query uses its own directory rather than "./checkpoints/cassandra-proj",
# which the query that writes to Cassandra uses.
notification_writer_query = output_df.writeStream \
        .format("console") \
        .outputMode("append") \
        .option("truncate", "false") \
        .option("checkpointLocation", "./checkpoints/cassandra-proj-console/") \
        .start()

# # notification_writer_query.awaitTermination()

In [41]:
# Aggregations (draft, currently disabled)

# agg_output_df = output_df.groupBy("country_name", "product_searched")\
#       .agg(count("id")).alias("search_qty")

# agg_search_locations_df = output_df.groupBy("name")\
#      .agg(sum("idNum"), count("id"))

In [42]:
# Cassandra sink: once a minute, hand the current micro-batch to
# write_to_cassandra via foreachBatch.
# NOTE(review): this checkpoint directory should belong to this query only —
# confirm no other streaming query points at the same path.
output_query = (
    output_df.writeStream
    .foreachBatch(write_to_cassandra)
    .outputMode("update")
    .option("checkpointLocation", "./checkpoints/cassandra-proj")
    .trigger(processingTime="1 minute")
    .start()
)

In [None]:
# Block this cell until the query that feeds write_to_cassandra stops or fails;
# the tables printed below it come from the show() call in write_to_cassandra.
output_query.awaitTermination()

+---+-----------+-------------+--------------------+-------------------+------------+----------------+
| id|customer_id|customer_name|    product_searched|        search_date|country_name|           state|
+---+-----------+-------------+--------------------+-------------------+------------+----------------+
| 36|          9|          Jox|Gildan Mens Assor...|2023-04-28 20:27:37|    Suriname|Coronie District|
+---+-----------+-------------+--------------------+-------------------+------------+----------------+

+---+-----------+-------------+--------------------+-------------------+------------+--------------+
| id|customer_id|customer_name|    product_searched|        search_date|country_name|         state|
+---+-----------+-------------+--------------------+-------------------+------------+--------------+
| 37|          8|      Maryina|         Water Shoes|2023-04-28 20:28:07|    Tanzania|Zanzibar North|
| 38|          6|       Emilio|Samsung Galaxy s2...|2023-04-28 20:28:37|Turkmeni

+---+-----------+-------------+----------------+-------------------+------------+-----------------+
| id|customer_id|customer_name|product_searched|        search_date|country_name|            state|
+---+-----------+-------------+----------------+-------------------+------------+-----------------+
| 63|         14|       Mirtha|            Roku|2023-04-28 20:41:07|Sierra Leone|Southern Province|
| 64|          8|      Maryina|     Water Shoes|2023-04-28 20:41:37|     Andorra|           Ordino|
+---+-----------+-------------+----------------+-------------------+------------+-----------------+

+---+-----------+-------------+----------------+-------------------+----------------+-----------------+
| id|customer_id|customer_name|product_searched|        search_date|    country_name|            state|
+---+-----------+-------------+----------------+-------------------+----------------+-----------------+
| 65|          2|     Marielys|     Water Shoes|2023-04-28 20:42:07|Papua new Guinea|  

+---+-----------+-------------+----------------+-------------------+--------------+--------------------+
| id|customer_id|customer_name|product_searched|        search_date|  country_name|               state|
+---+-----------+-------------+----------------+-------------------+--------------+--------------------+
| 91|         16|       Marius|      Air Jordan|2023-04-28 20:55:08|      Slovenia|Žirovnica Municip...|
| 92|         20|     JohnEric|       Bed Sheet|2023-04-28 20:55:38|United Kingdom|Coleraine Borough...|
+---+-----------+-------------+----------------+-------------------+--------------+--------------------+

+---+-----------+-------------+----------------+-------------------+--------------------+--------------+
| id|customer_id|customer_name|product_searched|        search_date|        country_name|         state|
+---+-----------+-------------+----------------+-------------------+--------------------+--------------+
| 93|          9|          Jox|     Bed Pillows|2023-0

+---+-----------+-------------+----------------+-------------------+------------+-------------+
| id|customer_id|customer_name|product_searched|        search_date|country_name|        state|
+---+-----------+-------------+----------------+-------------------+------------+-------------+
|119|         19|    Johnathan|        Creatine|2023-04-28 21:09:08|      Taiwan|      Hsinchu|
|120|         18|       Johvay| Ninja Air Frier|2023-04-28 21:09:38|     Lebanon|Mount Lebanon|
+---+-----------+-------------+----------------+-------------------+------------+-------------+

+---+-----------+-------------+----------------+-------------------+------------+--------------------+
| id|customer_id|customer_name|product_searched|        search_date|country_name|               state|
+---+-----------+-------------+----------------+-------------------+------------+--------------------+
|121|          4|         John|            Roku|2023-04-28 21:10:08|      Guyana|Upper Takutu-Uppe...|
|122|      