# Worked Examples & Exercises - Part 3

## Create Another Kafka Topic

In [None]:
#Download Kafka
# Fetch from archive.apache.org: downloads.apache.org only hosts the current
# releases, so a pinned older version eventually disappears from there.
# -nc skips the download when the tarball is already present (safe to re-run).
!wget -nc https://archive.apache.org/dist/kafka/3.9.0/kafka_2.12-3.9.0.tgz
!tar -xzf kafka_2.12-3.9.0.tgz
# Install only once: moving onto an existing /usr/local/kafka would nest the
# extracted tree inside it instead of replacing it.
!test -d /usr/local/kafka || sudo mv kafka_2.12-3.9.0 /usr/local/kafka

In [None]:
#Set Environment Variables
# Persists KAFKA_HOME and the Kafka bin directory for future shells (~/.bashrc)
# and exports them for the current kernel session. The cell is idempotent:
# re-running it neither duplicates lines in ~/.bashrc nor entries in PATH.
import os
from pathlib import Path

kafka_home = "/usr/local/kafka"
kafka_bin = kafka_home + "/bin"

# Lines that should be present in ~/.bashrc ($PATH / $KAFKA_HOME are expanded
# by the shell when it sources the file, not here).
kafka_export_lines = [
    "export KAFKA_HOME=" + kafka_home,
    "export PATH=$PATH:$KAFKA_HOME/bin",
]

bashrc_path = Path.home() / ".bashrc"
bashrc_text = bashrc_path.read_text() if bashrc_path.exists() else ""
present_lines = bashrc_text.splitlines()
missing_lines = [line for line in kafka_export_lines if line not in present_lines]
if missing_lines:
    # Start on a fresh line if the file does not already end with a newline.
    lead = "" if bashrc_text == "" or bashrc_text.endswith("\n") else "\n"
    with bashrc_path.open("a") as bashrc_file:
        bashrc_file.write(lead + "\n".join(missing_lines) + "\n")

# Export for current session
os.environ["KAFKA_HOME"] = kafka_home
current_path = os.environ.get("PATH", "")
if kafka_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = current_path + os.pathsep + kafka_bin if current_path else kafka_bin

print("Environment variables set for this session.")

In [None]:
# Install Zookeeper and kafkacat from the system package manager.
# A later cell starts ZooKeeper via /usr/share/zookeeper/bin/zkServer.sh, and
# later cells use kafkacat to read messages back from the topic.
# Refresh the package index first so the install steps see current packages.
!sudo apt-get update
!sudo apt-get install -y zookeeper
!sudo apt-get install -y kafkacat

In [None]:
#Set Zookeeper Environment Variables
# Persists ZOOKEEPER_HOME and the ZooKeeper bin directory for future shells
# (~/.bashrc) and exports them for the current kernel session. The cell is
# idempotent: re-running it neither duplicates lines in ~/.bashrc nor entries
# in PATH.
import os
from pathlib import Path

zookeeper_home = "/usr/share/zookeeper"
zookeeper_bin = zookeeper_home + "/bin"

# Lines that should be present in ~/.bashrc ($PATH / $ZOOKEEPER_HOME are
# expanded by the shell when it sources the file, not here).
zookeeper_export_lines = [
    "export ZOOKEEPER_HOME=" + zookeeper_home,
    "export PATH=$PATH:$ZOOKEEPER_HOME/bin",
]

bashrc_path = Path.home() / ".bashrc"
bashrc_text = bashrc_path.read_text() if bashrc_path.exists() else ""
present_lines = bashrc_text.splitlines()
missing_lines = [line for line in zookeeper_export_lines if line not in present_lines]
if missing_lines:
    # Start on a fresh line if the file does not already end with a newline.
    lead = "" if bashrc_text == "" or bashrc_text.endswith("\n") else "\n"
    with bashrc_path.open("a") as bashrc_file:
        bashrc_file.write(lead + "\n".join(missing_lines) + "\n")

# Export for current session
os.environ["ZOOKEEPER_HOME"] = zookeeper_home
current_path = os.environ.get("PATH", "")
if zookeeper_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = current_path + os.pathsep + zookeeper_bin if current_path else zookeeper_bin

print("Zookeeper environment variables set for this session.")

In [None]:
#Start Kafka and Zookeeper
# Each service is started and then polled until its TCP port accepts
# connections, instead of sleeping for a fixed time and hoping it is up.
import socket
import time


def _wait_for_port(host, port, timeout=60.0):
    """Block until a TCP connection to host:port succeeds.

    Raises TimeoutError if the port is still unreachable after `timeout` seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            with socket.create_connection((host, port), timeout=2):
                return
        except OSError:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"{host}:{port} did not accept connections within {timeout:.0f}s"
                )
            time.sleep(1)


# Start Zookeeper first
!sudo /usr/share/zookeeper/bin/zkServer.sh start

# Wait for ZooKeeper's client port before starting the broker.
# NOTE(review): assumes the default client port 2181 - confirm against zoo.cfg.
_wait_for_port("localhost", 2181)

# Start Kafka using the full path
!/usr/local/kafka/bin/kafka-server-start.sh -daemon /usr/local/kafka/config/server.properties

# Wait for the broker listener used by every later cell (localhost:9092)
_wait_for_port("localhost", 9092)

print("Zookeeper and Kafka started.")

In [None]:
# Create your labWeather topic here
# 5 partitions, replication factor 1 (single local broker).
# --if-not-exists makes the cell safe to re-run: without it a second run fails
# because the topic already exists.
!/usr/local/kafka/bin/kafka-topics.sh --create --if-not-exists --bootstrap-server localhost:9092 --replication-factor 1 --partitions 5 --topic labWeather

## Example 7: Check Topic Details

In [None]:
# Describe the labWeather topic: partition count, replication factor, and the
# leader/replica assignment of each partition.
!/usr/local/kafka/bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 --topic labWeather

**What the code does:**
- Shows the replication factor, partition count, and leader info for the labWeather topic.

**Exercise 7**:
- Describe your labWeather topic.
- **Question**: How many partitions and which broker is the leader?

In [None]:
# Describe your labWeather topic here
# --describe selects the action; the tool talks to the broker through
# --bootstrap-server only, so no ZooKeeper address is passed.
!/usr/local/kafka/bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 --topic labWeather

In [None]:
%%bash
# Publish the current weather for lat=44.34, lon=10.99 to the labWeather topic
# every 30 seconds.
#
# The OpenWeatherMap API key is read from the OPENWEATHER_API_KEY environment
# variable so that it is never stored in the notebook. Set it in the kernel
# before running this cell, e.g. os.environ["OPENWEATHER_API_KEY"] = getpass.getpass().
# Note: This is a persistent loop. Interrupt the kernel to stop it when needed.

# Fail fast with a clear message when the key is missing.
: "${OPENWEATHER_API_KEY:?Set OPENWEATHER_API_KEY before running this cell}"

while true
do
  curl -s "https://api.openweathermap.org/data/2.5/weather?lat=44.34&lon=10.99&appid=${OPENWEATHER_API_KEY}" |\
  /usr/local/kafka/bin/kafka-console-producer.sh --bootstrap-server localhost:9092 --topic labWeather
  # Pause between polls so the loop does not hammer the API.
  sleep 30
done

In [None]:
%%bash
# Modified script: publish the current weather for Paris (lat=48.8566,
# lon=2.3522) to the labWeather topic every 30 seconds.
#
# The OpenWeatherMap API key is read from the OPENWEATHER_API_KEY environment
# variable so that it is never stored in the notebook. Set it in the kernel
# before running this cell.
# Note: This is a persistent loop. Interrupt the kernel to stop it when needed.

# Fail fast with a clear message when the key is missing.
: "${OPENWEATHER_API_KEY:?Set OPENWEATHER_API_KEY before running this cell}"

while true
do
  curl -s "https://api.openweathermap.org/data/2.5/weather?lat=48.8566&lon=2.3522&appid=${OPENWEATHER_API_KEY}" |\
  /usr/local/kafka/bin/kafka-console-producer.sh --bootstrap-server localhost:9092 --topic labWeather
  sleep 30
done

## Spark Structured Streaming from Kafka

## **View the last five messages from each partition of the topic**

In [None]:

# Consumer mode against the local broker: start 5 messages before the end
# offset (-o -5) and exit once the end is reached (-e).
!kafkacat -b localhost:9092 -t labWeather -C -o -5 -e

## **View the last five messages from the topic within a specific partition (3)**

In [None]:
# Same tail read, restricted to partition 3 of labWeather (-p 3).
!kafkacat -b localhost:9092 -t labWeather -p 3 -C -o -5 -e

## **Describe the topic again, checking for changes**

In [None]:
# Describe labWeather again to compare against the earlier output.
!/usr/local/kafka/bin/kafka-topics.sh --describe --bootstrap-server localhost:9092 --topic labWeather

## **Consume messages from the beginning**

In [None]:
# Replay labWeather from the earliest available offset and exit at the end (-e).
!kafkacat -b localhost:9092 -t labWeather -C -o beginning -e

In [None]:
# PySpark code: stream the labWeather topic to the console sink for a bounded time.
# See the available Spark SQL Kafka connector versions here https://repo1.maven.org/maven2/org/apache/spark/spark-sql-kafka-0-10_2.12/

import pyspark
from pyspark.sql import SparkSession

# The Kafka connector version must match the Spark version in use, so derive it
# from the installed PySpark instead of pinning a number that can drift.
# NOTE(review): the _2.12 Scala suffix is kept from the original coordinate -
# confirm it matches the Scala build of the installed Spark.
KAFKA_CONNECTOR_PACKAGE = (
    f"org.apache.spark:spark-sql-kafka-0-10_2.12:{pyspark.__version__}"
)

# How long to let the streaming query run before stopping it. A bounded wait
# keeps the kernel usable so that later cells can still execute.
STREAM_RUN_SECONDS = 60

# Create SparkSession with Kafka packages
# NOTE(review): getOrCreate() reuses an already-running session if there is one;
# presumably the package setting is then not applied - restart the kernel if the
# "kafka" source cannot be found.
spark = (
    SparkSession.builder
    .appName("KafkaSparkStreaming")
    .config("spark.jars.packages", KAFKA_CONNECTOR_PACKAGE)
    .getOrCreate()
)

# Read from Kafka
kafkaDF = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "localhost:9092")
    .option("subscribe", "labWeather")
    .load()
)

# Process the streaming data: Kafka values arrive as bytes, so cast to text
query = (
    kafkaDF.selectExpr("CAST(value AS STRING)")
    .writeStream
    .format("console")
    .start()
)

try:
    # Returns after STREAM_RUN_SECONDS even if the query is still active
    query.awaitTermination(STREAM_RUN_SECONDS)
finally:
    # Always release the streaming query, including on kernel interrupt
    query.stop()

In [None]:
# Report which Spark release backs the active session
spark_release = spark.version
print(spark_release)
