In [1]:
# Imports nécessaires
from pyspark.ml.recommendation import ALSModel
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, from_json
from pyspark.sql.types import (FloatType, IntegerType, StructField, StructType,
                               TimestampType)

# Spark session for this notebook (created once, reused on re-run via getOrCreate).
# The Kafka connector for Structured Streaming is requested through
# `spark.jars.packages`; the coordinate pins Scala 2.12 / connector 3.5.1.
# NOTE(review): the coordinate presumably has to match the cluster's Spark
# build — confirm before upgrading Spark.
spark = (
    SparkSession.builder
    .appName("RealTimeRecommendations")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1",
    )
    .getOrCreate()
)

:: loading settings :: url = jar:file:/opt/spark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-ed9a25f3-3bab-4bb3-9af3-38ef5910c470;1.0
	confs: [default]
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.5.1 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.5.1 in central
	found org.apache.kafka#kafka-clients;3.4.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.10.3 in central
	found org.slf4j#slf4j-api;2.0.7 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.4 in central
	found org.apache.hadoop#hadoop-client-api;3.3.4 in central
	found commons-logging#commons-logging;1.1.3 in central
	found com.google.code.findbugs#jsr305;3.0.0 in central
	found org.apache.commons#commons-pool2;2.11.1 in central
:: resolution report :: resolve 763ms :: artifacts dl 48ms
	:: modules in us

In [2]:
# Schema applied to the JSON payload of each Kafka message.
# Every field is declared nullable (third argument True).
schema = (
    StructType()
    .add("userId", IntegerType(), True)
    .add("movieId", IntegerType(), True)
    .add("rating", FloatType(), True)
    .add("timestamp", TimestampType(), True)
)

# Kafka connection settings and location of the persisted ALS model.
KAFKA_BROKER = "bigdata-container:9092"  # bootstrap server (host:port)
TOPIC_NAME = "movieratings"  # topic the stream subscribes to
MODEL_PATH = "hdfs://namenode:9000/models/als_recommender"  # passed to ALSModel.load

In [3]:
# Streaming source: subscribe to the ratings topic on the configured broker.
kafka_stream = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", KAFKA_BROKER)
    .option("subscribe", TOPIC_NAME)
    .load()
)

# Decode the Kafka `value` bytes as a string, parse it as JSON against `schema`
# and flatten the resulting struct into top-level columns.
# `from_json` produces null fields for payloads that are malformed or do not
# match the schema; rows without a user or movie id cannot be scored by the
# ALS model downstream (and null ids may fail the whole streaming query), so
# they are dropped here instead of being forwarded.
parsed_stream = (
    kafka_stream
    .selectExpr("CAST(value AS STRING)")
    .select(from_json(col("value"), schema).alias("data"))
    .select("data.*")
    .where(col("userId").isNotNull() & col("movieId").isNotNull())
)

In [None]:
# Load the persisted ALS model and score every incoming (userId, movieId) row.
als_model = ALSModel.load(MODEL_PATH)
predictions = als_model.transform(parsed_stream)

# How long this cell blocks waiting for streamed results before shutting down.
STREAM_TIMEOUT_SECONDS = 60

# Start the streaming query: print each micro-batch of predictions to the console.
query = (
    predictions.select("userId", "movieId", "prediction")
    .writeStream
    .outputMode("append")
    .format("console")
    .start()
)

# Wait for results. `awaitTermination(timeout=...)` returns once the timeout
# elapses even if the query is still running, so stop it explicitly; otherwise
# it keeps consuming from Kafka in the background and every re-run of this cell
# starts an additional query. The `finally` also covers a failed query or a
# kernel interrupt.
print("En attente des messages...")
try:
    query.awaitTermination(timeout=STREAM_TIMEOUT_SECONDS)
finally:
    if query.isActive:
        query.stop()