In [1]:
# Imports nécessaires
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

# Build (or reuse) the Spark session for this notebook: 2 GB for the driver,
# 2 GB per executor, and 10 shuffle partitions for Spark SQL.
spark = (
    SparkSession.builder
    .appName("MovieRecommendation")
    .config("spark.driver.memory", "2g")
    .config("spark.executor.memory", "2g")
    .config("spark.sql.shuffle.partitions", "10")
    .getOrCreate()
)

# Single seed shared by the train/test split and the ALS factor
# initialisation, so the split and the reported RMSE are repeatable from one
# run to the next instead of changing on every execution.
SEED = 42

try:
    # Load the cleaned ratings dataset from HDFS and report its size.
    ratings_df = spark.read.parquet("hdfs://namenode:9000/datasets/clean_movies_ratings")
    print(f"Nombre total de ratings: {ratings_df.count()}")

    # 80/20 train/test split; seeded for reproducibility.
    training, test = ratings_df.randomSplit([0.8, 0.2], seed=SEED)

    # ALS configuration. coldStartStrategy="drop" discards test rows for which
    # the model cannot produce a prediction (users/movies absent from the
    # training split), which would otherwise yield NaN predictions and a NaN
    # RMSE.
    als = ALS(
        maxIter=5,
        regParam=0.01,
        rank=10,
        userCol="userId",
        itemCol="movieId",
        ratingCol="rating",
        coldStartStrategy="drop",
        seed=SEED,
    )

    model = als.fit(training)

    # Evaluate on the held-out split using RMSE between "rating" and "prediction".
    predictions = model.transform(test)
    evaluator = RegressionEvaluator(
        metricName="rmse",
        labelCol="rating",
        predictionCol="prediction"
    )

    rmse = evaluator.evaluate(predictions)
    print(f"\nPerformance du modèle (RMSE): {rmse:.2f}")

    # Persist the fitted model, replacing any model already stored at this path.
    model_path = "hdfs://namenode:9000/models/als_recommender"
    model.write().overwrite().save(model_path)
    print(f"\nModèle sauvegardé dans: {model_path}")

except Exception as e:
    # Best-effort reporting: the error message is printed and the exception is
    # not re-raised, so the cell finishes even when a step above failed.
    print(f"Erreur: {str(e)}")

finally:
    # Always release the Spark session, whether the pipeline succeeded or not.
    spark.stop()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/05/05 02:46:01 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25/05/05 02:46:04 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
                                                                                

Nombre total de ratings: 20000263


25/05/05 02:48:01 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
25/05/05 02:48:05 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.lapack.JNILAPACK
                                                                                


Performance du modèle (RMSE): 0.81


                                                                                


Modèle sauvegardé dans: hdfs://namenode:9000/models/als_recommender
