<a href="https://colab.research.google.com/github/aquilinoFrancisco/cjf/blob/main/sistemaRecomendaci%C3%B3n.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import os
import random
import numpy as np
import pandas as pd
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS

In [21]:
from pyspark.sql import SparkSession, DataFrame as spDataFrame
import pyspark as ps
from pyspark import SparkConf, SparkContext
import pyspark.sql.functions as F



# Configuración de pyspark

In [10]:
def _conf_pyspark(self):
    """Build the SparkConf used to create the notebook's Spark session.

    Args:
        self: Not used by this function; kept so the existing call signature
            stays valid.

    Returns:
        A ``pyspark.SparkConf`` populated with the timeout, driver, executor
        and SQL settings listed below.
    """
    settings = [
        ("spark.network.timeout", "3000s"),
        ("spark.executor.heartbeatInterval", "1200s"),
        ("spark.driver.cores", "2"),
        ("spark.driver.memory", "8g"),
        ("spark.executor.instances", "6"),
        ("spark.executor.cores", "1"),
        # NOTE(review): this key does not appear among Spark's documented
        # properties as far as I can tell; `spark.executor.pyspark.memory`
        # may have been intended — confirm before relying on it.
        ("spark.python.executor.memory", "2g"),
        ("spark.cores.max", "8"),
        ("spark.sql.debug.maxToStringFields", "5000"),
        ("spark.sql.shuffle.partitions", "1"),
        ("spark.sql.execution.arrow.pyspark.enabled", "true"),
    ]
    conf = ps.SparkConf().setAll(settings)
    # Hand the configuration back so callers can pass it to
    # SparkSession.builder.config(conf=...).
    return conf

In [None]:
# There is no enclosing object at notebook level, so `self` is not defined here.
# _conf_pyspark declares a `self` parameter it never reads; None is passed for it.
conf = _conf_pyspark(None)

builder = SparkSession.builder
# Only apply the custom configuration when one was actually returned.
if conf is not None:
    builder = builder.config(conf=conf)

# Module-level Spark session for the rest of the notebook.
spark = builder.master("local").appName("Sistema de recomendaciones").getOrCreate()

# Paso 1: Procesamiento de Datos

In [17]:
def load_data(filename, spark=None):
    """Load a ratings CSV into a Spark DataFrame with typed columns.

    Args:
        filename: Path to a CSV file with a header row that contains the
            columns ``userId``, ``movieId`` and ``rating``.
        spark: Optional SparkSession used for reading. When omitted, the
            active session is reused (or one is created) through
            ``SparkSession.builder.getOrCreate()``.

    Returns:
        The loaded DataFrame with ``userId`` and ``movieId`` cast to integer
        and ``rating`` cast to float.
    """
    if spark is None:
        spark = SparkSession.builder.getOrCreate()

    df = spark.read.csv(filename, header=True, inferSchema=True)

    # Cast the columns of the frame that was just read (not some other,
    # externally defined frame) to the types the ALS estimator is fed with.
    column_types = {"userId": "integer", "movieId": "integer", "rating": "float"}
    for column, dtype in column_types.items():
        df = df.withColumn(column, df[column].cast(dtype))
    return df

# Paso 2: Construir el Modelo de Recomendación

In [18]:
def train_model(df, rank, max_iter=10, implicit_prefs=False, cold_start_strategy="drop"):
    """Fit an ALS (Alternating Least Squares) recommender on a ratings frame.

    Args:
        df: Spark DataFrame with ``userId``, ``movieId`` and ``rating`` columns.
        rank: Number of latent factors.
        max_iter: Maximum number of ALS iterations.
        implicit_prefs: Passed to ALS as ``implicitPrefs``. Defaults to False
            (explicit feedback) because the ``rating`` column is used as the
            regression label when the model is scored with RMSE; pass True to
            get the implicit-feedback variant.
        cold_start_strategy: Passed to ALS as ``coldStartStrategy``. ``"drop"``
            removes rows whose prediction would be NaN (users or movies absent
            from the training data) so evaluation metrics stay defined; pass
            ``"nan"`` to keep them.

    Returns:
        The fitted model returned by ``ALS.fit``.
    """
    als = ALS(
        userCol="userId",
        itemCol="movieId",
        ratingCol="rating",
        rank=rank,
        maxIter=max_iter,
        implicitPrefs=implicit_prefs,
        coldStartStrategy=cold_start_strategy,
    )
    return als.fit(df)

#  Paso 3: Generar recomendaciones

In [None]:
def recommend(model, userID, num_recommendations):
    """Return the top recommendations for a single user.

    Args:
        model: A trained recommender. Either a model exposing
            ``recommendProducts(user, n)`` (the RDD-based API) or a
            DataFrame-based ALS model exposing ``getUserCol()`` and
            ``recommendForUserSubset(users, n)``.
        userID: Identifier of the user to recommend for.
        num_recommendations: Maximum number of items to return.

    Returns:
        For models with ``recommendProducts``: whatever that method returns.
        Otherwise: the list stored in the ``recommendations`` column for the
        user, or an empty list when the model produced no row for that user.
    """
    # Models that implement recommendProducts keep using it unchanged.
    if hasattr(model, "recommendProducts"):
        return model.recommendProducts(userID, num_recommendations)

    # DataFrame-based ALS models take the users of interest as a DataFrame
    # whose column name matches the model's user column.
    spark = SparkSession.builder.getOrCreate()
    users = spark.createDataFrame([(userID,)], [model.getUserCol()])
    rows = model.recommendForUserSubset(users, num_recommendations).collect()
    if not rows:
        return []
    return list(rows[0]["recommendations"])

# Paso 4: Evaluar el modelo

In [None]:
def evaluate_model(testing_data, model):
    """Score ``model`` on ``testing_data`` and return the RMSE.

    Args:
        testing_data: Frame passed to ``model.transform``; the result is
            evaluated using its ``rating`` column as label and its
            ``prediction`` column as prediction.
        model: Fitted model exposing ``transform``.

    Returns:
        The value returned by ``RegressionEvaluator.evaluate`` for the
        ``rmse`` metric.
    """
    scored = model.transform(testing_data)
    return RegressionEvaluator(
        labelCol='rating',
        predictionCol='prediction',
        metricName='rmse',
    ).evaluate(scored)

In [19]:
def main(ratings_path="ratings.csv", rank=10, user_id=1, num_recommendations=5, seed=42):
    """Run the pipeline: load ratings, train, recommend, evaluate.

    Args:
        ratings_path: CSV file handed to ``load_data``.
        rank: Number of latent factors handed to ``train_model``.
        user_id: User for whom recommendations are printed.
        num_recommendations: How many recommendations to request.
        seed: Seed for the train/test split so repeated runs produce the
            same partition.

    Prints the recommendations and the RMSE on the held-out split, and stops
    the Spark session before returning (also when a step raises).
    """
    spark = SparkSession.builder.appName("SistemaDeRecomendaciones").getOrCreate()
    try:
        df = load_data(ratings_path)

        # 80/20 split into training and held-out testing data.
        training_data, testing_data = df.randomSplit([0.8, 0.2], seed=seed)

        # Fit on the training split only; fitting on the full frame would let
        # the model see the rows it is evaluated on and understate the error.
        model = train_model(training_data, rank)

        recommendations = recommend(model, user_id, num_recommendations)
        print(recommendations)

        rmse = evaluate_model(testing_data, model)
        print(f"Root Mean Squared Error (RMSE): {rmse}")
    finally:
        # Release the session even if loading, training or evaluation failed.
        spark.stop()

In [None]:
# Entry point: run the pipeline only when this code is executed directly
# (``__name__`` is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()