Anime Recommendation Engine

In [66]:
%config IPCompleter.greedy=True
from pyspark import SparkContext
from pyspark.sql import SparkSession

from pyspark.mllib.recommendation import ALS
from pyspark.mllib.recommendation import Rating

from sagemaker import get_execution_role
import sagemaker_pyspark

# Execution role of this SageMaker notebook (not referenced again in the cells below).
role = get_execution_role()

# JARs shipped with sagemaker_pyspark; fetched once and reused to build the
# driver classpath (entries are joined with ':').
jars = sagemaker_pyspark.classpath_jars()
classpath = ":".join(jars)

# Local Spark session using all available cores. The hadoop-aws / hadoop-common
# packages are requested so that s3a:// paths can be read later in the notebook.
spark = (
    SparkSession.builder.appName("Anime Recommendation System with PySpark")
    .config("spark.driver.extraClassPath", classpath)
    .config("spark.jars.packages", "org.apache.hadoop:hadoop-aws:2.6.3,org.apache.hadoop:hadoop-common:2.6.3")
    .master("local[*]")
    .getOrCreate()
)
spark

In [None]:
# Point the S3A connector at the regional S3 endpoint.
region = "eu-central-1"
hadoop_conf = spark._jsc.hadoopConfiguration()
hadoop_conf.set('fs.s3a.endpoint', 's3.{}.amazonaws.com'.format(region))

# Read the ratings CSV from S3 as an RDD of raw text lines.
rating_lines = spark.sparkContext.textFile('s3a://anime-recommendation-system/preprocessed-data/rating.csv')

# The first line is treated as the column header; keep every line that differs from it.
header = rating_lines.first()
rating = rating_lines.filter(lambda line: line != header)
rating.take(3)

In [None]:
# Turn each comma-separated line into an MLlib Rating built from its first
# three fields (cast to int, int, float respectively).
# NOTE: `rating` is rebound to the parsed RDD, so running this cell a second
# time without re-running the load cell would call .split on Rating objects.
rating = (
    rating
    .map(lambda line: line.split(','))
    .map(lambda fields: Rating(int(fields[0]), int(fields[1]), float(fields[2])))
)
rating.take(3)

In [83]:
# ALS hyperparameters passed to ALS.train below: `rank`, the iteration count,
# and the `lambda_` regularization value.
# The iteration count is deliberately NOT called `iter`: a notebook-level
# variable with that name would shadow the built-in iter() for every later cell.
rank, num_iterations, lambda_ = 50, 10, 0.1

# Fixed seed so repeated runs start from the same initialization.
model = ALS.train(rating, rank=rank, iterations=num_iterations, lambda_=lambda_, seed=5047)

In [None]:
# Sanity check: number of entries returned by the trained model's userFeatures().
model.userFeatures().count()

In [None]:
# Sanity check: number of entries returned by the trained model's productFeatures().
model.productFeatures().count()

In [None]:
# Spot-check the model: predicted rating for one (user, product) pair.
user_id, product_id = 2485327, 59
predicted_rating = model.predict(user_id, product_id)
predicted_rating

Recommend the top K anime for the same user

In [None]:
# Request K recommendations for a single user and print them one per line.
userId = 2485327
K = 10
top_k_recs = model.recommendProducts(userId, K)
for recommendation in top_k_recs:
    print(recommendation)

In [None]:
import shutil

# Spark refuses to write into an output path that already exists, so a second
# run of this cell (or a Restart & Run All) would fail once ./model is present.
# Remove any model directory left by a previous run before saving; this assumes
# the relative path 'model' resolves to the local working directory, which is
# where the upload cell below looks for it.
shutil.rmtree('model', ignore_errors=True)
model.save(SparkContext.getOrCreate(), 'model')

In [None]:
import boto3
import os
import posixpath

# Initialize S3 client
s3 = boto3.client('s3')

# Upload every file found under the local model directory to the bucket,
# mirroring the directory layout beneath the `destination` key prefix.
bucketname = 'anime-recommendation-system'
local_directory = './model'
destination = 'model/'
for root, dirs, files in os.walk(local_directory):
    for filename in files:
        # Full local path of the file to upload.
        local_path = os.path.join(root, filename)

        # Path of the file relative to the model directory.
        relative_path = os.path.relpath(local_path, local_directory)

        # S3 object keys use '/' regardless of the local OS, so normalise the
        # separator and join with posixpath instead of os.path.
        s3_path = posixpath.join(destination, relative_path.replace(os.sep, '/'))

        s3.upload_file(local_path, bucketname, s3_path)