In [1]:

import sys
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating


In [2]:

def loadMovieNames(path="/home/hashimyousaf/spark-2.4.0-bin-hadoop2.7/bin/jupyter-scripts/dataset/ml-100k/u.item"):
    """Build a lookup table from movie ID to movie title.

    Each non-blank line of the file is split on '|'; the first field is
    parsed as the integer movie ID and the second field is used as the title.

    Args:
        path: Location of the pipe-delimited item file. Defaults to the path
            this notebook originally hard-coded, so existing zero-argument
            calls keep working unchanged.

    Returns:
        dict mapping int movie ID -> str title.

    Raises:
        ValueError: if the first field of a non-blank line is not an integer.
        IndexError: if a non-blank line has no '|' separator.
    """
    movieNames = {}
    with open(path, encoding="ISO-8859-1", errors="ignore") as f:
        for line in f:
            # A stray empty line (e.g. at the end of the file) would otherwise
            # crash on int('').
            if not line.strip():
                continue
            # Drop the line terminator so it cannot end up inside the title
            # when the title happens to be the last field on the line.
            fields = line.rstrip("\r\n").split('|')
            movieNames[int(fields[0])] = fields[1]
    return movieNames

# Run Spark in local mode ("local[*]") under the application name below.
conf = SparkConf().setMaster("local[*]").setAppName("MovieRecommendationsALS")
# Only one SparkContext may be active per process. getOrCreate returns the
# already-running context if there is one, so re-running this cell in a live
# kernel no longer raises "Cannot run multiple SparkContexts at once".
# Note: when a context already exists, `conf` is not re-applied to it.
sc = SparkContext.getOrCreate(conf=conf)


In [3]:
# Register the directory ("checkpoint", a relative path) in which Spark will
# store RDD checkpoints.
sc.setCheckpointDir("checkpoint")

# Movie ID -> title lookup, used by later cells to print readable names.
print("\nLoading movie names...")
nameDict = loadMovieNames()




Loading movie names...


In [4]:
# Raw ratings file, read as an RDD with one text line per element.
data = sc.textFile("file:///home/hashimyousaf/spark-2.4.0-bin-hadoop2.7/bin/jupyter-scripts/dataset/ml-100k/u.data")


def parseRating(line):
    """Convert one whitespace-separated line into a Rating.

    The first three fields become Rating(int, int, float); any further
    fields on the line are ignored.
    """
    fields = line.split()
    return Rating(int(fields[0]), int(fields[1]), float(fields[2]))


# Cached because the RDD is reused by later cells (training and filtering).
ratings = data.map(parseRating).cache()



In [23]:

# Build the recommendation model using Alternating Least Squares
print("\nTraining recommendation model...")
rank = 10
# Lowered numIterations to ensure it works on lower-end systems
numIterations = 6
# ALS.train accepts an optional seed. Without one, every run yields a
# different model and therefore different recommendations, so pin it to make
# a fresh "Restart & Run All" reproducible.
alsSeed = 42
model = ALS.train(ratings, rank, numIterations, seed=alsSeed)

# User whose ratings and recommendations are printed in the following cells.
# Hard-coded here; this value was previously meant to come from
# int(sys.argv[1]) when run as a script.
userID = 0



Training recommendation model...


In [24]:


# Show every rating the selected user has given, with movie titles resolved
# through nameDict.
print("\nRatings for user ID " + str(userID) + ":")
userRatings = ratings.filter(lambda entry: entry.user == userID)
for rating in userRatings.collect():
    movieTitle = nameDict[int(rating.product)]
    print(movieTitle + ": " + str(rating.rating))





Ratings for user ID 0:
Godfather: Part II, The (1974): 5.0
Empire Strikes Back, The (1980): 5.0
Gone with the Wind (1939): 1.0


In [25]:

# Ask the trained model for the 10 highest-scoring products for this user and
# print each one as "<title> score <predicted rating>".
print("\nTop 10 recommendations:")
recommendations = model.recommendProducts(userID, 10)
for recommendation in recommendations:
    title = nameDict[int(recommendation.product)]
    print(title + " score " + str(recommendation.rating))
    


Top 10 recommendations:
Three Caballeros, The (1945) score 9.00286425435626
Haunted World of Edward D. Wood Jr., The (1995) score 7.599893612287159
Harlem (1993) score 7.414945386287301
Love and a .45 (1994) score 7.295781204746187
Endless Summer 2, The (1994) score 6.944290078594165
Color of Night (1994) score 6.926991292492175
Grass Harp, The (1995) score 6.732730997119194
Roommates (1995) score 6.720739506353633
Love in the Afternoon (1957) score 6.698767168912005
Nosferatu (Nosferatu, eine Symphonie des Grauens) (1922) score 6.691377598722749
