### Exploring the ALS algorithm

In [1]:
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

#### Loading the rating data set

In [None]:
# Read the raw ratings file as an RDD of text lines (one rating record per
# line; presumably the MovieLens 100k data set, judging by the path).
rating_data = sc.textFile("data/ml-100k/u.data")
# Peek at the first record. The parenthesised form is valid both as a
# Python 2 print statement and as a Python 3 function call.
print(rating_data.first())

In [None]:
# Turn every tab-separated line into a Rating(user, product, rating);
# only the first three fields of a record are used.
ratings = (
    rating_data
    .map(lambda text: text.split('\t'))
    .map(lambda cols: Rating(int(cols[0]), int(cols[1]), float(cols[2])))
)

In [None]:
# Fit the recommender with Alternating Least Squares (ALS).
# rank, iterations and lambda_ are handed to ALS.train unchanged.
rank, iterations, lambda_ = 10, 10, 0.01
model = ALS.train(ratings, rank, iterations=iterations, lambda_=lambda_)

In [None]:
# Measure the fit on the very ratings the model was trained on
# (this is a training error, not a held-out estimate).
testdata = ratings.map(lambda rec: (rec.user, rec.product))
predictions = model.predictAll(testdata).map(
    lambda pred: ((pred.user, pred.product), pred.rating))
# Join on the (user, product) key so each value becomes (actual, predicted).
ratesAndPreds = ratings.map(
    lambda rec: ((rec.user, rec.product), rec.rating)).join(predictions)
# Average of the squared differences between actual and predicted ratings.
MSE = ratesAndPreds.map(lambda kv: (kv[1][0] - kv[1][1]) ** 2).mean()
print("Mean Squared Error = " + str(MSE))

In [None]:
# Number of entries in the model's user-feature RDD
# (presumably one feature vector per user seen in training; verify).
model.userFeatures().count()

In [None]:
# Number of entries in the model's product-feature RDD
# (presumably one feature vector per product seen in training; verify).
model.productFeatures().count()

In [None]:
# Ask the model for a single prediction: user 789's rating of movie 123.
predictedRating = model.predict(user=789, product=123)

In [None]:
# Display the value returned by model.predict(789, 123).
predictedRating

In [None]:
# Produce the K (= 10) highest-ranked product recommendations for user 789.
userId, K = 789, 10
topKRecs = model.recommendProducts(user=userId, num=K)

In [None]:
# Display the recommendations returned by model.recommendProducts(userId, K).
topKRecs

In [None]:
# Collect every rating given by userId, ordered from highest to lowest rating.
# (The list is not truncated here; slicing to the top K is a separate step.)
topKRated = sorted(
    ratings.keyBy(lambda rec: rec.user).lookup(userId),
    key=lambda rec: rec.rating,
    reverse=True
)

In [None]:
# Keep only the first K entries of the already-sorted list, then display them.
# Re-running this cell is harmless: slicing a list of at most K items is a no-op.
topKRated = topKRated[:K]
topKRated

#### Loading the movie data set

In [None]:
# Read the movie metadata file as an RDD of text lines (one movie per line;
# fields are "|"-separated, as the later split("|") relies on).
movies = sc.textFile("data/ml-100k/u.item")
# Peek at the first record. The parenthesised form is valid both as a
# Python 2 print statement and as a Python 3 function call.
print(movies.first())

In [None]:
# Build a driver-side dict mapping integer movie id -> title from the first
# two "|"-separated fields of every movie record.
titles = (
    movies
    .map(lambda row: row.split("|")[:2])
    .map(lambda pair: (int(pair[0]), pair[1]))
    .collectAsMap()
)

In [None]:
# Display the title stored for movie id 123 (the movie used in the
# single-prediction example).
titles[123]

In [None]:
# Show the title and the model's rating for each recommendation in topKRecs.
for rec in topKRecs:
    print (titles[rec.product], rec.rating)

In [None]:
# Show the title and the user's own rating for each entry in topKRated.
for rec in topKRated:
    print (titles[rec.product], rec.rating)