# <font color='green'> Recommendation - ALS - MAP score</font>

#### Import common libraries

In [1]:
import os
import random
import time

import ml_metrics
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix

## <font color='green'> 1. Data Preparation</font>

#### Download and unzip data

In [2]:
# Please uncomment the lines below to download and unzip the dataset.
# NOTE: the "Read the data" cell loads ./datasets/ratings_1M.dat, whereas the
# commands below store the download as datasets/ratings_1M_down.dat. Rename the
# file (or adjust the read path) if you want to run on the fresh download.
# !wget -N http://files.grouplens.org/datasets/movielens/ml-1m.zip
# !unzip -o ml-1m.zip
# !mv ml-1m/ratings.dat datasets/ratings_1M_down.dat

#### Read the data

In [3]:
# The ratings file uses "::" as its field separator, which needs the python
# parser engine. Only the first three fields are loaded.
ratings_path = "./datasets/ratings_1M.dat"
rating_columns = ["userId", "movieId", "rating"]

df = pd.read_csv(
    ratings_path,
    sep="::",
    engine="python",
    usecols=[0, 1, 2],
    names=rating_columns,
)
df.shape

(1000209, 3)

#### Check Data Types

In [4]:
# Confirm that all three columns were parsed as integers.
df.dtypes

userId     int64
movieId    int64
rating     int64
dtype: object

#### Check total number of users

In [5]:
# Distinct user ids as a plain Python list (the test users are sampled from it).
all_users = df["userId"].unique().tolist()
len(all_users)

6040

#### Collect test data

In [6]:
# Fixed seed so the same 100 test users (and therefore comparable MAP scores)
# are drawn on every Restart & Run All.
SEED = 42
test_ids = random.Random(SEED).sample(all_users, 100)

# A movie counts as an "actual" (relevant) item for a user when that user rated
# it >= 3. Filter the frame once and group by user instead of rescanning the
# whole ratings table for each test user.
relevant = df.loc[
    (df["rating"] >= 3) & df["userId"].isin(test_ids),
    ["userId", "movieId"],
]
items_by_user = {
    user: movies.tolist()
    for user, movies in relevant.groupby("userId")["movieId"]
}

# Same order as test_ids; a user without any rating >= 3 gets an empty list.
all_actual_items = [items_by_user.get(user, []) for user in test_ids]

#### Convert the data to COO and CSR format

In [7]:
# Raw user/movie ids are used directly as row/column indices; no shape is
# passed, so coo_matrix sizes the matrix from the largest id in each column.
userId, movieId, rating = (
    df[column].values for column in ("userId", "movieId", "rating")
)
mat = coo_matrix((rating, (userId, movieId)), dtype=np.float64)
mat_csr = mat.tocsr()  # CSR copy is what the Frovedis ALS cell is fitted on
print("shape of the matrix is {}".format(mat.shape))

shape of the matrix is (6041, 3953)


#### Helper Function

In [8]:
def get_recommendations(handle, algo, n, users=None):
    """Collect the top-``n`` recommended item ids for each user.

    Parameters
    ----------
    handle : callable
        Called as ``handle(user, n)``; must return an iterable of
        recommendation records for that user.
    algo : str
        Selects how the item id is read from each record:
        ``"frov"`` takes ``record[0]``, ``"spark"`` takes ``record.product``.
    n : int
        Number of recommendations requested per user.
    users : iterable, optional
        Users to query. Defaults to the notebook-level ``test_ids``.

    Returns
    -------
    list of list
        One list of item ids per user, in the same order as ``users``.

    Raises
    ------
    ValueError
        If ``algo`` is not one of the supported names. Previously an unknown
        name left the item unbound or silently repeated the previous item.
    """
    extractors = {
        "frov": lambda record: record[0],
        "spark": lambda record: record.product,
    }
    if algo not in extractors:
        raise ValueError(
            "unknown algo {!r}; expected one of {}".format(algo, sorted(extractors))
        )
    extract_item = extractors[algo]

    if users is None:
        users = test_ids

    return [
        [extract_item(record) for record in handle(user, n)]
        for user in users
    ]

## <font color='green'> 2. Frovedis ALS</font>

#### Import Frovedis libraries

In [9]:
from frovedis.exrpc.server import FrovedisServer
from frovedis.mllib.recommendation import ALS as frovALS

#### Execute Frovedis ALS

In [10]:
# Initialize the Frovedis server; the server command is read from the
# FROVEDIS_SERVER environment variable and launched through "mpirun -np 8".
FrovedisServer.initialize("mpirun -np 8 " + os.environ["FROVEDIS_SERVER"])

# Stays None if training fails, so the cleanup below can skip release().
als = None
try:
    # start timer (only the fit call is timed)
    start_time = time.time()

    # fit an ALS model of rank 4 on the CSR rating matrix
    als = frovALS(max_iter=30, regParam=0.01).fit(mat_csr, rank=4)

    # end timer
    frov_time = time.time() - start_time

    # recommend 10 items for every test user
    all_recommended_items = get_recommendations(als.recommend_products, "frov", 10)

    # calculate the MAP@10 metric against each test user's movies rated >= 3
    frov_score = ml_metrics.mapk(all_actual_items, all_recommended_items, 10)

    # print time and score
    print("Frovedis Train Time = ", frov_time)
    print("Frovedis MAP Score = ", frov_score)
finally:
    # Always free the model and stop the server, even if a step above raised,
    # mirroring the try/finally used around the Spark run.
    if als is not None:
        als.release()
    FrovedisServer.shut_down()

Frovedis Train Time =  2.8071365356445312
Frovedis MAP Score =  0.48624484126984124


## <font color='green'> 3. Pyspark ALS</font>

#### Import Pyspark libraries

In [11]:
# findspark.init() locates the Spark installation and makes pyspark importable,
# so it has to run before the pyspark imports below.
import findspark
findspark.init()
import pyspark
from pyspark.mllib.recommendation import ALS as pyspark_als

#### Create pyspark context

In [12]:
# Local Spark master with 12 worker threads; the context is stopped in the
# "Execute Pyspark ALS" cell.
sc = pyspark.SparkContext(master="local[12]", appName="als")

#### Create a pyspark RDD

In [13]:
# Distribute the COO entries as (row, col, value) triples, i.e.
# (userId, movieId, rating), which is the input the Spark ALS trainer receives.
rating_triples = zip(mat.row, mat.col, mat.data)
ratingsRDD = sc.parallelize(rating_triples)
ratingsRDD.getNumPartitions()

12

#### Execute Pyspark ALS

In [14]:
# Train and evaluate inside try/finally so the SparkContext is always stopped,
# even if training or scoring raises.
try:
    # start timer (only the training call below is timed)
    start_time = time.time()

    # fit an implicit-feedback ALS model of rank 4 on the ratings RDD
    model = pyspark_als.trainImplicit(ratingsRDD, rank=4, iterations=30, lambda_=0.01)

    # stop timer
    sp_time = time.time() - start_time

    # recommend 10 items for every test user
    all_recommended_items = get_recommendations(model.recommendProducts, "spark", 10)

    # calculate the MAP@10 metric against each test user's movies rated >= 3
    sp_score = ml_metrics.mapk(all_actual_items, all_recommended_items, 10)

    # print time and score
    print("Pyspark Train Time = ", sp_time)
    print("Pyspark MAP Score = ", sp_score)
    
finally:
    # release the Spark context regardless of success or failure
    sc.stop()

Pyspark Train Time =  7.290646314620972
Pyspark MAP Score =  0.48277698412698405


## <font color='green'> 4. Results Comparison</font>

#### Time

In [15]:
# Wall-clock seconds spent in the fit/train call only; recommendation and
# scoring are not included in either figure.
print("frovedis training time : ", frov_time)
print("pyspark training time : ", sp_time)

frovedis training time :  2.8071365356445312
pyspark training time :  7.290646314620972


#### Score

In [16]:
# MAP@10 over the same sampled test users for both libraries.
print("frovedis MAP score : ", frov_score)
print("pyspark MAP score : ", sp_score)

frovedis MAP score :  0.48624484126984124
pyspark MAP score :  0.48277698412698405
