# Non-Personalized RS

In this step we will implement the two main non-personalized Recommender Systems (RS). They are:

- Most-Popular: the one that recommends the most popular items
- Best-Rated: the one that recommends the items who was rated with higher grades

For this reason, you must do:

- Read the train file extracted from the dataset 
- Create a sparse matrix for the ratings file
- Implement the Most-Popular and save the items recommended
- Implement the Best-Rated and save the items recommended

In [3]:
# import libs
import operator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.sparse import csr_matrix
from collections import OrderedDict

# useful command
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

plt.rcParams.update({'font.size': 14})

## Reading train and test files

You can read this file as you prefer. I propose to read the files by the pandas' library and create the sparse matrix after it.

In [5]:
df_train = pd.read_csv('./Dataset/ML-1M/trainSet.txt', sep='::', names=['userId', 'itemId', 'rating', 'timestamp'])
df_test = pd.read_csv('./Dataset/ML-1M/testSet.txt', sep='::', names=['userId', 'itemId', 'rating', 'timestamp'])

df_train.head()
df_test.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,1,1193,5.0,978300760.0
1,1,661,3.0,978302109.0
2,1,914,3.0,978301968.0
3,1,3408,4.0,978300275.0
4,1,1197,3.0,978302268.0


Unnamed: 0,userId,itemId,rating,timestamp
0,1,2355,5.0,978824291.0
1,1,595,5.0,978824268.0
2,1,2687,3.0,978824268.0
3,1,48,5.0,978824351.0
4,1,745,3.0,978824268.0


## Creating Sparse Matrix

I propose to use the csr_matrix from scipy.

In [6]:
# Select users, items and ratings logs (i.e., all information from each column)
users = df_train['userId']
items = df_train['itemId']
ratings = df_train['rating']

In [8]:
# Define the matrix dimensions based on the max index related to users and items
nb_users = max(users)
nb_items = max(items)

In [11]:
# Creating matrix of ratings
ratings_matrix = csr_matrix((ratings, (users, items)), shape=(nb_users+1, nb_items+1))

ratings_matrix.shape

(6041, 3953)

## An useful function

This function is used to save the recommendations in a file.

In [12]:
def dumpRecommendation(recommendation, users_targets, file_name):
    
    file_out = open(file_name, 'w')
    
    # for each user target
    for userId in users_targets:
        issuedItems = ""
        # for each item in the previous order
        for itemId in recommendation[userId]:
            issuedItems += str(itemId) + ":" + str(0.0) + ","
        # saving in file in correct format
        string_s = str(userId) + "\t" + "[" + issuedItems
        string_out = string_s[:-1] + ']'
        file_out.write(string_out + "\n")
    
    file_out.close()

## Most-Popular Recommendation

In [14]:
# Items popularity
items_popularity = {}
for i in range(ratings_matrix.shape[1]):
    items_popularity[i] = ratings_matrix[:,i].count_nonzero()

In [15]:
# Sorting the itemId descending by its popularity
popularity_order = OrderedDict(sorted(items_popularity.items(), key=operator.itemgetter(1), reverse=True))
most_popular = list(popularity_order.keys())

In [16]:
most_popular[:10]

[2858, 1196, 260, 1210, 2028, 480, 589, 2571, 1270, 1198]

In [17]:
# Size of each recommendation
top_k = 10

In [18]:
# Setting the recommendations of items that have not be rated by the user
recommendation = {}

for u in range(ratings_matrix.shape[0]):
    # recommending just the most popular
    recommendation[u] = []
    cont = 0
    # recommending the most popular that have never seen by users
    for i in most_popular:
        # recommending the top-k items 
        if (cont < top_k):
            if (ratings_matrix[u, i] == 0):
                recommendation[u].append(i)
                cont += 1
        else:
            break

In [19]:
# Save in a file
users_targets = df_test['userId'].unique()
dumpRecommendation(recommendation, users_targets, "recList_MostPopular.txt")

In [20]:
recommendation[300]
recommendation[3000]
recommendation[6010]

[260, 1210, 480, 2571, 1580, 527, 1265, 1097, 2997, 2628]

[2028, 593, 608, 110, 2396, 527, 858, 1617, 2762, 318]

[2858, 480, 589, 2571, 593, 608, 2396, 1097, 1617, 2762]

## Best-Rated Recommendation

In [21]:
# Measure the mean of items' ratings
items_rating = {}

for i in range(ratings_matrix.shape[1]):
    items_rating[i] = np.mean(ratings_matrix[:, i])

In [22]:
print(items_rating)

550736633007, 3203: 0.2443304088727032, 3204: 0.09220327760304584, 3205: 0.0011587485515643105, 3206: 0.04866743916570104, 3207: 0.002648568117861281, 3208: 0.09534845224300613, 3209: 0.00016553550736633007, 3210: 0.4658169177288528, 3211: 0.02913424929647409, 3212: 0.0008276775368316504, 3213: 0.1079291508028472, 3214: 0.043535838437344807, 3215: 0.004469458698890912, 3216: 0.0016553550736633006, 3217: 0.04039066379738454, 3218: 0.013242840589306406, 3219: 0.18242012911769573, 3220: 0.00033107101473266014, 3221: 0.014401589140870717, 3222: 0.014732660155603376, 3223: 0.012249627545108424, 3224: 0.04370137394471114, 3225: 0.0374110246647906, 3226: 0.0, 3227: 0.0, 3228: 0.00016553550736633007, 3229: 0.0006621420294653203, 3230: 0.03045853335540473, 3231: 0.0, 3232: 0.003310710147326601, 3233: 0.0016553550736633006, 3234: 0.0, 3235: 0.02218175798708823, 3236: 0.0011587485515643105, 3237: 0.00016553550736633007, 3238: 0.07233901671908624, 3239: 0.009269988412514484, 3240: 0.01274623406720

In [23]:
# Sorting the itemId descending by its popularity
rating_order = OrderedDict(sorted(items_rating.items(), key=operator.itemgetter(1), reverse=True))
best_rated = list(rating_order.keys())

In [24]:
best_rated[:10]

[2858, 260, 1196, 1210, 2028, 1198, 2571, 593, 589, 608]

In [25]:
# Setting the recommendations of items that have not be rated by the user
recommendation = {}

for u in range(ratings_matrix.shape[0]):
    # recommending just the most popular
    recommendation[u] = []
    cont = 0
    # recommending the most popular that have never seen by users
    for i in best_rated:
        # recommending the top-k items 
        if (cont < top_k):
            if (ratings_matrix[u, i]==0):
                recommendation[u].append(i)
                cont += 1
        else:
            break

In [26]:
# Save in a file
users_targets = df_test['userId'].unique()
dumpRecommendation(recommendation, users_targets, "recList_BestRated.txt")

In [27]:
recommendation[300]
recommendation[3000]
recommendation[6010]

[260, 1210, 2571, 527, 480, 1580, 2997, 1265, 296, 1097]

[2028, 593, 608, 527, 110, 858, 318, 2762, 2396, 1617]

[2858, 2571, 593, 589, 608, 318, 480, 2762, 2396, 1617]