# Non-Personalized RS

In this step we will implement the two main non-personalized Recommender Systems (RS). They are:

- Most-Popular: the one that recommends the most popular items
- Best-Rated: the one that recommends the items that received the highest ratings

To do so, you must:

- Read the train file extracted from the dataset
- Create a sparse matrix for the ratings file
- Implement the Most-Popular and save the items recommended
- Implement the Best-Rated and save the items recommended

In [3]:
# import libs
import operator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.sparse import csr_matrix
from collections import OrderedDict

# Make the notebook display the value of every top-level expression in a cell,
# not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Use a larger default font size in matplotlib figures
plt.rcParams.update({'font.size': 14})

# Upload the data files (trainSet.txt and testSet.txt) into the Colab session
from google.colab import files
uploaded = files.upload()

Saving trainSet.txt to trainSet.txt
Saving testSet.txt to testSet.txt


## Reading train and test files

You can read these files however you prefer. I suggest reading them with the pandas library and creating the sparse matrix afterwards.

In [4]:
# '::' is a multi-character separator, which only pandas' Python parsing
# engine supports. Requesting that engine explicitly avoids the ParserWarning
# pandas emits when it has to fall back from the default C engine.
df_train = pd.read_csv('trainSet.txt', sep='::', engine='python',
                       names=['userId', 'itemId', 'rating', 'timestamp'])
df_test = pd.read_csv('testSet.txt', sep='::', engine='python',
                      names=['userId', 'itemId', 'rating', 'timestamp'])

# Preview the first rows of both data frames
df_train.head()
df_test.head()

  df_train = pd.read_csv('trainSet.txt', sep = '::', names = ['userId', 'itemId', 'rating', 'timestamp'])
  df_test = pd.read_csv('testSet.txt', sep = '::', names = ['userId', 'itemId', 'rating', 'timestamp'])


Unnamed: 0,userId,itemId,rating,timestamp
0,1,1193,5.0,978300760.0
1,1,661,3.0,978302109.0
2,1,914,3.0,978301968.0
3,1,3408,4.0,978300275.0
4,1,1197,3.0,978302268.0


Unnamed: 0,userId,itemId,rating,timestamp
0,1,2355,5.0,978824291.0
1,1,595,5.0,978824268.0
2,1,2687,3.0,978824268.0
3,1,48,5.0,978824351.0
4,1,745,3.0,978824268.0


## Creating Sparse Matrix

I suggest using `csr_matrix` from SciPy.

In [5]:
# Pull the three columns needed to build the ratings matrix out of the
# training frame: one entry per rating log
users, items, ratings = (
    df_train[column] for column in ('userId', 'itemId', 'rating')
)

In [6]:
# Define the matrix dimensions based on the max index related to users and items.
# The ids themselves are used as row/column indices when the matrix is built,
# so the largest id (not the number of distinct ids) determines the size.
nb_users = max(users)
nb_items = max(items)

In [7]:
# Creating matrix of ratings: rows are indexed by userId, columns by itemId.
# One extra row and one extra column are allocated (+1) so that the largest
# id is itself a valid index.
ratings_matrix = csr_matrix((ratings, (users, items)), shape=(nb_users+1, nb_items+1))

# Display the resulting (rows, columns) dimensions
ratings_matrix.shape

(6041, 3953)

## A useful function

This function is used to save the recommendations in a file.

In [8]:
def dumpRecommendation(recommendation, users_targets, file_name):
    """Write the recommendation lists of the target users to a text file.

    One line is written per user, in the order given by ``users_targets``,
    using the format::

        <userId><TAB>[<itemId>:0.0,<itemId>:0.0,...]

    Every item is written with the constant score ``0.0``. A user whose
    recommendation list is empty is written as ``<userId><TAB>[]``.

    Args:
        recommendation: mapping from user id to an iterable of item ids,
            already in the order they should be written.
        users_targets: iterable of user ids to write; each one must be
            present in ``recommendation``.
        file_name: path of the output file (overwritten if it exists).
    """
    # The context manager closes the file even if a lookup below fails
    with open(file_name, 'w', encoding='utf-8') as file_out:
        # for each user target
        for userId in users_targets:
            # join (rather than trimming a trailing comma) keeps the opening
            # bracket intact when the user has no recommended items
            issuedItems = ",".join(
                str(itemId) + ":" + str(0.0) for itemId in recommendation[userId]
            )
            # saving in file in correct format
            file_out.write(str(userId) + "\t" + "[" + issuedItems + "]" + "\n")

## Most-Popular Recommendation

In [9]:
# Items popularity: number of non-zero ratings each item (column) received.
# The column index of every non-zero entry is collected in one pass and
# tallied with bincount, instead of slicing the row-compressed (CSR) matrix
# once per column in a Python loop.
rated_columns = ratings_matrix.nonzero()[1]
popularity_counts = np.bincount(rated_columns, minlength=ratings_matrix.shape[1])

# .tolist() yields plain Python ints, keyed by item id in ascending order
items_popularity = dict(enumerate(popularity_counts.tolist()))

print(items_popularity)

{0: 0, 1: 1810, 2: 558, 3: 339, 4: 115, 5: 198, 6: 796, 7: 342, 8: 53, 9: 73, 10: 753, 11: 866, 12: 105, 13: 72, 14: 105, 15: 100, 16: 525, 17: 722, 18: 94, 19: 271, 20: 100, 21: 1201, 22: 287, 23: 78, 24: 514, 25: 835, 26: 74, 27: 44, 28: 148, 29: 360, 30: 54, 31: 94, 32: 1322, 33: 3, 34: 1527, 35: 48, 36: 787, 37: 7, 38: 18, 39: 1184, 40: 24, 41: 214, 42: 173, 43: 134, 44: 234, 45: 449, 46: 120, 47: 886, 48: 297, 49: 21, 50: 1529, 51: 0, 52: 358, 53: 6, 54: 26, 55: 33, 56: 8, 57: 74, 58: 416, 59: 8, 60: 281, 61: 36, 62: 426, 63: 72, 64: 56, 65: 88, 66: 63, 67: 3, 68: 51, 69: 269, 70: 759, 71: 68, 72: 75, 73: 184, 74: 84, 75: 8, 76: 140, 77: 27, 78: 40, 79: 78, 80: 41, 81: 120, 82: 71, 83: 23, 84: 16, 85: 152, 86: 163, 87: 52, 88: 136, 89: 178, 90: 6, 91: 0, 92: 55, 93: 54, 94: 147, 95: 504, 96: 12, 97: 35, 98: 4, 99: 40, 100: 87, 101: 216, 102: 32, 103: 19, 104: 575, 105: 295, 106: 10, 107: 207, 108: 6, 109: 0, 110: 2186, 111: 1067, 112: 495, 113: 40, 114: 7, 115: 0, 116: 36, 117: 51

In [11]:
# Rank the item ids from the most to the least rated
popularity_order = OrderedDict(
    sorted(items_popularity.items(), key=lambda entry: entry[1], reverse=True)
)
most_popular = list(popularity_order)

In [12]:
# Peek at the ids of the ten most popular items
most_popular[:10]

[2858, 1196, 260, 1210, 2028, 480, 589, 2571, 1270, 1198]

In [13]:
# Size of each recommendation list (number of items recommended per user)
top_k = 10

In [17]:
# Recommend to every user the top_k most popular items they have not rated yet
recommendation = {}

for u in range(ratings_matrix.shape[0]):
    # Columns holding a non-zero rating in row u, i.e. items the user already
    # rated; fetched once per user instead of one sparse lookup per candidate
    seen = set(ratings_matrix[u].nonzero()[1].tolist())
    top_items = []
    # Walk the items from most to least popular, skipping the ones already rated
    for i in most_popular:
        # stop as soon as the list holds top_k items
        if len(top_items) >= top_k:
            break
        if i not in seen:
            top_items.append(i)
    recommendation[u] = top_items

In [18]:
# Save in a file: one line per distinct user id found in the test set
users_targets = df_test['userId'].unique()
dumpRecommendation(recommendation, users_targets, "recList_MostPopular.txt")

In [19]:
# Inspect the Most-Popular recommendation lists of three sample users
recommendation[300]
recommendation[3000]
recommendation[6010]

[260, 1210, 480, 2571, 1580, 527, 1265, 1097, 2997, 2628]

[2028, 593, 608, 110, 2396, 527, 858, 1617, 2762, 318]

[2858, 480, 589, 2571, 593, 608, 2396, 1097, 1617, 2762]

## Best-Rated Recommendation

In [21]:
# Mean rating of each item, keyed by item id.
# NOTE(review): the mean is taken over the whole matrix column, so every user
# without a stored rating for the item contributes a 0 to the average. The
# score therefore also grows with the number of ratings an item has - confirm
# this is the intended "best-rated" definition.
items_rating = {
    item_id: np.mean(ratings_matrix[:, item_id])
    for item_id in range(ratings_matrix.shape[1])
}

print(items_rating)

{0: 0.0, 1: 1.2385366661148816, 2: 0.29647409369309713, 3: 0.16801853997682503, 4: 0.052805826849859294, 5: 0.09733487833140209, 6: 0.5126634663135242, 7: 0.19582850521436845, 8: 0.025823539149147492, 9: 0.032610494951167024, 10: 0.43999337857970533, 11: 0.5442807482204932, 12: 0.04303923191524582, 13: 0.03790763118688959, 14: 0.06174474424764112, 15: 0.0423770898857805, 16: 0.32941565965899683, 17: 0.48088064889918886, 18: 0.05230922032776031, 19: 0.11190200297963912, 20: 0.04188048336368151, 21: 0.7200794570435358, 22: 0.16023837113060752, 23: 0.039066379738453894, 24: 0.27114716106604864, 25: 0.5047177619599404, 26: 0.04353583843734481, 27: 0.02118854494289025, 28: 0.10081112398609501, 29: 0.2421784472769409, 30: 0.03327263698063235, 31: 0.048998510180433705, 32: 0.8688958781658666, 33: 0.0013242840589306405, 34: 0.9841085912928322, 35: 0.0253269326270485, 36: 0.5168018539976824, 37: 0.004303923191524581, 38: 0.006952491309385863, 39: 0.713623572256249, 40: 0.015725873199801357, 41:

In [24]:
# Rank the item ids from the highest to the lowest mean rating
rating_order = OrderedDict(
    sorted(items_rating.items(), key=lambda entry: entry[1], reverse=True)
)
best_rated = list(rating_order)

In [25]:
# Peek at the ids of the ten items with the highest mean rating
best_rated[:10]

[2858, 260, 1196, 1210, 2028, 1198, 2571, 593, 589, 608]

In [26]:
# Recommend to every user the top_k best-rated items they have not rated yet
recommendation = {}

for u in range(ratings_matrix.shape[0]):
    # Columns holding a non-zero rating in row u, i.e. items the user already
    # rated; fetched once per user instead of one sparse lookup per candidate
    seen = set(ratings_matrix[u].nonzero()[1].tolist())
    top_items = []
    # Walk the items from best to worst rated, skipping the ones already rated
    for i in best_rated:
        # stop as soon as the list holds top_k items
        if len(top_items) >= top_k:
            break
        if i not in seen:
            top_items.append(i)
    recommendation[u] = top_items

In [27]:
# Save in a file: one line per distinct user id found in the test set
users_targets = df_test['userId'].unique()
dumpRecommendation(recommendation, users_targets, "recList_BestRated.txt")

In [28]:
# Inspect the Best-Rated recommendation lists of three sample users
recommendation[300]
recommendation[3000]
recommendation[6010]

[260, 1210, 2571, 527, 480, 1580, 2997, 1265, 296, 1097]

[2028, 593, 608, 527, 110, 858, 318, 2762, 2396, 1617]

[2858, 2571, 593, 589, 608, 318, 480, 2762, 2396, 1617]