<a href="https://colab.research.google.com/github/ihagoSantos/recommendation-systems/blob/main/non_personalized_recommender_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Non-Personalized RS

In this step we will implement the two main non-personalized Recommender Systems (RS):
- Most-Popular: recommends the items that have been rated by the most users
- Best-Rated: recommends the items that received the highest ratings

To do so, we will:
- Read the train file extracted from the dataset
- Create a sparse matrix from the ratings file
- Implement the most-popular recommender and save the recommended items
- Implement the best-rated recommender and save the recommended items


In [None]:
# import libs
import operator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.sparse import csr_matrix
from collections import OrderedDict

# Display the value of every top-level expression in a cell, not only the last one
# (later cells list several expressions and rely on this to show all of them)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Set the default matplotlib font size to 14
plt.rcParams.update({'font.size': 14})

# Reading train and test files

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths below live under it
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Column layout shared by the train and test rating files
rating_columns = ['userId', 'itemId', 'rating', 'timestamp']


def _read_ratings(path):
    """Load a '::'-separated ratings file into a DataFrame, skipping malformed lines."""
    return pd.read_csv(
        path,
        sep='::',
        engine='python',  # the two-character separator is parsed by the python engine
        names=rating_columns,
        on_bad_lines='skip',
    )


train_url = '/content/drive/My Drive/Pós Graduação PUC Minas/11 - Sistemas de Recomendação/Unidade 1/praticas/dataset/ML-1M/trainSet.txt'
test_url = '/content/drive/My Drive/Pós Graduação PUC Minas/11 - Sistemas de Recomendação/Unidade 1/praticas/dataset/ML-1M/testSet.txt'

df_train = _read_ratings(train_url)
df_test = _read_ratings(test_url)


In [None]:
# Preview the first rows of the train and test splits
df_train.head()
df_test.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,1,1193,5.0,978300760.0
1,1,661,3.0,978302109.0
2,1,914,3.0,978301968.0
3,1,3408,4.0,978300275.0
4,1,1197,3.0,978302268.0


Unnamed: 0,userId,itemId,rating,timestamp
0,1,2355,5.0,978824291.0
1,1,595,5.0,978824268.0
2,1,2687,3.0,978824268.0
3,1,48,5.0,978824351.0
4,1,745,3.0,978824268.0


# Creating Sparse Matrix

In [None]:
# Pull the user ids, item ids and ratings out of the training log as three aligned columns
users, items, ratings = (df_train[column] for column in ('userId', 'itemId', 'rating'))

In [None]:
# Define the matrix dimensions based on the largest user id and item id.
# Series.max() is vectorized, whereas the builtin max() walks the Series
# element by element in Python.
nb_users = int(users.max())
nb_items = int(items.max())

In [None]:
# Build the user x item ratings matrix in CSR format.
# User and item ids are used directly as row/column indices, hence one extra
# row and column so that the largest id is still a valid index.
matrix_shape = (nb_users + 1, nb_items + 1)
ratings_matrix = csr_matrix((ratings, (users, items)), shape=matrix_shape)
ratings_matrix.shape

(6041, 3953)

# A useful function
This function is used to save the recommendations to a file.

In [None]:
def dumpRecommendation(recommendation, users_targets, file_name):
    """Write one line of recommended items per target user to a text file.

    Each line is the user id, a tab, and a bracketed comma-separated list of
    ``itemId:0.0`` pairs (every item is written with the fixed placeholder
    score 0.0), e.g. ``[10:0.0,20:0.0]``. A user with no recommended items is
    written with an empty list, ``[]``.

    Args:
        recommendation: mapping from user id to an iterable of item ids, in
            recommendation order.
        users_targets: iterable of the user ids to write; each one is looked
            up in ``recommendation``.
        file_name: path of the output file, opened in write mode (an existing
            file is overwritten).

    Raises:
        KeyError: if ``recommendation`` is a dict and a target user is missing.
    """
    # The context manager closes the file even if a lookup or a write fails
    with open(file_name, 'w') as file_out:
        # for each user target
        for userId in users_targets:
            # join() only puts separators between items, so there is no
            # trailing comma to strip and an empty list keeps its "["
            issuedItems = ",".join(
                str(itemId) + ":" + str(0.0) for itemId in recommendation[userId]
            )
            file_out.write(str(userId) + "\t" + "[" + issuedItems + "]" + "\n")

# Most-Popular Recommendation

In [None]:
# Peek at the rating values stored in the sparse matrix
ratings_matrix.data

array([5., 4., 5., ..., 2., 4., 4.])

In [None]:
# Popularity of an item = number of users with a non-zero rating for it.
# Slicing a CSR matrix one column at a time is slow, so convert once to the
# column-oriented CSC format and read every column's count from its pointer array.
ratings_by_item = ratings_matrix.tocsc(copy=True)
ratings_by_item.eliminate_zeros()  # only non-zero ratings must be counted

# indptr[i + 1] - indptr[i] is the number of stored entries in column i
items_popularity = dict(enumerate(np.diff(ratings_by_item.indptr).tolist()))

In [None]:
# Rank item ids from the most to the least rated; sorted() is stable, so items
# with equal counts keep the order they have in items_popularity
popularity_order = OrderedDict(
    sorted(items_popularity.items(), key=lambda pair: pair[1], reverse=True)
)
most_popular = list(popularity_order)

In [None]:
# Ids of the ten most popular items
most_popular[:10]

[2858, 1196, 260, 1210, 2028, 480, 589, 2571, 1270, 1198]

In [None]:
# Size of each recommendation list: at most top_k items are recommended per user
top_k = 10

In [None]:
# Recommend to every user the top_k most popular items they have not rated yet
recommendation = {}

for u in range(ratings_matrix.shape[0]):
    unseen = []
    # walk the items from most to least popular until the list is full
    for i in most_popular:
        if len(unseen) >= top_k:
            break
        # a zero entry means the user has no rating for this item
        if ratings_matrix[u, i] == 0:
            unseen.append(i)
    recommendation[u] = unseen


In [None]:
# Save the most-popular lists of every user that appears in the test set.
# The most-popular lists are stored in `recommendation` (singular);
# `recommendations` (plural) is only created later by the Best-Rated section,
# so using it here fails on a fresh kernel or saves the wrong lists.
users_targets = df_test['userId'].unique()
dumpRecommendation(recommendation, users_targets, 'recList_MostPopular.txt')

In [None]:
# Inspect the most-popular lists of a few users; these live in `recommendation`
# (singular), not in the Best-Rated dict `recommendations`
recommendation[300]
recommendation[3000]
recommendation[6010]

[]

[]

[2858]

# Best-Rated Recommendation

In [None]:
# Mean rating of each item, taken over every user (row) of the matrix: users
# who did not rate the item contribute a 0 to the mean.
# NOTE(review): because unrated entries count as 0, this score also grows with
# the number of ratings an item has; confirm that this is the intended metric.
items_rating = {
    item: np.mean(ratings_matrix[:, item])
    for item in range(ratings_matrix.shape[1])
}


In [None]:
# Show only the first entries; displaying the whole dict (one entry per item)
# floods the notebook output
dict(list(items_rating.items())[:10])

{0: 0.0,
 1: 1.2385366661148816,
 2: 0.29647409369309713,
 3: 0.16801853997682503,
 4: 0.052805826849859294,
 5: 0.09733487833140209,
 6: 0.5126634663135242,
 7: 0.19582850521436845,
 8: 0.025823539149147492,
 9: 0.032610494951167024,
 10: 0.43999337857970533,
 11: 0.5442807482204932,
 12: 0.04303923191524582,
 13: 0.03790763118688959,
 14: 0.06174474424764112,
 15: 0.0423770898857805,
 16: 0.32941565965899683,
 17: 0.48088064889918886,
 18: 0.05230922032776031,
 19: 0.11190200297963912,
 20: 0.04188048336368151,
 21: 0.7200794570435358,
 22: 0.16023837113060752,
 23: 0.039066379738453894,
 24: 0.27114716106604864,
 25: 0.5047177619599404,
 26: 0.04353583843734481,
 27: 0.02118854494289025,
 28: 0.10081112398609501,
 29: 0.2421784472769409,
 30: 0.03327263698063235,
 31: 0.048998510180433705,
 32: 0.8688958781658666,
 33: 0.0013242840589306405,
 34: 0.9841085912928322,
 35: 0.0253269326270485,
 36: 0.5168018539976824,
 37: 0.004303923191524581,
 38: 0.006952491309385863,
 39: 0.7136235

In [None]:
# Rank item ids by decreasing mean rating
ranked_by_mean = sorted(items_rating.items(), key=operator.itemgetter(1), reverse=True)
rating_order = OrderedDict(ranked_by_mean)
best_rated = [item for item in rating_order]

In [None]:
# Ids of the ten best-rated items
best_rated[:10]

[2858, 260, 1196, 1210, 2028, 1198, 2571, 593, 589, 608]

In [None]:
# Recommend to every user the top_k best-rated items they have not rated yet
recommendations = {}

for u in range(ratings_matrix.shape[0]):
  user_picks = recommendations.setdefault(u, [])
  # walk the items from best to worst rated until the list is full
  for i in best_rated:
    if len(user_picks) >= top_k:
      break
    # skip items the user already has a (non-zero) rating for
    if ratings_matrix[u, i] != 0:
      continue
    user_picks.append(i)

In [None]:
# Save the best-rated lists of every user that appears in the test set
users_targets = df_test['userId'].unique()
dumpRecommendation(recommendations, users_targets, 'recList_BestRated.txt')

In [None]:
# Inspect the best-rated lists of a few users
recommendations[300]
recommendations[3000]
recommendations[6010]

[]

[]

[2858]