In [9]:
# init_django is called here, before the project models are imported in a later cell.
# Presumably it configures Django for the "LicentaDjango" project so the ORM is usable
# from the notebook kernel — TODO confirm against django_for_jupyter.
from django_for_jupyter import init_django
init_django("LicentaDjango")

import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
from Licenta.models import CustomUser, Location, Rating

In [15]:
# Inspect the raw ratings: one dict per Rating row with the keys "user", "location" and "rating".
ratings = Rating.objects.all()
rating_data = ratings.values("user", "location", "rating")
# Bare last expression so the notebook displays the (truncated) queryset repr.
rating_data

<QuerySet [{'user': 1, 'location': 3, 'rating': 4}, {'user': 1, 'location': 4, 'rating': 5}, {'user': 1, 'location': 5, 'rating': 3}, {'user': 1, 'location': 7, 'rating': 2}, {'user': 1, 'location': 8, 'rating': 1}, {'user': 2, 'location': 3, 'rating': 5}, {'user': 2, 'location': 4, 'rating': 3}, {'user': 2, 'location': 5, 'rating': 3}, {'user': 2, 'location': 6, 'rating': 2}, {'user': 2, 'location': 7, 'rating': 2}, {'user': 3, 'location': 3, 'rating': 1}, {'user': 3, 'location': 6, 'rating': 4}, {'user': 3, 'location': 7, 'rating': 5}, {'user': 3, 'location': 8, 'rating': 4}, {'user': 4, 'location': 4, 'rating': 2}, {'user': 4, 'location': 5, 'rating': 1}, {'user': 4, 'location': 6, 'rating': 4}, {'user': 4, 'location': 8, 'rating': 3}, {'user': 5, 'location': 3, 'rating': 1}, {'user': 5, 'location': 5, 'rating': 2}, '...(remaining elements truncated)...']>

In [147]:
# Build the user x location rating matrix `df`.
# Rows are user ids, columns are location ids, and 0 marks "this user has not rated this location".
locations = Location.objects.all()
users = CustomUser.objects.all()
user_ids = [user.id for user in users]
location_ids = [location.id for location in locations]

# Load every rating with a single query instead of one query per matrix cell.
# This also keeps the cell independent of helper functions defined in later cells,
# so the notebook survives Restart Kernel -> Run All.
rating_lookup = {}
for rated_user, rated_location, value in Rating.objects.values_list("user", "location", "rating"):
    # setdefault keeps the first row seen for a (user, location) pair if duplicates exist.
    rating_lookup.setdefault((rated_user, rated_location), value)

# Constructing the frame from nested lists gives a numeric dtype rather than object.
df = pd.DataFrame(
    [
        [rating_lookup.get((user_id, location_id), 0) for location_id in location_ids]
        for user_id in user_ids
    ],
    index=user_ids,
    columns=location_ids,
)
df

Unnamed: 0,3,4,5,6,7,8,9
1,4,5,3,0,2,1,0
2,5,3,3,2,2,0,0
3,1,0,0,4,5,4,0
4,0,2,1,4,0,3,0
5,1,0,2,3,3,4,0


In [146]:
def getRating(user_id, location_id):
    """Return the rating stored for a (user, location) pair.

    Args:
        user_id: id of the user whose rating is looked up.
        location_id: id of the rated location.

    Returns:
        The ``rating`` attribute of the first matching ``Rating`` row,
        or 0 when the filter matches no rows.
    """
    matches = Rating.objects.filter(user_id=user_id, location_id=location_id)
    # Truthiness evaluates the queryset; indexing then reads the first fetched row.
    return matches[0].rating if matches else 0

In [155]:
def standardize(row):
    """Mean-centre a row of ratings and scale it by the row's value range.

    Args:
        row: pandas Series of numeric ratings (one user's row of the rating matrix).

    Returns:
        A Series with the same index holding ``(row - mean) / (max - min)``.
        When every value in the row is identical (for example a user with no
        ratings, i.e. all zeros) the range is 0; such rows are returned as all
        zeros instead of dividing by zero.
    """
    spread = row.max() - row.min()
    if spread == 0:
        # A constant row carries no preference signal. Emitting zeros keeps NaN
        # out of the matrix that is later passed to cosine_similarity.
        return pd.Series(0.0, index=row.index, name=row.name)
    return (row - row.mean()) / spread
# Standardize each user's row (axis=1 passes one row at a time to standardize).
ratings_std = df.apply(standardize, axis=1)

# cosine_similarity compares rows, but the locations are the columns of ratings_std,
# so the matrix is transposed to get location-to-location similarity.
item_similarity = cosine_similarity(ratings_std.T)  # square location x location similarity matrix

# Label both axes with the location ids so similarities can be looked up by id.
item_similarity_df = pd.DataFrame(item_similarity, index=df.columns, columns=df.columns)
item_similarity_df

Unnamed: 0,3,4,5,6,7,8,9
3,1.0,0.614417,0.687154,-0.724218,-0.129584,-0.882525,-0.126322
4,0.614417,1.0,0.612267,-0.567212,-0.679103,-0.701626,0.007631
5,0.687154,0.612267,1.0,-0.673799,-0.553794,-0.671416,0.077395
6,-0.724218,-0.567212,-0.673799,1.0,0.151838,0.80616,-0.381766
7,-0.129584,-0.679103,-0.553794,0.151838,1.0,0.392775,-0.312973
8,-0.882525,-0.701626,-0.671416,0.80616,0.392775,1.0,-0.313226
9,-0.126322,0.007631,0.077395,-0.381766,-0.312973,-0.313226,1.0


In [158]:
def get_recommended_locations(location_id, user_rating, neutral_rating=2.5):
    """Score every location by its similarity to one location the user rated.

    The similarity column of ``location_id`` is multiplied by
    ``user_rating - neutral_rating``. A rating below the neutral point therefore
    flips the sign, so locations similar to a disliked one get negative scores
    and are pushed down the ranking.

    Args:
        location_id: id of the rated location; must be a column of
            ``item_similarity_df`` (a missing id raises ``KeyError``).
        user_rating: the rating the user gave that location.
        neutral_rating: rating treated as "neither liked nor disliked".
            Defaults to 2.5, the value previously hard-coded here.
            NOTE(review): the ratings displayed in this notebook range from 1 to 5,
            whose midpoint is 3 — confirm that 2.5 is the intended neutral point.

    Returns:
        A Series indexed by location id, sorted from highest to lowest score.
        The values are weighted similarities, not percentages, and the rated
        location itself is included.
    """
    weight = user_rating - neutral_rating
    similar_score = item_similarity_df[location_id] * weight
    # Best candidates first.
    return similar_score.sort_values(ascending=False)
# Example: scores derived from a rating of 1 given to location 7.
print(get_recommended_locations(7, 1))

4    1.018655
5    0.830692
9    0.469460
3    0.194376
6   -0.227758
8   -0.589163
7   -1.500000
Name: 7, dtype: float64


In [180]:
def get_recommendations(userID, exclude_rated=False):
    """Rank locations for a user by summing the scores from each of their ratings.

    Every rating of the user is passed to ``get_recommended_locations``; the
    resulting per-location scores are added up and sorted.

    Args:
        userID: id of the ``CustomUser`` to build recommendations for.
        exclude_rated: when True, locations the user has already rated are
            removed from the result. Defaults to False, which keeps them
            (the previous behaviour).

    Returns:
        A Series indexed by location id with the summed scores, highest first.
        An empty float Series is returned when the user has no ratings.

    Raises:
        CustomUser.DoesNotExist: if no user has the given id.
    """
    my_user = CustomUser.objects.get(id=userID)

    # One query returning (location id, rating) pairs. Reading location_id directly
    # avoids loading each related Location object just to get its id.
    rated = list(
        Rating.objects.filter(user=my_user).values_list("location_id", "rating")
    )
    if not rated:
        # pd.concat rejects an empty list, so return an empty result explicitly.
        return pd.Series(dtype="float64")

    # One score Series per rating, combined with a single concat instead of
    # growing a DataFrame inside the loop.
    score_columns = [
        get_recommended_locations(location_id, rating)
        for location_id, rating in rated
    ]
    # concat aligns the Series on location id; summing across columns gives
    # the total score of each location.
    totals = pd.concat(score_columns, axis=1).sum(axis=1)

    if exclude_rated:
        already_rated = [location_id for location_id, _ in rated]
        totals = totals.drop(index=already_rated, errors="ignore")

    # Highest combined score first.
    return totals.sort_values(ascending=False)
   

In [181]:
# Example: ranked location scores for the user with id 1.
get_recommendations(1)

4    5.119749
3    4.768198
5    4.345419
9    0.494618
7   -3.258194
6   -4.126414
8   -5.109948
dtype: float64