# Laboratorium 6 - rekomendacje grupowe

## Przygotowanie

 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab6`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas matplotlib`

## Część 1. - przygotowanie danych

In [1]:
import math
import numpy as np
import pandas

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [2]:
# wczytujemy oceny uytkownikow i obliczamy (za pomocą collaborative filtering) wszystkie przewidywane oceny filmow

raw_ratings = pandas.read_csv('ml-latest-small/ratings.csv').drop(columns=['timestamp'])
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,10,8,10,7,9,10,9,7,8,10,...,8,8,10,7,8,8,7,9,9,9
2,6,4,5,4,5,6,5,3,5,5,...,4,4,5,4,4,4,4,5,5,5
3,5,3,4,3,4,4,5,3,4,5,...,3,4,5,3,4,4,3,4,4,4
4,8,6,8,6,7,8,7,5,7,8,...,6,7,8,6,7,7,6,7,8,7
5,6,5,6,5,5,6,5,4,5,6,...,5,5,6,4,5,5,5,6,6,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,9,6,8,6,8,9,8,6,8,9,...,7,7,9,6,7,7,6,8,8,8
607,8,7,8,6,8,8,8,6,8,8,...,7,7,8,6,7,7,6,8,8,8
608,8,6,7,6,7,7,7,5,6,7,...,6,6,7,5,6,6,5,7,7,7
609,5,4,5,4,5,5,5,4,4,5,...,4,4,5,4,4,4,4,4,5,5


In [3]:
# definiujemy testowe grupy uzytkownikow, dla ktorych bedziemy generowac rekomendacje

groups_no = 50
group_size = 5
groups = [sample(users, group_size) for i in range(groups_no)]
groups

[[142, 526, 62, 409, 548],
 [216, 388, 527, 540, 551],
 [563, 305, 495, 497, 77],
 [606, 564, 576, 555, 353],
 [99, 391, 454, 599, 9],
 [389, 117, 280, 253, 181],
 [258, 166, 529, 379, 220],
 [262, 411, 92, 264, 46],
 [567, 379, 386, 450, 565],
 [541, 381, 568, 149, 503],
 [102, 5, 56, 511, 162],
 [107, 166, 148, 539, 202],
 [536, 304, 123, 323, 491],
 [553, 5, 180, 111, 349],
 [578, 397, 460, 5, 423],
 [218, 74, 395, 580, 470],
 [20, 296, 146, 27, 592],
 [508, 250, 93, 139, 108],
 [42, 53, 117, 509, 268],
 [192, 589, 558, 61, 277],
 [134, 26, 392, 554, 62],
 [74, 255, 606, 171, 567],
 [374, 396, 603, 219, 313],
 [336, 290, 457, 141, 465],
 [246, 370, 341, 291, 336],
 [557, 198, 569, 321, 556],
 [50, 211, 435, 262, 475],
 [12, 380, 78, 234, 481],
 [114, 101, 308, 493, 72],
 [461, 17, 367, 462, 205],
 [199, 609, 538, 561, 360],
 [402, 164, 296, 251, 292],
 [492, 274, 559, 192, 541],
 [578, 146, 105, 378, 505],
 [299, 167, 469, 567, 133],
 [346, 80, 453, 86, 70],
 [239, 248, 235, 168, 40

## Część 2. - algorytmy proste

In [4]:
class Recommender:
    def recommend(self, movies, ratings, group, size):
        pass

    
class RandomRecommender(Recommender):
    def __init__(self):
        self.name = 'random'
        
    def recommend(self, movies, ratings, group, size):
        return sample(movies, size)

In [5]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen

class AverageRecommender(Recommender):
    def __init__(self):
        self.name = 'average'
        
    def get_average_score(self, movie_id, ratings, group):
        return sum(ratings[movie_id][user] for user in group) / len(group)
    
    def recommend(self, movies, ratings, group, size):
        return sorted(movies, key= lambda movie: self.get_average_score(movie, ratings, group), reverse=True)[:size]
    
AverageRecommender().recommend(movies, ratings, groups[0], 10)

[136540, 356, 518, 175475, 2535, 5741, 2571, 2959, 318, 937]

In [6]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen,
#   ale rownoczesnie wykluczajacy te filmy, ktore otrzymaly choc jedna ocene ponizej thresholdu

class AverageWithoutMiseryRecommender(Recommender):
    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold
        
    def get_average_score(self, movie_id, ratings, group):
        return sum(ratings[movie_id][user] for user in group) / len(group)
        
    def recommend(self, movies, ratings, group, size):
        sorted_movies = sorted(movies, key= lambda movie: self.get_average_score(movie, ratings, group), reverse=True)
        filtered_movies = list(filter(lambda movie: min(ratings[movie][user] for user in group), sorted_movies))
        return filtered_movies[:size]
    
AverageWithoutMiseryRecommender(6).recommend(movies, ratings, groups[0], 10)

[136540, 356, 518, 175475, 2535, 5741, 2571, 2959, 318, 937]

In [7]:
# algorytm uwzgledniajacy preferencje tylko jednego uzytkownika

class DictatorshipRecommender(Recommender):
    def __init__(self, dictator_id):
        self.name = 'dictatorship'
        self.dictator_id = dictator_id
        
    def recommend(self, movies, ratings, group, size):
        dictator_ratings = ratings[self.dictator_id]
        return sorted(movies, key = lambda movie: ratings[movie][self.dictator_id], reverse=True)[:size]
    
DictatorshipRecommender(groups[0][1]).recommend(movies, ratings, groups[0], 10)

[518, 136540, 260, 296, 356, 527, 593, 1089, 1092, 1198]

In [8]:
# algorytm, ktory w kazdej turze uwzglednia preferencje tylko jednego, kolejnego uzytkownika

class FairnessRecommender(Recommender):
    def __init__(self):
        self.name = 'fairness'
    
    def recommend(self, movies, ratings, group, size):
        recommendations = []
        for i in range(size):
            current_user_rating = sorted(movies, key = lambda movie: ratings[movie][group[i%len(group)]], reverse=True)
            for movie in current_user_rating:
                if movie not in recommendations:
                    recommendations.append(movie)
                    break
        return recommendations
            
FairnessRecommender().recommend(movies, ratings, groups[0], 10)

[136540, 518, 6, 356, 2571, 47, 260, 50, 175475, 2959]

In [9]:
# algorytm zachlanny, aproksymujacy metode Proportional Approval Voting
#   w kazdej iteracji wybieramy ten film, ktory najbardziej zwieksza zadowolenie zgodnie z punktacja PAV

class PAVRecommender(Recommender):
    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'
        
    def count_score(self, ratings, movie_id, group, group_movies_counter):
        score = 0
        for user in group:
            if ratings[movie_id][user] >= self.threshold:
                score += 1 / (group_movies_counter[user] + 1)
        return score
        
    def recommend(self, movies, ratings, group, size):
        from collections import defaultdict
        group_movies_counter = defaultdict(int)
        recommendations = []
        movie_set = set(movies)
        for i in range(size):
            best_movie = sorted(list(movie_set), key=lambda movie: self.count_score(ratings, movie, group, group_movies_counter),reverse=True)[0]
            movie_set.remove(best_movie)
            for user in group:
                if ratings[best_movie][user] >= self.threshold:
                    group_movies_counter[user] += 1
            recommendations.append(best_movie)
        return recommendations
        
PAVRecommender(6).recommend(movies, ratings, groups[0], 10)

[11, 32, 36, 47, 87, 110, 150, 225, 260, 262]

## Część 3. - funkcje celu

In [10]:
# dwie funkcje pomocnicze:
#  - znajdujaca ulubione filmy danego uzytkownika
#  - obliczajaca sume ocen wystawionych przez uzytkownika wszystkim filmom w rekomendacji

def top_n_movies_for_user(ratings, movies, user_id, n):
    return sorted(movies, key=lambda movie : ratings[movie][user_id], reverse=True)[:n]

def total_score(recommendation, user_id, ratings):
    return sum(ratings[movie][user_id] for movie in recommendation)

In [11]:
# funkcja obliczajaca zadowolenie pojedynczego uzytkownika
#  - iloraz zadowolenia z wygenerowanej rekomendacji oraz zadowolenia z hipotetycznej rekomendacji idealnej
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    #zadowolenie z rekomendacji / zadowolenie jeśl user mogby fac swoja liste (top n)
    return total_score(recommendation, user_id, ratings)/  \
total_score(top_n_movies_for_user(ratings, movies, user_id, len(recommendation)), user_id, ratings)


# funkcja celu - srednia z zadowolenia wszystkich uzytkownikow w grupie
def overall_group_satisfaction(recommendation, group, movies, ratings):
    return 1.0 * sum([overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]) / len(group)

# funkcja celu - roznica miedzy maksymalnym i minimalnym zadowolenie w grupie
def group_dissatisfaction(recommendation, group, movies, ratings):
    satisfaction_scores = [overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]
    return max(satisfaction_scores) - min(satisfaction_scores)

## Część 4. - Sequential Hybrid Aggregation

In [12]:
# algorytm balansujacy pomiedzy wyborem elementow o najwyzszej sredniej ocen
#   i o najwyzszej minimalnej ocenie
#   wyliczajacy w kazdej iteracji parametr alfa - jak na wykladzie
class SequentialHybridAggregationRecommender(Recommender):
    def __init__(self):
        self.name = 'sequential_hybrid_aggregation'
        self.alpha = 0
        
    def get_average_score(self, movie_id, ratings, group):
        return sum(ratings[movie_id][user] for user in group) / len(group)
    
    def get_score(self, ratings, movie, group):
        # (1 - a) * srednia z ocen dla danego filmu + a * najmnioejsza ocena
        score = (1.0 - self.alpha) * self.get_average_score(movie, ratings, group) + self.alpha * min(ratings[movie][user] for user in group)  
        self.alpha = (max([ratings[movie][user] for user in group]) - min([ratings[movie][user] for user in group]))
        return score       
    
    def recommend(self, movies, ratings, group, size):
        return sorted(movies, key = lambda movie: self.get_score(ratings, movie, group))[:size]

    
SequentialHybridAggregationRecommender().recommend(movies, ratings, groups[0], 10)

[63540, 1090, 2694, 1527, 2122, 3537, 26313, 27772, 3478, 1225]

## Część 5. - porównanie algorytmów

In [13]:
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(2),
    DictatorshipRecommender(1),
    FairnessRecommender(),
    PAVRecommender(5),
    SequentialHybridAggregationRecommender()
]

recommendation_size = 10

# dla kazdego algorytmu:
#  - wygenerujmy jedna rekomendacje dla kazdej grupy
#  - obliczmy wartosci funkcji celu dla kazdej rekomendacji
#  - obliczmy srednia i odchylenie standardowe wartosci funkcji celu
#  - wypiszmy wyniki na konsole

def calculate_recomender_data(recommender):
    from statistics import mean, stdev
    satisfactions_for_recommender = \
    [overall_group_satisfaction(recommender.recommend(movies, ratings, group, recommendation_size), group, movies, ratings) for group in groups]
    return  {'mean': np.mean(satisfactions_for_recommender), 'std': stdev(satisfactions_for_recommender)}

print([calculate_recomender_data(recommender) for recommender in recommenders])

[{'mean': 0.722902184040394, 'std': 0.03319774789005422}, {'mean': 0.9967488531821488, 'std': 0.004026227507427461}, {'mean': 0.9967488531821488, 'std': 0.004026227507427461}, {'mean': 0.8982101169336519, 'std': 0.016137603520767013}, {'mean': 0.9633912453927148, 'std': 0.013787759895080983}, {'mean': 0.8512065575661991, 'std': 0.048496213144968534}, {'mean': 0.695354158885849, 'std': 0.09242351628951077}]
