In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import sqlalchemy
import warnings
import os
import random
import settings.config_movie_lens as cfg

# Settings for synthetic group generation, read from the project config module.
preprocessed_dataset_folder = cfg.preprocessed_dataset_folder
group_sizes_to_create = cfg.group_sizes_to_create
group_similarity_to_create = cfg.group_similarity_to_create
group_number = cfg.group_number

# Only the tail of ratings.csv is loaded: file line 0 (the header) is kept,
# lines 1 .. FIRST_RATINGS_LINE_TO_KEEP - 1 are skipped, the rest is read.
FIRST_RATINGS_LINE_TO_KEEP = 19000000

# read_csv accepts any list-like for skiprows, so the range is passed as-is
# rather than being copied into a 19M-element list first.
ratings = pd.read_csv('ratings.csv', skiprows=range(1, FIRST_RATINGS_LINE_TO_KEEP))


In [2]:
# Similarity evaluation: build a user x movie rating matrix and correlate its rows.
user_id_set = set(ratings['userId'])

# One row per userId, one column per movieId; movies a user has not rated become 0.
user_matrix = ratings.pivot_table(
    index='userId',
    columns='movieId',
    values='rating',
).fillna(0)
user_id_indexes = user_matrix.index.values

# np.corrcoef treats each row as one variable, so entry (i, j) is the
# correlation between the rating vectors at positions i and j of user_id_indexes.
numpy_array = user_matrix.to_numpy()
sim_matrix = np.corrcoef(numpy_array)


In [3]:
# Generate synthetic groups for every configured group type and collect them
# all in a single flat list.
# NOTE(review): no random seed is set anywhere in the visible cells; if the
# generators draw random users the groups will differ between runs - confirm.
from synthetic_groups_generation.groups_generators import GroupsGenerator

group_list = []
for group_type in group_similarity_to_create:
    print(group_type)
    grpGenerator = GroupsGenerator.getGroupsGenerator(group_type)
    current_list = grpGenerator.generateGroups(
        user_id_indexes,
        user_id_set,
        sim_matrix,
        group_sizes_to_create,
        group_number,
    )
    group_list.extend(current_list)


RANDOM
20
40
60
SIMILAR
20
40
60


In [4]:
#aggregation strategy
from abc import ABC, abstractmethod

class AggregationStrategy(ABC):
    """Base class for strategies that turn ratings into a recommendation list.

    Concrete strategies implement ``generate_group_recommendations_for_group``;
    ``getAggregator`` maps a strategy code to an instance of the matching class.
    """

    @staticmethod
    def getAggregator(strategy):
        """Return a new aggregator for the given strategy code.

        Parameters
        ----------
        strategy : str
            "ADD" for the additive strategy, "LMS" for the least misery strategy.

        Returns
        -------
        AggregationStrategy or None
            A fresh aggregator instance, or None when the code is not recognised.
        """
        if strategy == "ADD":
            return AdditiveAggregator()
        elif strategy == "LMS":
            return LeastMiseryAggregator()

        return None

    # Declared inside the class body so that ABC actually enforces it: a
    # subclass that does not override this method cannot be instantiated.
    @abstractmethod
    def generate_group_recommendations_for_group(self, group_ratings, recommendations_number):
        """Produce the recommendations for one set of ratings.

        Parameters
        ----------
        group_ratings
            Ratings to aggregate. The concrete strategies in this file group it
            by 'movieId' and rank the result by 'rating'.
        recommendations_number
            Maximum number of items to recommend.

        Returns
        -------
        The concrete strategies in this file return a dict mapping their
        strategy code to a list of movieId values.
        """
        pass

# aggregate using additive strategy
class AdditiveAggregator(AggregationStrategy):
    """Additive strategy: rank movies by the sum of the ratings they received."""

    def generate_group_recommendations_for_group(self, group_ratings, recommendations_number):
        """Recommend the movies with the highest summed rating.

        Parameters
        ----------
        group_ratings
            Frame with at least 'movieId' and 'rating' columns.
        recommendations_number
            How many movies to keep from the top of the ranking.

        Returns
        -------
        dict
            ``{"ADD": [movieId, ...]}`` ordered from highest to lowest sum.
        """
        # Every column is summed per movie; only the summed 'rating' drives the ranking.
        per_movie_sum = group_ratings.groupby('movieId').sum()
        ranking = per_movie_sum.sort_values(by="rating", ascending=False).reset_index()
        ranking = ranking[['movieId', 'rating']]
        top_movies = ranking.head(recommendations_number)
        return {"ADD": list(top_movies['movieId'])}

    
# aggregate using least misery strategy
class LeastMiseryAggregator(AggregationStrategy):
    """Least misery strategy: rank movies by the lowest rating they received."""

    def generate_group_recommendations_for_group(self, group_ratings, recommendations_number):
        """Recommend the movies whose minimum rating is highest.

        Parameters
        ----------
        group_ratings
            Frame with at least 'movieId' and 'rating' columns.
        recommendations_number
            How many movies to keep from the top of the ranking.

        Returns
        -------
        dict
            ``{"LMS": [movieId, ...]}`` ordered from highest to lowest minimum.
        """
        # The minimum of every column is taken per movie; only 'rating' drives the ranking.
        per_movie_min = group_ratings.groupby('movieId').min()
        ranking = per_movie_min.sort_values(by="rating", ascending=False).reset_index()
        ranking = ranking[['movieId', 'rating']]
        top_movies = ranking.head(recommendations_number)
        return {"LMS": list(top_movies['movieId'])}
    
    
# Run both strategies and print their recommendation lists. The methods are
# called on real instances instead of unbound on the class with an unrelated
# object standing in for `self`.
# NOTE(review): the whole `ratings` table is aggregated (not one group's
# ratings) and `group_number` is used as the number of recommendations -
# kept as in the original call; confirm this is intended.
print(AdditiveAggregator().generate_group_recommendations_for_group(ratings, group_number))

print(LeastMiseryAggregator().generate_group_recommendations_for_group(ratings, group_number))


{'ADD': [296, 318, 593, 356, 260, 480, 110, 527, 2571, 589, 50, 457, 1196, 1210, 1, 2858, 1198, 858, 150, 32]}
{'LMS': [5245, 105364, 56693, 71486, 71606, 71707, 112006, 1470, 71851, 1467, 33342, 6632, 56779, 6633, 98275, 26185, 72261, 72292, 105250, 26160]}
