# Sequential Group Recommendations in MovieLens 100K


**Authors**: Ashkan Khademian, Ujunwa Edum\
**Project Part**: Part I\
**Course**: DATA.ML.360-2024-2025-1 Recommender Systems

# Foundations

## Introduction
Lorem ipsum

## Install Requirements

Use the comment template for installing your packages that are not already present in the google colab environment.

In [1]:
# !pip install <package-name>

## Import Libararies

### Main Libraries

In [2]:
import random
import typing
from time import sleep
from collections import defaultdict

from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations

## Part II Utils

In [3]:
from part2_utils.predict_user_rating import predict_user_rating

### Typing

In [4]:
from typing import *
from pandas.core.frame import DataFrame, Series

## Define Constants

In [5]:
RATING_DATASET = "data/ml-latest-small/ratings.csv"

# Sequential Group Recommendation

## Load Data

In [6]:
raw_df = pd.read_csv(RATING_DATASET)

### Transform CSV DataFrame to User-Movies Matrix
The `transform_csv_dataframe_to_user_movies_matrix` function transforms the DataFrame resulted from the dataset CSV into a user-movies matrix where the rows represent users, the columns represent movies, and the values represent the ratings given by users to movies.

In [7]:
def transform_csv_dataframe_to_user_movies_matrix(csv_df: DataFrame) -> DataFrame:
  user_movie_matrix = csv_df.pivot(index='userId', columns='movieId', values='rating')
  user_movie_matrix.reset_index(inplace=True)
  user_movie_matrix.columns.name = None
  return user_movie_matrix

In [8]:
user_movie_matrix = transform_csv_dataframe_to_user_movies_matrix(raw_df)

## Preference Score

In [9]:
def preference_score(user_id, movie_id):
    return predict_user_rating(user_movie_matrix, user_id, movie_id)

In [10]:
def calculate_user_satisfaction(user_id, group_rec, user_ratings, user_satisfactions):
    """
    Calculate user satisfaction based on the group recommendations.
    """
    user_ideal_satisfaction = user_satisfactions[user_id]
    group_satisfaction = sum([preference_score(user_id, movie) for movie in group_rec])
    return group_satisfaction / user_ideal_satisfaction if user_ideal_satisfaction > 0 else 0

In [11]:
def calculate_group_satisfaction(group, group_rec, user_ratings, user_satisfactions):
    """
    Calculate average satisfaction across all users in the group.
    """
    individual_satisfactions = [calculate_user_satisfaction(user, group_rec, user_ratings, user_satisfactions) for user in group]
    return np.mean(individual_satisfactions)


In [12]:
def calculate_group_disagreement(group, group_rec, user_ratings, user_satisfactions):
    """
    Calculate disagreement within the group as the difference between max and min satisfaction.
    """
    individual_satisfactions = [calculate_user_satisfaction(user, group_rec, user_ratings, user_satisfactions) for user in group]
    return max(individual_satisfactions) - min(individual_satisfactions)

In [13]:
def avgScore(group, movie, user_ratings):
    """Calculate the average preference score for the movie among the group members."""
    return np.mean([preference_score(user, movie) for user in group])

def leastScore(group, movie, user_ratings):
    """Calculate the minimum preference score for the movie among the group members."""
    return min([preference_score(user, movie) for user in group])

In [14]:
def generate_sequential_recommendations(
    group, 
    user_ratings, 
    movies, 
    iterations=5, 
    top_k=10, 
    alpha=0.0,
):
    group_recommendations = []
    movie_scores = defaultdict(float)  # Persist movie scores across iterations for dynamic weighting
    user_satisfactions = defaultdict(float)  # Persist user satisfaction scores across iterations for dynamic weighting
    
    for user in group:
        top_user_prefs = sorted(user_ratings[user].items(), key=lambda x: x[1], reverse=True)[:top_k]
        user_satisfactions[user] = sum([score for _, score in top_user_prefs])

    for iteration in range(iterations):
        # Calculate movie scores based on the new weighted combination of avgScore and leastScore
        for movie in tqdm(movies, desc=f"Calculating movie scores for iteration {iteration}"):
            avg_score = avgScore(group, movie, user_ratings)
            least_score = leastScore(group, movie, user_ratings)
            movie_scores[movie] = alpha * least_score + (1 - alpha) * avg_score

        # Sort movies by the adjusted group preference scores and select the top recommendations
        top_movies = sorted(movie_scores, key=movie_scores.get, reverse=True)[:top_k]
        group_recommendations.append(top_movies)

        # Calculate and log satisfaction and disagreement metrics
        group_sat = calculate_group_satisfaction(group, top_movies, user_ratings, user_satisfactions)
        group_dis = calculate_group_disagreement(group, top_movies, user_ratings, user_satisfactions)
        alpha = group_dis

        print(f"Iteration {iteration + 1}")
        print(f"Top Movies: {top_movies}")
        print(f"Group Satisfaction: {group_sat:.3f}")
        print(f"Group Disagreement: {group_dis:.3f}\n")
        sleep(0.001)

    return group_recommendations


In [15]:
sample_group = [597, 217, 66, 177, 274, 391, 483, 561, 414, 509, 160]

user_ratings = raw_df[raw_df["userId"].isin(sample_group)].groupby('userId')[['movieId', 'rating']].apply(
    lambda x: dict(zip(x['movieId'], x['rating']))).to_dict()
group_users_movies = user_movie_matrix[user_movie_matrix['userId'].isin(sample_group)].dropna(axis=1, how='all').columns.tolist()[1:]
movies = random.sample([m for m in raw_df['movieId'].unique() if m not in group_users_movies], 100)

In [16]:
# Run the sequential group recommendation process
recommendations = generate_sequential_recommendations(sample_group, user_ratings, movies)

Calculating movie scores for iteration 0: 100%|██████████| 100/100 [00:42<00:00,  2.35it/s]


Iteration 1
Top Movies: [np.int64(4956), np.int64(1533), np.int64(66744), np.int64(72308), np.int64(5544), np.int64(125974), np.int64(27820), np.int64(6918), np.int64(3742), np.int64(7743)]
Group Satisfaction: 0.776
Group Disagreement: 0.265



Calculating movie scores for iteration 1: 100%|██████████| 100/100 [00:41<00:00,  2.42it/s]


Iteration 2
Top Movies: [np.int64(4956), np.int64(1533), np.int64(66744), np.int64(125974), np.int64(72308), np.int64(5544), np.int64(6918), np.int64(3742), np.int64(27820), np.int64(897)]
Group Satisfaction: 0.774
Group Disagreement: 0.240



Calculating movie scores for iteration 2: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s]


Iteration 3
Top Movies: [np.int64(4956), np.int64(1533), np.int64(66744), np.int64(72308), np.int64(125974), np.int64(5544), np.int64(6918), np.int64(3742), np.int64(27820), np.int64(897)]
Group Satisfaction: 0.774
Group Disagreement: 0.240



Calculating movie scores for iteration 3: 100%|██████████| 100/100 [00:41<00:00,  2.43it/s]


Iteration 4
Top Movies: [np.int64(4956), np.int64(1533), np.int64(66744), np.int64(72308), np.int64(125974), np.int64(5544), np.int64(6918), np.int64(3742), np.int64(27820), np.int64(897)]
Group Satisfaction: 0.774
Group Disagreement: 0.240



Calculating movie scores for iteration 4: 100%|██████████| 100/100 [00:40<00:00,  2.45it/s]


Iteration 5
Top Movies: [np.int64(4956), np.int64(1533), np.int64(66744), np.int64(72308), np.int64(125974), np.int64(5544), np.int64(6918), np.int64(3742), np.int64(27820), np.int64(897)]
Group Satisfaction: 0.774
Group Disagreement: 0.240

