In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Mount Google Drive at /content/drive; the google.colab module is only importable inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset location, resolved relative to the current working directory.
file_path = 'renttherunway_final_data.json'
# lines=True: the file is read as one JSON object per line.
df_rent = pd.read_json(file_path, lines=True)

In [None]:
# Keep only rows that have user_id, item_id, rating and age, then drop exact duplicate rows.
df_rent_clean = df_rent.dropna(subset=["user_id", "item_id", "rating","age"]).drop_duplicates()

In [None]:
###############################################
# 3. Explicit Feedback
###############################################

# The explicit rating is used unchanged as the interaction strength.
df_rent_clean['interaction'] = df_rent_clean['rating']

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words encoding of the 'category' column.
# NOTE(review): category_bow is not referenced again in the visible notebook — confirm it is still needed.
vectorizer = CountVectorizer()
category_bow = vectorizer.fit_transform(df_rent_clean['category'])

In [None]:
def calculate_sparsity(df):
    """Return the share of missing values for every numeric column of ``df``.

    Args:
        df: DataFrame to inspect. Only columns selected by
            ``select_dtypes(include=np.number)`` are reported.

    Returns:
        dict mapping column name -> fraction of rows where the column is null.
    """
    row_count = len(df)
    numeric_columns = df.select_dtypes(include=np.number).columns
    return {
        column: df[column].isnull().sum() / row_count
        for column in numeric_columns
    }


# Report the missing-value share of each numeric column after cleaning.
sparsity_dict = calculate_sparsity(df_rent_clean)
print(sparsity_dict)

{'user_id': np.float64(0.0), 'item_id': np.float64(0.0), 'rating': np.float64(0.0), 'size': np.float64(0.0), 'age': np.float64(0.0), 'interaction': np.float64(0.0)}


In [None]:
def preprocess_data(df, sparsity_threshold=0.50):
    """Drop numeric columns whose share of missing values exceeds a threshold.

    Args:
        df: Input DataFrame; it is not modified in place.
        sparsity_threshold: A numeric column is removed when its null
            fraction is strictly greater than this value.

    Returns:
        The DataFrame without the sparse numeric columns. One line is printed
        for every column that is removed.
    """
    total_rows = len(df)
    for column in df.select_dtypes(include=np.number).columns:
        missing_share = df[column].isnull().sum() / total_rows
        # Written as "not >" so that a NaN share (empty frame) keeps the column.
        if not (missing_share > sparsity_threshold):
            continue
        print(f"Dropping sparse feature: {column} (Sparsity: {missing_share:.2f})")
        df = df.drop(columns=column)
    return df

In [None]:
# Drop numeric columns that are more than 50 % missing.
df_rent_clean = preprocess_data(df_rent_clean, sparsity_threshold=0.50)

# Keep only users with at least two rated interactions.
user_interaction_counts = df_rent_clean.groupby('user_id')['rating'].count()
users_with_enough_interactions = user_interaction_counts[user_interaction_counts >= 2].index
# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger pandas' SettingWithCopyWarning.
df_rent_clean = df_rent_clean[df_rent_clean["user_id"].isin(users_with_enough_interactions)].copy()
# The filter is ">= 2", so the message says "at least 2" (it previously said "more than 2").
print(f"Number of users with at least 2 interactions: {len(users_with_enough_interactions)}")

Number of users with more than 2 interactions: 33544


In [None]:
# Median, over users, of each user's summed interaction strength.
median_interaction_strength = df_rent_clean.groupby('user_id')['interaction'].sum().median()

# Label every row by its user's total: strictly below the median -> 'Low', otherwise 'High'.
df_rent_clean['user_activity'] = df_rent_clean.groupby('user_id')['interaction'].transform('sum').map(
    lambda x: 'Low' if x < median_interaction_strength else 'High'
)

# Counts are per interaction row, not per user.
print(df_rent_clean['user_activity'].value_counts())

user_activity
High    99471
Low     20515
Name: count, dtype: int64


In [None]:
# Order the interactions chronologically so the 60/20/20 split below is temporal.
# The sort key parses review_date into timestamps for the comparison only; the
# column itself keeps its original dtype. Sorting the raw values would order
# textual dates alphabetically instead of by time.
# NOTE(review): assumes every review_date value is parseable by pd.to_datetime — confirm.
df_rent_clean = df_rent_clean.sort_values('review_date', key=pd.to_datetime)


train_size = int(0.6 * len(df_rent_clean)) # 60 %
val_size = int(0.2 * len(df_rent_clean))   # 20 %; the remaining rows form the test set

train_df = df_rent_clean[:train_size]
val_df = df_rent_clean[train_size:train_size + val_size]
test_df = df_rent_clean[train_size + val_size:]

print("Train set shape:", train_df.shape)
print("Validation set shape:", val_df.shape)
print("Test set shape:", test_df.shape)

Train set shape: (71991, 17)
Validation set shape: (23997, 17)
Test set shape: (23998, 17)


In [None]:
###############################################
# 3. User Item Matrix -Train
###############################################

# Rows are users, columns are items; repeated (user, item) pairs are summed
# and pairs without any interaction are filled with 0.
train_user_item_matrix = train_df.pivot_table(
    index="user_id",
    columns="item_id",
    values="interaction",
    aggfunc="sum",
    fill_value=0
)


# Definitions: NDCG, HR, MRR

In [None]:
###############################################
# 3. Utility/Ranking metrics
###############################################
def dcg_at_k_recursive(relevance_scores, k, b=2):
    """
    Calculate the Discounted Cumulative Gain at rank k.

    The gain at 1-based rank ``i`` is ``rel_i / log2(i + 1)``. The value is
    accumulated iteratively from rank 1 upwards, so large ``k`` no longer hits
    Python's recursion limit; the name is kept so existing callers keep working.

    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate). Values larger than
        the number of scores are clamped; ``k <= 0`` yields 0.0.
    :param b: Kept for backward compatibility only. The discount always uses
        log base 2, and this argument does not influence the result.
    :return: DCG at rank k as a float.
    """
    k = min(k, len(relevance_scores))
    if k <= 0:
        return 0.0

    total = 0.0
    for rank, rel in enumerate(relevance_scores[:k], start=1):
        total += rel / np.log2(rank + 1)
    return float(total)

def dcg_at_k(relevance_scores, k, b=2):
    """
    Discounted Cumulative Gain over the first ``k`` relevance scores.

    Each score at 0-based position ``p`` contributes ``score / log2(p + 2)``.
    ``b`` is accepted but not used by the computation.
    """
    top_k = relevance_scores[:k]
    return sum(gain / np.log2(position + 2) for position, gain in enumerate(top_k))
def idcg_at_k(relevance_scores, k):
    """
    Ideal DCG at rank k: the DCG of the scores when arranged in descending order.

    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :return: Ideal DCG at rank k.
    """
    ideal_order = list(relevance_scores)
    ideal_order.sort(reverse=True)
    return dcg_at_k(ideal_order, k)

def ndcg_at_k(relevance_scores, k):
    """
    Normalized DCG at rank k: DCG of the given order divided by the ideal DCG.

    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :return: nDCG at rank k, or 0 when the ideal DCG is 0.
    """
    actual_gain = dcg_at_k_recursive(relevance_scores, k)
    ideal_gain = idcg_at_k(relevance_scores, k)
    return actual_gain / ideal_gain if ideal_gain != 0 else 0


In [None]:
def hr_at_k(actual, predicted, k=5):
    """
    Hit indicator at rank k for a single ranked list.

    Args:
        actual: collection of relevant items
        predicted: ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        1 if any of the first k predicted items is in ``actual``, otherwise 0.
    """
    top_k = predicted[:k]
    return 1 if any(candidate in actual for candidate in top_k) else 0


def hit_rate_at_k(actual, predicted, k=5):
    """
    Mean hit indicator at rank k over several users.

    Args:
        actual: list of relevant items for each user
        predicted: list of predicted ranked items for each user
        k: rank cutoff (default is 5)

    Returns:
        Average of ``hr_at_k`` over the paired users; 0.0 when ``actual`` is
        falsy or ``predicted`` is empty.
    """
    if not actual or len(predicted) == 0:
        return 0.0

    per_user_hits = [
        hr_at_k(user_actual, user_predicted, k)
        for user_actual, user_predicted in zip(actual, predicted)
    ]
    return np.mean(per_user_hits)

In [None]:
def rr_at_k(actual, predicted, k=5):
    """
    Reciprocal rank of a single relevant item within the top k predictions.

    Args:
        actual: the relevant item, compared to each prediction with ``==``
        predicted: the ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        1 / rank of the first match (rank is 1-based), or 0.0 if there is no
        match in the first k predictions.
    """
    matches = (
        1 / rank
        for rank, candidate in enumerate(predicted[:k], start=1)
        if candidate == actual
    )
    return next(matches, 0.0)


def mrr_at_k(actual, predicted, k=5):
    """
    Computes Mean Reciprocal Rank (MRR) at rank k.

    The reciprocal rank of every relevant item is computed with ``rr_at_k``
    and the values are averaged.

    Args:
        actual: iterable of relevant items (can be multiple)
        predicted: ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        MRR at rank k. Returns 0.0 when ``actual`` is None or empty, or when
        ``predicted`` is empty, instead of averaging an empty list (which
        would produce NaN and a RuntimeWarning).
    """
    if actual is None or len(predicted) == 0:
        return 0.0

    # Materialise once so emptiness can be checked for any iterable.
    relevant_items = list(actual)
    if not relevant_items:
        return 0.0

    return np.mean([rr_at_k(item, predicted, k) for item in relevant_items])

# Fairness metrics

In [None]:



def calculate_disparate_impact(protected_outcomes, privileged_outcomes):
    """
    Ratio of the mean outcome of the protected group to that of the privileged group.

    Args:
        protected_outcomes: Outcomes for the protected group (e.g. 1 = favorable).
        privileged_outcomes: Outcomes for the privileged group.
    Returns:
        protected mean / privileged mean, or ``np.inf`` when the privileged mean is 0.
    """
    protected_rate, privileged_rate = (
        np.mean(protected_outcomes),
        np.mean(privileged_outcomes),
    )
    return np.inf if privileged_rate == 0 else protected_rate / privileged_rate


def calculate_group_recommender_unfairness(group1_metrics, group2_metrics):
    """
    Absolute gap between the mean metric values of two user groups.

    Args:
        group1_metrics (list or numpy.ndarray): Metric values for group 1.
        group2_metrics (list or numpy.ndarray): Metric values for group 2.

    Returns:
        float: ``|mean(group1_metrics) - mean(group2_metrics)|``.
    """
    mean_gap = np.mean(group1_metrics) - np.mean(group2_metrics)
    return np.abs(mean_gap)






In [None]:
def coefficient_of_variation(arr):
    """Return std(arr) / mean(arr), or 0 when the mean is exactly 0."""
    average = np.mean(arr)
    return np.std(arr) / average if average != 0 else 0

def calculate_ucv(metric_low_group, metric_high_group):
    """Average the coefficient of variation of the two groups.

    A group with no values contributes 0 to the average.
    """
    group_cvs = [
        coefficient_of_variation(group) if len(group) > 0 else 0
        for group in (metric_low_group, metric_high_group)
    ]
    return (group_cvs[0] + group_cvs[1]) / 2

def coefficient_of_variance(group):
    """Return std(group) / mean(group); 0 is returned when the mean is 0."""
    mean_value = np.mean(group)
    if mean_value != 0:
        return np.std(group) / mean_value
    return 0

# Content-based recommender

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Replace missing categories with the placeholder 'unknown' so every row has a text value.
df_rent_clean['category'] = df_rent_clean['category'].fillna('unknown')


def create_weighted_user_profile(user_id, df, interaction_data):
    """
    Build a text profile for a user by repeating category names.

    The items with a positive entry in the user's row of ``interaction_data``
    are looked up in ``df``; ratings of the matching rows are summed per
    category and each category name is repeated ``int(sum)`` times.

    Args:
        user_id: Row label of the user in ``interaction_data``.
        df: DataFrame with 'item_id', 'category' and 'rating' columns.
        interaction_data: User-item matrix (users as index, items as columns).

    Returns:
        str: Space-separated category tokens, or '' when the user is not in
        ``interaction_data`` (previously this raised KeyError).
    """
    if user_id not in interaction_data.index:
        return ''

    user_row = interaction_data.loc[user_id]
    user_items = user_row[user_row > 0].index.tolist()

    # NOTE(review): this selects every row of df for these items, not only the
    # rows written by user_id — confirm that is intended.
    user_items_df = df[df['item_id'].isin(user_items)]

    weighted_categories = user_items_df.groupby('category')['rating'].sum()

    return ' '.join(
        f'{category} ' * int(weight)
        for category, weight in weighted_categories.items()
    )


def recommend_items_for_user(user_id, df, interaction_data, top_n=5):
    """
    Recommend rows of ``df`` whose category text is most similar to the user's profile.

    The user's profile text and every row's 'category' are embedded with TF-IDF
    and the rows are ranked by cosine similarity to the profile.

    Args:
        user_id: User to recommend for.
        df: DataFrame with 'item_id', 'category', 'rating' and 'age' columns.
        interaction_data: User-item matrix passed to ``create_weighted_user_profile``.
        top_n: Number of rows to return.

    Returns:
        tuple: ``(recommended_rows, scores)`` where ``recommended_rows`` is a
        DataFrame with the columns item_id/category/rating/age and ``scores``
        is the list of matching similarity values. When the profile is empty
        an empty DataFrame and an empty list are returned, so callers can
        always unpack two values (previously a bare '' was returned).
    """
    output_columns = ['item_id', 'category', 'rating', 'age']

    user_profile = create_weighted_user_profile(user_id, df, interaction_data)
    if not user_profile.strip():
        return df.iloc[0:0][output_columns], []

    vectorizer = TfidfVectorizer(stop_words='english')

    # The profile is appended last so it can be split off as the final matrix row.
    all_profiles = df['category'].tolist() + [user_profile]
    profile_matrix = vectorizer.fit_transform(all_profiles)

    cosine_sim = cosine_similarity(profile_matrix[-1:], profile_matrix[:-1])

    # Stable sort: rows with equal similarity keep their order in df.
    sim_scores = sorted(enumerate(cosine_sim[0]), key=lambda x: x[1], reverse=True)
    top_scores = sim_scores[:top_n]

    item_indices = [position for position, _ in top_scores]
    item_scores = [score for _, score in top_scores]

    recommended_items = df.iloc[item_indices]

    return recommended_items[output_columns], item_scores


# Example run for one hard-coded user id against the training user-item matrix.
# NOTE(review): presumably user 581157 has a row in train_user_item_matrix — verify.
recommended_items, scores = recommend_items_for_user(user_id=581157, df=df_rent_clean, interaction_data=train_user_item_matrix)

print("Recommended Items for User 581157:")
print(recommended_items)


Recommended Items for User 991571 based on Category and Interaction Strength:
        item_id category  rating   age
125033   126335    dress     4.0  44.0
140548   160612    dress    10.0  49.0
33685    183200    dress    10.0  32.0
6316     127495    dress     6.0  49.0
89430    174086    dress    10.0  41.0


In [None]:
def update_user_profile(user_id, accepted_item, train_matrix, item_metadata=None):
    """
    Increment a user's interaction with an accepted item in the user-item matrix.

    A missing user row or item column is first created and filled with 0.
    ``train_matrix`` is modified in place and also returned.

    Parameters:
        user_id (int or str): ID of the user.
        accepted_item (int or str): ID of the accepted item.
        train_matrix (pd.DataFrame): User-item interaction matrix.
        item_metadata (pd.DataFrame, optional): Accepted for backward
            compatibility; it does not influence the update. The former
            category lookup only assigned an unused local and was removed.

    Returns:
        pd.DataFrame: The updated ``train_matrix``.
    """
    # NOTE(review): a later cell defines update_user_profile again with a
    # different fourth parameter; after that cell runs, this version is shadowed.
    if user_id not in train_matrix.index:
        train_matrix.loc[user_id] = 0

    if accepted_item not in train_matrix.columns:
        train_matrix[accepted_item] = 0

    train_matrix.loc[user_id, accepted_item] += 1

    return train_matrix

In [None]:
def calculate_metrics_for_user(user_id, actual_item, recommended_items, user_engaged, k=5):
    """
    Compute ranking metrics for one user with a single held-out item.

    Args:
        user_id: Identifier stored in the result under 'User'.
        actual_item: The held-out item the user actually interacted with.
        recommended_items: Ranked list of recommended item ids.
        user_engaged: Engagement label stored under 'Engagement Group'.
        k: Rank cutoff.

    Returns:
        dict with the keys 'User', 'NDCG@K', 'HR@K', 'MRR@K', 'CV' and
        'Engagement Group'.
    """
    hr = hr_at_k([actual_item], recommended_items, k)
    mrr = mrr_at_k([actual_item], recommended_items, k)

    # Binary relevance of each of the top-k recommendations.
    relevance_scores = [1 if item == actual_item else 0 for item in recommended_items[:k]]

    # NDCG is normalised by the ideal ordering of these relevance scores.
    # The previous normaliser summed the discount over all k positions, as if
    # every position held a relevant item, which deflated NDCG by a constant
    # factor (a hit at rank 1 scored about 0.34 instead of 1.0 for k = 5).
    ndcg = ndcg_at_k(relevance_scores, k)
    cv = coefficient_of_variation(relevance_scores)

    return {
        'User': user_id,
        'NDCG@K': ndcg,
        'HR@K': hr,
        'MRR@K': mrr,
        'CV': cv,
        'Engagement Group': user_engaged
    }


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



def coef_variation(arr):
    """Coefficient of variation: std(arr) / mean(arr), with 0 returned for a zero mean."""
    average = np.mean(arr)
    if average != 0:
        return np.std(arr) / average
    return 0

def create_weighted_user_profile(user_id, df, interaction_data):
    """
    Build a time-decayed category profile text for a user.

    Rows of ``df`` whose item has a positive entry in the user's row of
    ``interaction_data`` are weighted by ``interaction * exp(-age_days / 365)``,
    where ``age_days`` is the distance to the newest of those rows. Weights are
    summed per category and each category name is repeated ``int(sum)`` times.

    Args:
        user_id: Row label of the user in ``interaction_data``.
        df: DataFrame with 'item_id', 'category', 'interaction' and
            'review_date' columns.
        interaction_data: User-item matrix (users as index, items as columns).

    Returns:
        str: Space-separated category tokens; '' when the user is unknown or
        no row of ``df`` matches the user's items.
    """
    if user_id not in interaction_data.index:
        return ''
    user_row = interaction_data.loc[user_id]
    user_items = user_row[user_row > 0].index.tolist()
    user_items_df = df[df['item_id'].isin(user_items)].copy()
    if user_items_df.empty:
        return ''

    # Parse the dates BEFORE taking the maximum: the maximum of unparsed date
    # strings is the lexicographically largest text, not the latest date, which
    # produced negative ages and decay factors above 1.
    user_items_df['review_date'] = pd.to_datetime(user_items_df['review_date'])
    most_recent_date = user_items_df['review_date'].max()
    age_in_days = (most_recent_date - user_items_df['review_date']).dt.days
    user_items_df['time_decay'] = np.exp(-age_in_days / 365)

    decayed_strength = user_items_df['interaction'] * user_items_df['time_decay']
    weighted = decayed_strength.groupby(user_items_df['category']).sum()
    return ' '.join([f'{cat} ' * int(weight) for cat, weight in weighted.items()])

def recommend_items_for_user_torch(user_id, df, interaction_data, top_n=5):
    """
    Return the item ids of the ``top_n`` rows of ``df`` most similar to the user's profile.

    Similarity is the cosine between TF-IDF vectors of the user's profile text
    and each row's 'category'. An empty list is returned when the profile text
    is blank. Rows with equal similarity keep their order in ``df``.
    """
    profile_text = create_weighted_user_profile(user_id, df, interaction_data)
    if profile_text.strip() == '':
        return []

    # The profile goes last so it can be separated from the item rows below.
    corpus = df['category'].tolist()
    corpus.append(profile_text)
    tfidf = TfidfVectorizer(stop_words='english').fit_transform(corpus)

    similarities = cosine_similarity(tfidf[-1:], tfidf[:-1])[0]
    ranked_rows = sorted(
        range(len(similarities)),
        key=lambda row: similarities[row],
        reverse=True,
    )
    return df.iloc[ranked_rows[:top_n]]['item_id'].tolist()

def calculate_metrics_for_user(user_id, actual_item, recommended_items, user_engaged, k=5):
    """
    Compute ranking metrics for one user with a single held-out item.

    Args:
        user_id: Identifier stored in the result under 'User'.
        actual_item: The held-out item the user actually interacted with.
        recommended_items: Ranked list of recommended item ids.
        user_engaged: Engagement label stored under 'Engagement Group'.
        k: Rank cutoff.

    Returns:
        dict with the keys 'User', 'NDCG@K', 'HR@K', 'MRR@K', 'CV' and
        'Engagement Group'.
    """
    hr = hr_at_k([actual_item], recommended_items, k)
    mrr = mrr_at_k([actual_item], recommended_items, k)
    # Binary relevance of each of the top-k recommendations.
    relevance = [1 if item == actual_item else 0 for item in recommended_items[:k]]
    # NDCG is normalised by the ideal ordering of these relevance scores.
    # The previous normaliser summed the discount over all k positions, as if
    # every position held a relevant item, which deflated NDCG by a constant
    # factor (a hit at rank 1 scored about 0.34 instead of 1.0 for k = 5).
    ndcg = ndcg_at_k(relevance, k)
    cv = coef_variation(relevance)
    return {
        'User': user_id,
        'NDCG@K': ndcg,
        'HR@K': hr,
        'MRR@K': mrr,
        'CV': cv,
        'Engagement Group': user_engaged
    }

def update_user_profile(user_id, item_id, interaction_data, df):
    """
    Add 1 to the (user_id, item_id) cell of ``interaction_data`` when both labels exist.

    Unknown users or items leave the matrix unchanged. The matrix is modified
    in place and returned; ``df`` is accepted but not used.
    """
    user_known = user_id in interaction_data.index
    if not (user_known and item_id in interaction_data.columns):
        return interaction_data
    interaction_data.at[user_id, item_id] += 1
    return interaction_data


In [None]:
###############################################
# HYPERPARAMETER
###############################################
from sklearn.model_selection import ParameterGrid
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np


# Search space: TF-IDF document-frequency bounds and the recommendation cutoff k.
param_grid = {
    'tfidf_max_df': [0.7, 0.8, 0.9, 1.0],
    'tfidf_min_df': [0.0, 0.01, 0.05, 0.1],
    'k': [5, 10, 15]
}

# Every combination of the values above.
grid = ParameterGrid(param_grid)

# Best configuration so far, selected by average NDCG; -1 so any real score replaces it.
best_params = None
best_avg_ndcg = -1
results = []

def recommend_items_for_user_tuned(user_id, df, interaction_data, tfidf_vectorizer, top_n=5):
    """
    Recommend item ids using a caller-supplied TF-IDF vectorizer.

    The vectorizer is fitted on every row's 'category' plus the user's profile
    text; the ``top_n`` rows with the highest cosine similarity to the profile
    are returned as a list of item ids.
    """
    profile_text = create_weighted_user_profile(user_id, df, interaction_data)
    # Profile goes last so it can be split from the item rows after fitting.
    corpus = [*df['category'].tolist(), profile_text]
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).ravel()
    best_rows = similarities.argsort()[::-1][:top_n]
    return df.iloc[best_rows]['item_id'].tolist()


# The held-out item and the engagement label of each user do not depend on the
# hyperparameters, so they are built once instead of once per grid point (and
# instead of filtering the whole frame once per user).
# NOTE(review): the target is each user's first item in the full df_rent_clean
# and candidates are drawn from the same frame; val_df is never used — confirm
# this is the intended tuning protocol.
test_items = df_rent_clean.groupby("user_id")["item_id"].first().to_dict()
engagement_by_user = df_rent_clean.groupby("user_id")["user_activity"].first().to_dict()
sample_users = list(test_items.keys())

for params in grid:
    print(f"Testing parameters: {params}")

    tfidf_vectorizer = TfidfVectorizer(stop_words='english',
                                       max_df=params['tfidf_max_df'],
                                       min_df=params['tfidf_min_df'])

    round_metrics = []

    for user_id in sample_users:
        # Users without a row in the training matrix cannot be profiled.
        if user_id not in train_user_item_matrix.index:
            continue

        actual_item = test_items[user_id]
        user_engagement = engagement_by_user[user_id]

        recommended_items = recommend_items_for_user_tuned(user_id, df_rent_clean, train_user_item_matrix, tfidf_vectorizer, top_n=params['k'])
        metrics = calculate_metrics_for_user(user_id, actual_item, recommended_items, user_engagement, k=params['k'])
        round_metrics.append(metrics)

    round_metrics_df = pd.DataFrame(round_metrics)
    if not round_metrics_df.empty:
        avg_ndcg = round_metrics_df['NDCG@K'].mean()
        avg_hr = round_metrics_df['HR@K'].mean()
        avg_mrr = round_metrics_df['MRR@K'].mean()
    else:
        avg_ndcg, avg_hr, avg_mrr = 0, 0, 0

    print(f"Average NDCG: {avg_ndcg}, Average HR: {avg_hr}, Average MRR: {avg_mrr}")

    results.append({
        'params': params,
        'avg_ndcg': avg_ndcg,
        'avg_hr': avg_hr,
        'avg_mrr': avg_mrr
    })

    # Strict ">" keeps the first configuration among ties.
    if avg_ndcg > best_avg_ndcg:
        best_avg_ndcg = avg_ndcg
        best_params = params

print("\n--- Tuning Complete ---")
print(f"Best parameters found: {best_params}")
print(f"Best average NDCG: {best_avg_ndcg}")
results_df = pd.DataFrame(results)
print("\nResults for all parameter combinations:")
print(results_df)
# Testing parameters: {'k': 5, 'tfidf_max_df': 1, 'tfidf_min_df': 0}

In [None]:
from copy import deepcopy

def run_rounds_with_warmup(df, train_matrix, rounds=3, warmup_fraction=0.1):
    """
    Evaluate the content-based recommender over consecutive time windows.

    The interactions are ordered by review date. The first ``warmup_fraction``
    of them is replayed into a copy of ``train_matrix``; the remainder is cut
    into ``rounds`` equally sized windows. In each window the first item of
    every known user is the target, recommendations are built from all rows
    before the window, and utility and fairness metrics are printed. In all
    but the last round the target is then added to the user's profile.

    Neither ``df`` nor ``train_matrix`` is modified: the date conversion and
    the index cast are applied to copies (previously both were changed in
    place on the caller's objects).

    Args:
        df: Interactions with 'user_id', 'item_id', 'review_date',
            'user_activity' and the columns needed by the profile builder.
        train_matrix: User-item matrix used as the starting profile state.
        rounds: Number of evaluation windows.
        warmup_fraction: Share of the ordered interactions used for warm-up.

    Returns:
        list of DataFrames, one per round, holding the per-group means of
        NDCG@K, HR@K, MRR@K and CV plus a 'UCV_NDCG' column. A round without
        any evaluable user contributes an empty DataFrame.
    """
    # assign() returns a new frame, so the caller's review_date column keeps its dtype.
    df = df.assign(review_date=pd.to_datetime(df['review_date'])).sort_values(by='review_date')

    # Copy first, then align the index dtype on the copy only.
    train_matrix_warmup = deepcopy(train_matrix)
    train_matrix_warmup.index = train_matrix_warmup.index.astype(df['user_id'].dtype)

    all_round_metrics = []
    warmup_end = int(len(df) * warmup_fraction)
    warmup_data = df.iloc[:warmup_end]

    # Replay the warm-up interactions of users that already have a profile row.
    for warm_user, warm_item in zip(warmup_data['user_id'], warmup_data['item_id']):
        if warm_user in train_matrix_warmup.index:
            train_matrix_warmup = update_user_profile(warm_user, warm_item, train_matrix_warmup, df)

    # Integer window size: up to rounds - 1 trailing rows are not evaluated.
    window_size = int((len(df) - warmup_end) / rounds)

    for round_num in range(rounds):
        print(f"\n--- Round {round_num + 1} ---")
        round_metrics = []

        start = warmup_end + round_num * window_size
        end = min(warmup_end + (round_num + 1) * window_size, len(df))
        round_data = df.iloc[start:end]
        historical_data = df.iloc[:start]

        # First item and engagement label of each user in this window, built
        # once instead of filtering the window for every user.
        by_user = round_data.groupby("user_id")
        test_items = by_user["item_id"].first().to_dict()
        engagement_by_user = by_user["user_activity"].first().to_dict()

        for user_id, actual_item in test_items.items():
            if user_id not in train_matrix_warmup.index:
                continue

            engagement = engagement_by_user[user_id]
            recommendations = recommend_items_for_user_torch(user_id, historical_data, train_matrix_warmup)
            metrics = calculate_metrics_for_user(user_id, actual_item, recommendations, engagement)
            round_metrics.append(metrics)

            # Feed the observed item back into the profile, except in the final round.
            if round_num < rounds - 1:
                train_matrix_warmup = update_user_profile(user_id, actual_item, train_matrix_warmup, df)

        df_metrics = pd.DataFrame(round_metrics)
        if df_metrics.empty:
            # Without any evaluated user there are no metric columns to aggregate.
            print("No evaluable users in this round; skipping metrics.")
            all_round_metrics.append(pd.DataFrame())
            continue

        # 'Low' versus every other engagement label.
        is_low = df_metrics['Engagement Group'] == 'Low'
        low_metrics = df_metrics[is_low]
        high_metrics = df_metrics[~is_low]

        def get_ucv(metric_name):
            low = low_metrics[metric_name].values
            high = high_metrics[metric_name].values
            return (coef_variation(low) + coef_variation(high)) / 2

        ucv_ndcg = get_ucv('NDCG@K')
        ucv_hr = get_ucv('HR@K')
        ucv_mrr = get_ucv('MRR@K')

        print(f"UCV@K for NDCG: {ucv_ndcg:.6f}, HR: {ucv_hr:.6f}, MRR: {ucv_mrr:.6f}")

        low_hr_outcomes = low_metrics['HR@K'].values
        high_hr_outcomes = high_metrics['HR@K'].values
        di_hr = calculate_disparate_impact(low_hr_outcomes, high_hr_outcomes)
        print(f"Disparate Impact (HR@K): {di_hr:.6f}")

        low_ndcg_outcomes = low_metrics['NDCG@K'].values
        high_ndcg_outcomes = high_metrics['NDCG@K'].values
        di_ndcg = calculate_disparate_impact(low_ndcg_outcomes, high_ndcg_outcomes)
        print(f"Disparate Impact (NDCG@K): {di_ndcg:.6f}")
        gru_ndcg = calculate_group_recommender_unfairness(low_ndcg_outcomes, high_ndcg_outcomes)
        print(f"Group Recommender Unfairness (NDCG@K): {gru_ndcg:.6f}")

        low_mrr_outcomes = low_metrics['MRR@K'].values
        high_mrr_outcomes = high_metrics['MRR@K'].values
        di_mrr = calculate_disparate_impact(low_mrr_outcomes, high_mrr_outcomes)
        print(f"Disparate Impact (MRR@K): {di_mrr:.6f}")
        gru_mrr = calculate_group_recommender_unfairness(low_mrr_outcomes, high_mrr_outcomes)
        print(f"Group Recommender Unfairness (MRR@K): {gru_mrr:.6f}")

        gru_hr = calculate_group_recommender_unfairness(low_hr_outcomes, high_hr_outcomes)
        print(f"Group Recommender Unfairness (HR@K): {gru_hr:.6f}")

        avg_ndcg = df_metrics['NDCG@K'].mean()
        avg_hr = df_metrics['HR@K'].mean()
        avg_mrr = df_metrics['MRR@K'].mean()
        print(f"Average NDCG: {avg_ndcg:.6f}, Average HR: {avg_hr:.6f}, Average MRR: {avg_mrr:.6f}")

        grouped = df_metrics.groupby("Engagement Group").agg({
            "NDCG@K": "mean", "HR@K": "mean", "MRR@K": "mean", "CV": "mean"
        })
        grouped["UCV_NDCG"] = ucv_ndcg
        print(grouped)
        all_round_metrics.append(grouped)

    return all_round_metrics

In [None]:
import warnings
# Silences every RuntimeWarning for the rest of the session, including numpy's
# warnings about means of empty arrays and invalid divisions.
warnings.filterwarnings('ignore', category=RuntimeWarning)
# Three evaluation rounds after replaying the first 10 % of interactions as warm-up.
results = run_rounds_with_warmup(df_rent_clean, train_user_item_matrix, rounds=3, warmup_fraction=0.1)
print(results)


--- Round 1 ---
UCV@K for NDCG: 6.551855, HR: 6.297242, MRR: 6.471418
Disparate Impact (HR@K): 1.310962
Disparate Impact (NDCG@K): 1.300394
Group Recommender Unfairness (NDCG@K): 0.005288
Disparate Impact (MRR@K): 1.258475
Group Recommender Unfairness (MRR@K): 0.005314
Group Recommender Unfairness (HR@K): 0.006710
Average NDCG: 0.018932, Average HR: 0.023265, Average MRR: 0.021894
                    NDCG@K      HR@K     MRR@K        CV  UCV_NDCG
Engagement Group                                                  
High              0.017603  0.021579  0.020558  0.020524  6.551855
Low               0.022891  0.028289  0.025872  0.026010  6.551855

--- Round 2 ---
UCV@K for NDCG: 11.427074, HR: 10.572097, MRR: 11.314175
Disparate Impact (HR@K): 1.582535
Disparate Impact (NDCG@K): 1.518642
Group Recommender Unfairness (NDCG@K): 0.002698
Disparate Impact (MRR@K): 1.589911
Group Recommender Unfairness (MRR@K): 0.003384
Group Recommender Unfairness (HR@K): 0.004160
Average NDCG: 0.005737, Ave

R1:
UCV@K for NDCG: 6.551855, HR: 6.297242, MRR: 6.471418
Disparate Impact (HR@K): 1.310962
Disparate Impact (NDCG@K): 1.300394
Group Recommender Unfairness (NDCG@K): 0.005288
Disparate Impact (MRR@K): 1.258475
Group Recommender Unfairness (MRR@K): 0.005314
Group Recommender Unfairness (HR@K): 0.006710
                    NDCG@K      HR@K     MRR@K        CV  UCV_NDCG
Engagement Group                                                  
High              0.017603  0.021579  0.020558  0.020524  6.551855
Low               0.022891  0.028289  0.025872  0.026010  6.551855


R2:
UCV@K for NDCG: 11.427074, HR: 10.572097, MRR: 11.314175
Disparate Impact (HR@K): 1.582535
Disparate Impact (NDCG@K): 1.518642
Group Recommender Unfairness (NDCG@K): 0.002698
Disparate Impact (MRR@K): 1.589911
Group Recommender Unfairness (MRR@K): 0.003384
Group Recommender Unfairness (HR@K): 0.004160
                    NDCG@K      HR@K     MRR@K        CV   UCV_NDCG
Engagement Group                                                   
High              0.005201  0.007141  0.005736  0.004878  11.427074
Low               0.007899  0.011302  0.009119  0.009449  11.427074

R3: UCV@K for NDCG: 17.236411, HR: 15.919690, MRR: 17.047350
Group Recommender Unfairness (NDCG@K): 0.001507
Disparate Impact (MRR@K): 1.798202
Group Recommender Unfairness (MRR@K): 0.001892
Group Recommender Unfairness (HR@K): 0.002951
                    NDCG@K      HR@K     MRR@K        CV   UCV_NDCG
Engagement Group                                                   
High              0.002077  0.002840  0.002371  0.001920  17.236411
Low               0.003584  0.005792  0.004263  0.005124  17.236411
[                    NDCG@K      HR@K     MRR@K        CV  UCV_NDCG
Engagement Group                                                  
High              0.017603  0.021579  0.020558  0.020524  6.551855
Low               0.022891  0.028289  0.025872  0.026010  6.551855,                     NDCG@K      HR@K     MRR@K        CV   UCV_NDCG
Engagement Group                                                   
High              0.005201  0.007141  0.005736  0.004878  11.427074
Low               0.007899  0.011302  0.009119  0.009449  11.427074,                     NDCG@K      HR@K     MRR@K        CV   UCV_NDCG
Engagement Group                                                   
High              0.002077  0.002840  0.002371  0.001920  17.236411
Low               0.003584  0.005792  0.004263  0.005124  17.236411]

Hybrid recommender - output of hybrid.py

/usr/local/bin/python3.11 /Users/helengaiser/Documents/ucph/thesis/datasets/hybrid.py
{'user_id': 0.0, 'item_id': 0.0, 'rating': 0.0, 'size': 0.0, 'age': 0.0, 'interaction': 0.0}
Number of users with more than 2 interactions: 33544
user_activity
Low     2173
High    1869
Name: count, dtype: int64
user_activity
High    99471
Low     20515
Name: count, dtype: int64
Train set shape: (71991, 17)
Validation set shape: (23997, 17)
Test set shape: (23998, 17)
Recommended Items for User 991571 based on Category and Interaction Strength:
        item_id category  rating   age
125033   126335    dress     4.0  44.0
140548   160612    dress    10.0  49.0
33685    183200    dress    10.0  32.0
6316     127495    dress     6.0  49.0
89430    174086    dress    10.0  41.0
Round 1
Relevant Items: [126335, 160612]
Excluded Items: {183200, 174086, 127495}
Round 2
Relevant Items: [126335, 160612]
Excluded Items: {183200, 174086, 127495}
Round 3
Relevant Items: [126335, 160612]
Excluded Items: {183200, 174086, 127495}
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/threadpoolctl.py:1010: RuntimeWarning:
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md

  warnings.warn(msg, RuntimeWarning)
Distances shape: (27439, 5)
Indices shape: (27439, 5)
Hybrid Recommendations for User 623100:
(144714, 0.443942765646799)
(149739, 0.443942765646799)
(145906, 0.443942765646799)
(125564, 0.443942765646799)
(2382109, 0.08872854958331756)
Starting round 1 of hybrid recommendation...
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037880  0.063447  0.029545
Low               0.052064  0.086877  0.040638
Disparate Impact for NDCG: 1.374453403814944
Disparate Impact for HR: 1.3692792455691942
Disparate Impact for MRR: 1.3754923097864333
GRU for NDCG: 0.014184214633436706
GRU for HR: 0.023429719763331372
GRU for MRR: 0.011093803575659662
UCV for NDCG: 3.783842122989167
UCV for HR: 3.542010191603885
UCV for MRR: 4.193313252666606
Average NDCG: 0.04183072954688187
Average HR: 0.06997339553190714
Average MRR: 0.03263481419390891
Round 1 Metrics with UCV:
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037880  0.063447  0.029545
Low               0.052064  0.086877  0.040638


Starting round 2 of hybrid recommendation...
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037863  0.063397  0.029539
Low               0.052072  0.086877  0.040649
Disparate Impact for NDCG: 1.3752626990625592
Disparate Impact for HR: 1.3703703047290101
Disparate Impact for MRR: 1.3761358571231965
GRU for NDCG: 0.01420870503154023
GRU for HR: 0.02348023501893856
GRU for MRR: 0.011110600245139882
UCV for NDCG: 3.784646535140646
UCV for HR: 3.5428272097439013
UCV for MRR: 4.193869171717489
Average NDCG: 0.041821121318304115
Average HR: 0.0699369510550676
Average MRR: 0.03263359937801426
Round 2 Metrics with UCV:
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037863  0.063397  0.029539
Low               0.052072  0.086877  0.040649


Starting round 3 of hybrid recommendation...
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037924  0.063548  0.029572
Low               0.052132  0.087008  0.040686
Disparate Impact for NDCG: 1.3746302522383946
Disparate Impact for HR: 1.3691612200820935
Disparate Impact for MRR: 1.3758614607803659
GRU for NDCG: 0.014207550854668523
GRU for HR: 0.023459527928029578
GRU for MRR: 0.011114836287170447
UCV for NDCG: 3.7807913523847176
UCV for HR: 3.539043707813361
UCV for MRR: 4.190262158600773
Average NDCG: 0.0418816395012446
Average HR: 0.07008272896242575
Average MRR: 0.032667614223064495
Round 3 Metrics with UCV:
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037924  0.063548  0.029572
Low               0.052132  0.087008  0.040686



Hybrid Metrics for Round 1:
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037880  0.063447  0.029545
Low               0.052064  0.086877  0.040638

Hybrid Metrics for Round 2:
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037863  0.063397  0.029539
Low               0.052072  0.086877  0.040649

Hybrid Metrics for Round 3:
                    NDCG@K      HR@K     MRR@K
Engagement Group                              
High              0.037924  0.063548  0.029572
Low               0.052132  0.087008  0.040686

--- Fairness Metrics for Round 1 ---
NDCG:
  Disparate Impact (Low/High): 1.3745
  Group Recommender Unfairness (Abs Diff): 0.0142
MRR:
  Disparate Impact (Low/High): 1.3755
  Group Recommender Unfairness (Abs Diff): 0.0111
HR@K:
  Disparate Impact (Low/High): 1.3693
  Group Recommender Unfairness (Abs Diff): 0.0234

--- Fairness Metrics for Round 2 ---
NDCG:
  Disparate Impact (Low/High): 1.3753
  Group Recommender Unfairness (Abs Diff): 0.0142
MRR:
  Disparate Impact (Low/High): 1.3761
  Group Recommender Unfairness (Abs Diff): 0.0111
HR@K:
  Disparate Impact (Low/High): 1.3704
  Group Recommender Unfairness (Abs Diff): 0.0235

--- Fairness Metrics for Round 3 ---
NDCG:
  Disparate Impact (Low/High): 1.3746
  Group Recommender Unfairness (Abs Diff): 0.0142
MRR:
  Disparate Impact (Low/High): 1.3759
  Group Recommender Unfairness (Abs Diff): 0.0111
HR@K:
  Disparate Impact (Low/High): 1.3692
  Group Recommender Unfairness (Abs Diff): 0.0235

Process finished with exit code 0