In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
df_modcloth = pd.read_json("modcloth_final_data.json", lines=True)

# Preprocessing

In [None]:
df_modcloth_clean = df_modcloth.dropna(subset=["user_id","item_id"]).drop_duplicates()

In [None]:
vectorizer = CountVectorizer()
category_bow = vectorizer.fit_transform(df_modcloth_clean['category'])

In [None]:
###############################################
# 3. Implicit Feedback
###############################################

fit_mapping = {'fit': 0, 'large': 2, 'small': 1}
df_modcloth_clean.loc[:, 'fit'] = df_modcloth_clean['fit'].map(fit_mapping)


user_reviews = df_modcloth_clean.groupby('user_id')['review_text'].count().to_dict()
df_modcloth_clean.loc[:, 'num_user_reviews'] = df_modcloth_clean['user_id'].map(user_reviews).fillna(0).astype(int)
max_reviews = df_modcloth_clean['num_user_reviews'].max()
df_modcloth_clean.loc[:, 'num_user_reviews_normalized'] = df_modcloth_clean['num_user_reviews'] / max_reviews

max_quality = df_modcloth_clean['quality'].max()
df_modcloth_clean.loc[:, 'quality_normalized'] = df_modcloth_clean['quality'] / max_quality


df_modcloth_clean.loc[:, 'interaction_strength'] = (
    df_modcloth_clean['fit'] * 0.33 +
    df_modcloth_clean['num_user_reviews'] * 0.33 +
    df_modcloth_clean['quality'] * 0.34
)

In [None]:

df_modcloth_clean['fit'] = df_modcloth_clean['fit'].fillna(df_modcloth_clean['fit'].median())
df_modcloth_clean['num_user_reviews'] = df_modcloth_clean['num_user_reviews'].fillna(df_modcloth_clean['num_user_reviews'].median())
df_modcloth_clean['quality'] = df_modcloth_clean['quality'].fillna(df_modcloth_clean['quality'].median())

  df_modcloth_clean['fit'] = df_modcloth_clean['fit'].fillna(df_modcloth_clean['fit'].median())


In [None]:
def calculate_sparsity(df):
    """Calculates sparsity for each numerical feature in the DataFrame.

    Args: dataset as df
    Return : dictionary with item feature name and sparsity values
    """
    numerical_features = df.select_dtypes(include=np.number).columns
    sparsity_results = {}

    for feature in numerical_features:
        sparsity = df[feature].isnull().sum() / len(df)
        sparsity_results[feature] = sparsity

    return sparsity_results

sparsity_dict = calculate_sparsity(df_modcloth_clean)


for feature, sparsity in sparsity_dict.items():
    print(f"Sparsity of {feature}: {sparsity:.2f}")

Sparsity of item_id: 0.00
Sparsity of waist: 0.97
Sparsity of size: 0.00
Sparsity of quality: 0.00
Sparsity of hips: 0.32
Sparsity of bra size: 0.07
Sparsity of fit: 0.00
Sparsity of user_id: 0.00
Sparsity of shoe size: 0.66
Sparsity of num_user_reviews: 0.00
Sparsity of num_user_reviews_normalized: 0.00
Sparsity of quality_normalized: 0.00


In [None]:
def preprocess_data(df, sparsity_threshold=0.50, user_id_col='user_id'):
    """Preprocesses the data by removing numerical features with high sparsity
    Args:
    df : dataset
    sparsity_threshold: how sparse can a feature be to be accepted
    user_id columns

    Return:
    preprocessed dataset -> df_modcloth_clean
    """
    numerical_cols = df.select_dtypes(include=np.number).columns
    cols_to_check = [col for col in numerical_cols if col != user_id_col]

    for col in cols_to_check:
        sparsity = df[col].isnull().sum() / len(df)
        if sparsity > sparsity_threshold:
            print(f"Dropping sparse feature: {col} (Sparsity: {sparsity:.2f})")
            df = df.drop(col, axis=1)

    user_interaction_counts = df_modcloth_clean.groupby('user_id')['interaction_strength'].count()
    print(user_interaction_counts)
    users_with_more_than_two_interactions = user_interaction_counts[user_interaction_counts >= 2]
    print(f"Number of users with more than 2 interactions: {len(users_with_more_than_two_interactions)}")
    df = df_modcloth_clean[df_modcloth_clean["user_id"].isin(users_with_more_than_two_interactions.index)]

    return df

In [None]:
df_modcloth_clean = preprocess_data(df_modcloth_clean, sparsity_threshold=0.50)

Dropping sparse feature: waist (Sparsity: 0.97)
Dropping sparse feature: shoe size (Sparsity: 0.66)
user_id
6         1
46        1
55        1
66        4
104       2
         ..
999864    1
999887    2
999888    1
999923    3
999972    3
Name: interaction_strength, Length: 47958, dtype: int64
Number of users with more than 2 interactions: 15924


In [None]:
user_activity = df_modcloth_clean.groupby('user_id')['interaction_strength'].sum()

In [None]:
median_interaction_strength = df_modcloth_clean.groupby('user_id')['interaction_strength'].sum().median()

df_modcloth_clean['user_activity'] = df_modcloth_clean.groupby('user_id')['interaction_strength'].transform('sum').map(
    lambda x: 'Low' if x < median_interaction_strength else 'High'
)

print(df_modcloth_clean['user_activity'].value_counts())

user_activity
High    34900
Low     15471
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split
# random Split
train_df, temp_df = train_test_split(df_modcloth_clean, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

In [None]:
###############################################
# 3. User Item Matrix
###############################################

# Rows = users.
#Columns = items.
#cell values represent user  interacted with an item.

train_user_item_matrix = train_df.pivot_table(
    index="user_id",
    columns="item_id",
    values="interaction_strength",
    aggfunc="sum",
    fill_value=0
)
val_user_item_matrix = val_df.pivot_table(
    index="user_id",
    columns="item_id",
    values="interaction_strength",
    aggfunc="sum",
    fill_value=0
)
test_user_item_matrix = test_df.pivot_table(
    index="user_id",
    columns="item_id",
    values="interaction_strength",
    aggfunc="sum",
    fill_value=0
)

  train_user_item_matrix = train_df.pivot_table(
  val_user_item_matrix = val_df.pivot_table(
  test_user_item_matrix = test_df.pivot_table(


#Definition effectiveness

In [None]:
###############################################
# 3. Utility/Ranking metrics
###############################################
def dcg_at_k_recursive(relevance_scores, k, b=2):
    """
    Calculate the Discounted Cumulative Gain at rank k recursively.

    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :param b: The base of the logarithm (typically 2).
    :return: DCG at rank k.
    """
    k = min(k, len(relevance_scores))
    if k == 0:
        return 0.0

    if k < b:
        return np.sum([rel / np.log2(idx + 2) for idx, rel in enumerate(relevance_scores[:k])])

    else:
        dcg_k_minus_1 = dcg_at_k_recursive(relevance_scores, k - 1, b)
        rel_k = relevance_scores[k - 1]
        return dcg_k_minus_1 + (rel_k / np.log2(k + 1))

def dcg_at_k(relevance_scores, k, b=2):
    """
    Calculate the Discounted Cumulative Gain at rank k (non-recursive).
    """
    dcg = 0
    for idx, rel in enumerate(relevance_scores[:k]):
        dcg += rel / np.log2(idx + 2)
    return dcg
def idcg_at_k(relevance_scores, k):
    """
    Calculate the Ideal Discounted Cumulative Gain at rank k.
    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :return: Ideal DCG at rank k.
    """
    relevance_scores = sorted(relevance_scores, reverse=True)
    return dcg_at_k(relevance_scores, k)

def ndcg_at_k(relevance_scores, k):
    """
    Calculate the Normalized Discounted Cumulative Gain at rank k.
    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :return: nDCG at rank k.
    """
    dcg = dcg_at_k_recursive(relevance_scores, k)
    idcg = idcg_at_k(relevance_scores, k)
    if idcg == 0:
        return 0
    return dcg / idcg


In [None]:
def hr_at_k(actual, predicted, k=5):
    """
    Computes Hit Rate (HR) at rank k.

    Args:
        actual: list of relevant items
        predicted: ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        Hit Rate at rank k.
    """

    for item in predicted[:k]:
        if item in actual:
            return 1
    return 0



def hit_rate_at_k(actual, predicted, k=5):
    """
    Computes Mean Hit Rate (HR) at rank k for all users.

    Args:
        actual: list of relevant items for each user
        predicted: list of predicted ranked items for each user
        k: rank cutoff (default is 5)

    Returns:
        HR at rank k.
    """

    if not actual or len(predicted) == 0:
        return 0.0
    hr_scores = []
    for user_actual, user_predicted in zip(actual, predicted):
        hr = hr_at_k(user_actual, user_predicted, k)
        hr_scores.append(hr)
    return np.mean(hr_scores)

In [None]:
def rr_at_k(actual, predicted, k=5):
    """
    Computes Reciprocal Rank at rank k.

    Args:
        actual: the relevant item(s)
        predicted: the ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        Reciprocal Rank at rank k.
    """

    for i, item in enumerate(predicted[:k]):
        if item == actual:
            return 1 / (i + 1)
    return 0.0


def mrr_at_k(actual, predicted, k=5):
    """
    Computes Mean Reciprocal Rank (MRR) at rank k.

    Args:
        actual: list of relevant items (can be multiple)
        predicted: ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        MRR at rank k.
    """

    if actual is None or len(predicted) == 0:
        return 0.0


    rr_scores = []
    for item in actual:
        rr = rr_at_k(item, predicted, k)
        rr_scores.append(rr)
    return np.mean(rr_scores)



#DEfinition Fairness metrics

In [None]:
###############################################
# 3. Fairness metrics
###############################################
def calculate_disparate_impact(protected_outcomes, privileged_outcomes):
    """
    Args:
        protected_outcomes: List of binary outcomes (1=favorable) for the protected group.
        privileged_outcomes: List of binary outcomes for the privileged group.
    Returns:
        Disparate impact ratio.
    """
    protected_rate = np.mean(protected_outcomes)
    privileged_rate = np.mean(privileged_outcomes)

    if privileged_rate == 0:
        return np.inf

    return protected_rate / privileged_rate


def calculate_group_recommender_unfairness(group1_metrics, group2_metrics):
  """
    Calculates the absolute difference in mean metrics between two groups.
    This metric quantifies the unfairness of a recommender system by examining
    the absolute difference in average performance between different user groups.

    Args:
        group1_metrics (list or numpy.ndarray): A list or numpy array of metrics for group 1.
        group2_metrics (list or numpy.ndarray): A list or numpy array of metrics for group 2.

    Returns:
        float: The absolute difference in mean metrics between the two groups.
  """
  return np.abs(np.mean(group1_metrics) - np.mean(group2_metrics))


In [None]:
def coefficient_of_variation(arr):
    mean_val = np.mean(arr)
    if mean_val == 0:
        return 0
    return np.std(arr) / mean_val

def calculate_ucv(metric_low_group, metric_high_group):

    cv_low = coefficient_of_variation(metric_low_group) if len(metric_low_group) > 0 else 0
    cv_high = coefficient_of_variation(metric_high_group) if len(metric_high_group) > 0 else 0
    return (cv_low + cv_high) / 2

def coefficient_of_variance(group):
    """Calculates the coefficient of variance for a group."""
    return np.std(group) / np.mean(group) if np.mean(group) != 0 else 0

def coef_variation(arr):
    mean_val = np.mean(arr)
    if mean_val == 0:
        return 0
    return np.std(arr) / mean_val



# First round

In [None]:
!pip install lightfm

Collecting lightfm
  Downloading lightfm-1.17.tar.gz (316 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/316.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m225.3/316.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.17-cp311-cp311-linux_x86_64.whl size=831162 sha256=3cfe6b755d99d0fa2236707f10661d593a7dc0f565573d7d8449604c2fa68d68
  Stored in directory: /root/.cache/pip/wheels/b9/0d/8a/0729d2e6e3ca2a898ba55201f905da7db3f838a33df5b3fcdd
Successfully built lightfm
Installing collected packages: lightfm
Successfully installed lightfm-1.17


In [None]:
import numpy as np
from lightfm import LightFM
from scipy.sparse import coo_matrix # sparse matrix
from lightfm.evaluation import precision_at_k, recall_at_k
###############################################
# 3. WARP Model
###############################################

interactions = coo_matrix(train_user_item_matrix.values)

# Create a LightFM model using 'warp' (Weighted Approximate Ranking) loss
# why warp ? because handles implicit feedback effectively,
# offers computational efficiency, and has a track record of good performance.
# predicting rankings for each
model = LightFM(loss='warp')
 # https://making.lyst.com/lightfm/docs/lightfm.html


model.fit(interactions, epochs=30, num_threads=2)

#  precision@k, recall@k) on the training set
train_precision = precision_at_k(model, interactions, k=5).mean()
train_recall = recall_at_k(model, interactions, k=5).mean()


print(f"Train Precision at k=5: {train_precision}")
print(f"Train Recall at k=5: {train_recall}")


user_id = 55 # example
scores = model.predict(user_id, np.arange(interactions.shape[1]))

top_items = np.argsort(-scores)

print("Top 10 recommended items for user 55:", top_items[:10])

Train Precision at k=5: 0.26242509484291077
Train Recall at k=5: 0.6313707094613337
Top 10 recommended items for user 55: [397 503 739 698 796 388 875  84 385  41]


In [None]:
#Best parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'loss': 'warp', 'no_components': 50, 'user_alpha': 1e-05}, Best AUC: 0.775144100189209
#<lightfm.lightfm.LightFM at 0x7b035c2baf10>
best_model = LightFM(loss='warp', no_components=50, learning_rate=0.01, item_alpha=1e-06, user_alpha=0.0)
best_model.fit(interactions, epochs=30, num_threads=2)


<lightfm.lightfm.LightFM at 0x7d029d5ecb50>

In [None]:
test_items = test_df.groupby("user_id")["item_id"].first().to_dict()

def calculate_metrics_for_user(user, actual_item, recs, group):
    relevance_scores = [1 if item == actual_item else 0 for item in recs]
    ndcg = ndcg_at_k(relevance_scores, k=6)
    hr = hr_at_k([actual_item], recs, k=5)
    mrr = mrr_at_k([actual_item], recs, k=5)
    cv = coefficient_of_variance(relevance_scores)
    return [user, ndcg, hr, mrr, cv, group]



metrics_lightfm = []
for user_id in train_user_item_matrix.index:
    actual_item = test_items.get(user_id, None)
    if actual_item is None:
        continue
    engagement_level = df_modcloth_clean[df_modcloth_clean["user_id"] == user_id][
        "user_activity"
    ].values[0]
    try:
        user_id_internal = train_user_item_matrix.index.get_loc(user_id)
    except KeyError:

        print(f"User {user_id} not found in training data. Skipping.")
        continue

    scores = best_model.predict(user_id_internal, np.arange(interactions.shape[1]))
    top_items = np.argsort(-scores)
    recs = [train_user_item_matrix.columns[i] for i in top_items]
    metrics_lightfm.append(
        calculate_metrics_for_user(user_id, actual_item, recs, engagement_level)
    )

metrics_lightfm_df = pd.DataFrame(
    metrics_lightfm, columns=["User", "NDCG@K", "HR@K", "MRR@K","CV", "Engagement Group"]
)
metrics_lightfm_df['User'] = pd.to_numeric(metrics_lightfm_df['User'], errors='coerce')
grouped_metrics_lightfm = metrics_lightfm_df.groupby("Engagement Group").agg(
    {"NDCG@K": "mean", "HR@K": "mean", "MRR@K": "mean", "CV":"mean"}
)
print("\nLightFM Metrics by User Engagement Level:")
grouped_metrics_lightfm



LightFM Metrics by User Engagement Level:


Unnamed: 0_level_0,NDCG@K,HR@K,MRR@K,CV
Engagement Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
High,0.062067,0.0996,0.040294,30.77748
Low,0.074612,0.121676,0.051491,30.698086


In [None]:
ndcg_low = grouped_metrics_lightfm.loc["Low", "NDCG@K"]
ndcg_moderate_high = grouped_metrics_lightfm.loc["High", "NDCG@K"]

di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_moderate_high)
gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_moderate_high)

cv_low_ndcg = coefficient_of_variance(ndcg_low)
cv_moderate_high_ndcg = coefficient_of_variance(ndcg_moderate_high)


print(f"\nLightFM - NDCG Metrics:")
print(f"Disparate Impact: {di_ndcg}")
print(f"Group Recommender Unfairness: {gru_ndcg}")

hr_low = grouped_metrics_lightfm.loc["Low", "HR@K"]
hr_moderate_high = grouped_metrics_lightfm.loc["High", "HR@K"]

di_hr = calculate_disparate_impact(hr_low, hr_moderate_high)
gru_hr = calculate_group_recommender_unfairness(hr_low, hr_moderate_high)

cv_low_hr = coefficient_of_variance(hr_low)
cv_moderate_high_hr = coefficient_of_variance(hr_moderate_high)


print(f"\nLightFM - HR@K Metrics:")
print(f"Disparate Impact: {di_hr}")
print(f"Group Recommender Unfairness: {gru_hr}")

mrr_low = grouped_metrics_lightfm.loc["Low", "MRR@K"]
mrr_moderate_high = grouped_metrics_lightfm.loc["High", "MRR@K"]

di_mrr = calculate_disparate_impact(mrr_low, mrr_moderate_high)
gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_moderate_high)

cv_low_mrr = coefficient_of_variance(mrr_low)
cv_moderate_high_mrr = coefficient_of_variance(mrr_moderate_high)


print(f"\nLightFM - MRR@K Metrics:")
print(f"Disparate Impact: {di_mrr}")
print(f"Group Recommender Unfairness: {gru_mrr}")



LightFM - NDCG Metrics:
Disparate Impact: 1.2021155521108302
Group Recommender Unfairness: 0.01254474364446443

LightFM - HR@K Metrics:
Disparate Impact: 1.2216454825105725
Group Recommender Unfairness: 0.0220759222856768

LightFM - MRR@K Metrics:
Disparate Impact: 1.2778620410664498
Group Recommender Unfairness: 0.011196294893571775


In [None]:
def calculate_ucv(metric_low_group, metric_high_group):
    """Calculates the Unconditional Coefficient of Variation (UCV)."""
    cv_low = coefficient_of_variation(metric_low_group) if isinstance(metric_low_group, (list, np.ndarray)) and len(metric_low_group) > 0 else 0
    cv_high = coefficient_of_variation(metric_high_group) if isinstance(metric_high_group, (list, np.ndarray)) and len(metric_high_group) > 0 else 0
    return (cv_low + cv_high) / 2

def coefficient_of_variation(arr):
    """Calculates the Coefficient of Variation (CV)."""
    if isinstance(arr, (int, float, np.number)):
    mean_val = np.mean(arr)
    if mean_val == 0:
        return 0
    return np.std(arr) / mean_val

# multiple rounds

In [None]:
###############################################
# 3. Multiple Rounds WARP
###############################################

from lightfm import LightFM
from scipy.sparse import csr_matrix #-> sparse matrix
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.cross_validation import random_train_test_split
from sklearn.metrics import ndcg_score


def evaluate_lightfm_multiple_rounds(
    interactions, test_df, df_rent_clean, num_rounds=3, k=5, epochs=30, user_id_to_print=55
):
    """
    Evaluates LightFM model with multiple training rounds, fairness metrics, and prints recommendations for a user.

    Args:
        interactions: The user-item interaction matrix (sparse).
        test_df: The test DataFrame.
        df_rent_clean: The cleaned main DataFrame with user activity.
        num_rounds: The number of training rounds.
        k: The value of k for precision@k, recall@k metrics.
        epochs: The number of epochs for each training round.
        user_id_to_print: The user ID for which recommendations will be printed.

    Returns:
        A dictionary containing metrics for each round.
    """

    train_interactions, test_interactions = random_train_test_split(
        interactions, test_percentage=0.2, random_state=42
    )

    round_metrics = {}
    model = LightFM( # best parameters
        loss="warp",
        no_components=50,
        learning_rate=0.01,
        item_alpha=1e-06,
        user_alpha=0.0,
    )

    test_items = test_df.groupby("user_id")["item_id"].first().to_dict()
    user_id_map = {user: i for i, user in enumerate(train_user_item_matrix.index)}


    for round_num in range(num_rounds):
        print(f"Training Round {round_num + 1}...")
        model.fit(train_interactions, epochs=epochs)
        test_user_ids = test_df['user_id'].unique()
        ndcg_scores = []
        hr_scores = []
        mrr_scores = []
        cv_scores = []
        engagement_levels_list = []

        for user_id in test_user_ids:
            actual_item = test_items.get(user_id, None)
            if actual_item is None:
                continue
            user_id_internal = user_id_map.get(user_id, None)
            if user_id_internal is None:
                print(f"User {user_id} not found in training data. Skipping evaluation.")
                continue

            engagement_level_row = df_modcloth_clean[df_modcloth_clean["user_id"] == user_id]
            if engagement_level_row.empty:
                 print(f"User {user_id} not found in df_rent_clean for engagement level. Skipping.")
                 continue
            engagement_level = engagement_level_row["user_activity"].values[0]
            engagement_levels_list.append(engagement_level)

            scores = model.predict(user_id_internal, np.arange(interactions.shape[1]))
            top_k_indices = np.argsort(-scores)[:k]
            top_k_items_predicted = [train_user_item_matrix.columns[i] for i in top_k_indices]


            relevance_scores = [1 if item == actual_item else 0 for item in top_k_items_predicted]
            ndcg_scores.append(ndcg_at_k(relevance_scores, k))

            hr_scores.append(hr_at_k([actual_item], top_k_items_predicted, k))
            mrr_scores.append(mrr_at_k([actual_item], top_k_items_predicted, k))
            cv_scores.append(coefficient_of_variance(np.array(relevance_scores)))


        ndcg = np.mean(ndcg_scores) if ndcg_scores else 0
        hr = np.mean(hr_scores) if hr_scores else 0
        mrr = np.mean(mrr_scores) if mrr_scores else 0
        cv = np.mean(cv_scores) if cv_scores else 0


        ndcg_low = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "Low"
        ]
        ndcg_moderate_high = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "High"
        ]


        cv_low = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "Low"
        ]
        cv_moderate_high = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "High"
        ]
        hr_low = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        hr_moderate_high = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "High"
        ]
        mrr_low = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        mrr_moderate_high = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "High"
        ]
        ucv_ndcg = calculate_ucv(ndcg_low, ndcg_moderate_high)

        ucv_hr = calculate_ucv(hr_low, hr_moderate_high)
        ucv_mrr = calculate_ucv(mrr_low, mrr_moderate_high)
        cv_low_mean = np.mean(cv_low) if cv_low else 0
        cv_moderate_high_mean = np.mean(cv_moderate_high) if cv_moderate_high else 0
        hr_low_mean = np.mean(hr_low) if hr_low else 0
        hr_moderate_high_mean = np.mean(hr_moderate_high) if hr_moderate_high else 0
        mrr_low_mean = np.mean(mrr_low) if mrr_low else 0
        mrr_moderate_high_mean = np.mean(mrr_moderate_high) if mrr_moderate_high else 0
        di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_moderate_high) if ndcg_moderate_high else np.inf
        gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_moderate_high)
        ndcg_low = np.mean(ndcg_low) if ndcg_low else 0
        ndcg_moderate_high = np.mean(ndcg_moderate_high) if ndcg_moderate_high else 0
        di_hr = calculate_disparate_impact(hr_low, hr_moderate_high) if hr_moderate_high else np.inf
        gru_hr = calculate_group_recommender_unfairness(hr_low, hr_moderate_high)
        di_mrr = calculate_disparate_impact(mrr_low, mrr_moderate_high) if mrr_moderate_high else np.inf
        gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_moderate_high)

        round_metrics[round_num + 1] = {
            "ndcg@k": ndcg,
            "hr@k": hr,
            "mrr@k": mrr,
            "ndcg_low": ndcg_low,
            "ndcg_moderate_high": ndcg_moderate_high,
            "DI_NDCG": di_ndcg,
            "GRU_NDCG": gru_ndcg,
            "CV": cv,
            "cv_low": cv_low_mean,
            "cv_moderate_high": cv_moderate_high_mean,
            "HR_LOW": hr_low_mean,
            "HR_MODERATE_HIGH": hr_moderate_high_mean,
            "MRR_LOW": mrr_low_mean,
            "MRR_MODERATE_HIGH": mrr_moderate_high_mean,
            "UCV_NDCG": ucv_ndcg,
            "UCV_HR": ucv_hr,
            "UCV_MRR": ucv_mrr,
            "DI_HR": di_hr,
            "GRU_HR": gru_hr,
            "DI_MRR": di_mrr,
            "GRU_MRR": gru_mrr
        }

        print(f"Metrics for round {round_num + 1}: {round_metrics[round_num + 1]}")


        if round_num == 0 and user_id_to_print in user_id_map:
            user_index = user_id_map[user_id_to_print]
            scores = model.predict(user_index, np.arange(interactions.shape[1]))
            top_items_indices = np.argsort(-scores)[:k]
            top_items_actual_ids = [train_user_item_matrix.columns[i] for i in top_items_indices]
            print(
                f"Top {k} recommendations for user {user_id_to_print}: {top_items_actual_ids}"
            )
        elif round_num == 0 and user_id_to_print not in user_id_map:
            print(
                f"User {user_id_to_print} not found in training data mapping. Skipping recommendations."
            )

    return round_metrics

interactions = csr_matrix(train_user_item_matrix.values)

# Pass the test_df and df_modcloth_clean to the function
round_results = evaluate_lightfm_multiple_rounds(interactions, test_df, df_modcloth_clean)

print("\nOverall Results:")
print(round_results)

Training Round 1...
User 759686 not found in training data. Skipping evaluation.
User 633700 not found in training data. Skipping evaluation.
User 847972 not found in training data. Skipping evaluation.
User 554236 not found in training data. Skipping evaluation.
User 867579 not found in training data. Skipping evaluation.
User 634356 not found in training data. Skipping evaluation.
User 902075 not found in training data. Skipping evaluation.
User 716249 not found in training data. Skipping evaluation.
User 901112 not found in training data. Skipping evaluation.
User 167789 not found in training data. Skipping evaluation.
User 608089 not found in training data. Skipping evaluation.
User 373717 not found in training data. Skipping evaluation.
User 382674 not found in training data. Skipping evaluation.
User 688594 not found in training data. Skipping evaluation.
User 89438 not found in training data. Skipping evaluation.
User 397724 not found in training data. Skipping evaluation.
User 

{1: {'ndcg@k': np.float64(0.049627805560888846), 'hr@k': np.float64(0.09268537074148296), 'mrr@k': np.float64(0.03556696726786907), 'ndcg_low': np.float64(0.06385123252424016), 'ndcg_moderate_high': np.float64(0.04321149408814475), 'DI_NDCG': np.float64(1.4776446376511199), 'GRU_NDCG': np.float64(0.020639738436095408), 'CV': np.float64(0.18537074148296592), 'cv_low': np.float64(0.22723609991941982), 'cv_moderate_high': np.float64(0.16648491457651762), 'HR_LOW': np.float64(0.11361804995970991), 'HR_MODERATE_HIGH': np.float64(0.08324245728825881), 'MRR_LOW': np.float64(0.04744829438624765), 'MRR_MODERATE_HIGH': np.float64(0.0302071973827699), 'UCV_NDCG': np.float64(3.2016326699527538), 'UCV_HR': np.float64(3.0558505532392504), 'UCV_MRR': np.float64(3.510971668452215), 'DI_HR': np.float64(1.3649050455858602), 'GRU_HR': np.float64(0.030375592671451096), 'DI_MRR': np.float64(1.5707612257107977), 'GRU_MRR': np.float64(0.017241097003477746)}, 2: {'ndcg@k': np.float64(0.05354725488766322), 'hr@k': np.float64(0.10145290581162325), 'mrr@k': np.float64(0.037913326653306614), 'ndcg_low': np.float64(0.06963513712380502), 'ndcg_moderate_high': np.float64(0.04628987144344223), 'DI_NDCG': np.float64(1.5043277276949547), 'GRU_NDCG': np.float64(0.02334526568036279), 'CV': np.float64(0.2029058116232465), 'cv_low': np.float64(0.25463336019339244), 'cv_moderate_high': np.float64(0.17957106506724826), 'HR_LOW': np.float64(0.12731668009669622), 'HR_MODERATE_HIGH': np.float64(0.08978553253362413), 'MRR_LOW': np.float64(0.050671501477303246), 'MRR_MODERATE_HIGH': np.float64(0.03215800315036956), 'UCV_NDCG': np.float64(3.027129091819404), 'UCV_HR': np.float64(2.9010323230420836), 'UCV_MRR': np.float64(3.301742394071298), 'DI_HR': np.float64(1.4180088540324345), 'GRU_HR': np.float64(0.03753114756307209), 'DI_MRR': np.float64(1.5757042264211896), 'GRU_MRR': np.float64(0.018513498326933685)}, 3: {'ndcg@k': np.float64(0.050472077823825415), 'hr@k': np.float64(0.09493987975951904), 'mrr@k': np.float64(0.03599699398797595), 'ndcg_low': np.float64(0.060840466616751694), 'ndcg_moderate_high': np.float64(0.04579480756136758), 'DI_NDCG': np.float64(1.3285450874582692), 'GRU_NDCG': np.float64(0.015045659055384117), 'CV': np.float64(0.18987975951903807), 'cv_low': np.float64(0.22240128928283642), 'cv_moderate_high': np.float64(0.17520901490367138), 'HR_LOW': np.float64(0.11120064464141821), 'HR_MODERATE_HIGH': np.float64(0.08760450745183569), 'MRR_LOW': np.float64(0.04437281762019876), 'MRR_MODERATE_HIGH': np.float64(0.03221858718041924), 'UCV_NDCG': np.float64(3.1774683647540645), 'UCV_HR': np.float64(3.0271820567713226), 'UCV_MRR': np.float64(3.4980818486958953), 'DI_HR': np.float64(1.2693484373798403), 'GRU_HR': np.float64(0.02359613718958252), 'DI_MRR': np.float64(1.3772428124088016), 'GRU_MRR': np.float64(0.01215423043977952)}}

In [None]:
###############################################
# 3. Hyperparameter tuning BPR
###############################################
from sklearn.model_selection import ParameterGrid
from lightfm.evaluation import auc_score
param_grid = {
    'no_components': [10, 20, 50],
    'learning_rate': [0.01, 0.05, 0.1],
    'item_alpha': [1e-06, 1e-05],
    'user_alpha': [1e-05, 1e-06],
}

best_auc = 0
best_params = {}


grid = ParameterGrid(param_grid)

for params in grid:
  print(f"Training with parameters: {params}")
  model = LightFM(loss='bpr', **params)
  model.fit(interactions, epochs=30, num_threads=2)
  # AUC = The area under the ROC curve (AUC)
  # https://towardsdatascience.com/understanding-the-roc-curve-and-auc-dd4f9a192ecb/

  auc = auc_score(model, interactions, num_threads=2).mean()
  print(f"AUC score: {auc}")

  if auc > best_auc:
    best_auc = auc
    best_params = params


print(f"\nBest parameters: {best_params}, Best AUC: {best_auc}")


best_model = LightFM(loss='bpr', **best_params)
best_model.fit(interactions, epochs=30, num_threads=2)

best_model


Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'no_components': 10, 'user_alpha': 1e-05}
AUC score: 0.7441076636314392
Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'no_components': 10, 'user_alpha': 1e-06}
AUC score: 0.7489626407623291
Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'no_components': 20, 'user_alpha': 1e-05}
AUC score: 0.7508640885353088
Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'no_components': 20, 'user_alpha': 1e-06}
AUC score: 0.7629712224006653
Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'no_components': 50, 'user_alpha': 1e-05}
AUC score: 0.721182107925415
Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'no_components': 50, 'user_alpha': 1e-06}
AUC score: 0.7296546697616577
Training with parameters: {'item_alpha': 1e-06, 'learning_rate': 0.05, 'no_components': 10, 'user_alpha': 1e-05}
AUC score: 0.9691360592842102


<lightfm.lightfm.LightFM at 0x7bd2a2df34d0>

In [None]:
#Best parameters: {'item_alpha': 1e-05, 'learning_rate': 0.1, 'no_components': 50, 'user_alpha': 1e-06}, Best AUC: 0.9944227337837219
#<lightfm.lightfm.LightFM at 0x7bd2a2df34d0>

best_model_bpr = LightFM(loss='bpr', no_components=50, learning_rate=0.1, item_alpha=1e-05, user_alpha=1e-06)
best_model_bpr.fit(interactions, epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x7d02418e0990>

In [None]:
###############################################
# 3. BPR Model
###############################################
import numpy as np
from lightfm import LightFM
from scipy.sparse import coo_matrix
from lightfm.evaluation import precision_at_k, recall_at_k


interactions = coo_matrix(train_user_item_matrix.values)

model = LightFM(loss='bpr')
 # https://making.lyst.com/lightfm/docs/lightfm.html


model.fit(interactions, epochs=30, num_threads=2)

train_precision = precision_at_k(model, interactions, k=5).mean()
train_recall = recall_at_k(model, interactions, k=5).mean()


print(f"Train Precision at k=5: {train_precision}")
print(f"Train Recall at k=5: {train_recall}")


user_id = 55
scores = model.predict(user_id, np.arange(interactions.shape[1]))
top_items = np.argsort(-scores)

print("Top 10 recommended items for user 55:", top_items[:10])

Train Precision at k=5: 0.2809772491455078
Train Recall at k=5: 0.6442635265769372
Top 10 recommended items for user 55: [397 460 838 698 399  74 415 743 827 751]


In [None]:
test_items = test_df.groupby("user_id")["item_id"].first().to_dict()

def calculate_metrics_for_user(user, actual_item, recs, group):
    relevance_scores = [1 if item == actual_item else 0 for item in recs]
    ndcg = ndcg_at_k(relevance_scores, k=6)
    hr = hr_at_k([actual_item], recs, k=5)
    mrr = mrr_at_k([actual_item], recs, k=5)
    cv = coefficient_of_variance(relevance_scores)
    return [user, ndcg, hr, mrr, cv, group]

metrics_lightfm = []
for user_id in train_user_item_matrix.index:
    actual_item = test_items.get(user_id, None)
    if actual_item is None:
        continue

    engagement_level = df_modcloth_clean[df_modcloth_clean["user_id"] == user_id][
        "user_activity"
    ].values[0]
    try:
        user_id_internal = train_user_item_matrix.index.get_loc(user_id)
    except KeyError:
        print(f"User {user_id} not found in training data. Skipping.")
        continue

    scores = best_model_bpr.predict(user_id_internal, np.arange(interactions.shape[1]))
    top_items = np.argsort(-scores)
    recs = [train_user_item_matrix.columns[i] for i in top_items]


    metrics_lightfm.append(
        calculate_metrics_for_user(user_id, actual_item, recs, engagement_level)
    )

metrics_lightfm_df = pd.DataFrame(
    metrics_lightfm, columns=["User", "NDCG@K", "HR@K", "MRR@K","CV", "Engagement Group"]
)
metrics_lightfm_df['User'] = pd.to_numeric(metrics_lightfm_df['User'], errors='coerce')


grouped_metrics_lightfm = metrics_lightfm_df.groupby("Engagement Group").agg(
    {"NDCG@K": "mean", "HR@K": "mean", "MRR@K": "mean", "CV":"mean"}
)
print("\nLightFM Metrics by User Engagement Level:")
grouped_metrics_lightfm



LightFM Metrics by User Engagement Level:


Unnamed: 0_level_0,NDCG@K,HR@K,MRR@K,CV
Engagement Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
High,0.027698,0.049437,0.014074,30.77748
Low,0.036172,0.063658,0.023274,30.698086


In [None]:
###############################################
# 3. Multiple rounds BPR
###############################################
from lightfm import LightFM
from scipy.sparse import csr_matrix
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.cross_validation import random_train_test_split
from sklearn.metrics import ndcg_score



def evaluate_lightfm_multiple_rounds(
    interactions, test_df, df_rent_clean, num_rounds=3, k=5, epochs=30, user_id_to_print=55
):
    """
    Evaluates LightFM model with multiple training rounds, fairness metrics, and prints recommendations for a user.

    Args:
        interactions: The user-item interaction matrix (sparse).
        test_df: The test DataFrame.
        df_rent_clean: The cleaned main DataFrame with user activity.
        num_rounds: The number of training rounds.
        k: The value of k for precision@k, recall@k metrics.
        epochs: The number of epochs for each training round.
        user_id_to_print: The user ID for which recommendations will be printed.

    Returns:
        A dictionary containing metrics for each round.
    """

    train_interactions, test_interactions = random_train_test_split(
        interactions, test_percentage=0.2, random_state=42
    )

    round_metrics = {}
    model = LightFM(loss='bpr', no_components=50, learning_rate=0.1, item_alpha=1e-05, user_alpha=1e-06)# best model

    test_items = test_df.groupby("user_id")["item_id"].first().to_dict()

    user_id_map = {user: i for i, user in enumerate(train_user_item_matrix.index)}


    for round_num in range(num_rounds):
        print(f"Training Round {round_num + 1}...")
        model.fit(train_interactions, epochs=epochs)

        test_user_ids = test_df['user_id'].unique()


        ndcg_scores = []
        hr_scores = []
        mrr_scores = []
        cv_scores = [] #<- IGNORE
        engagement_levels_list = []

        for user_id in test_user_ids:
            actual_item = test_items.get(user_id, None)
            if actual_item is None:
                continue
            user_id_internal = user_id_map.get(user_id, None)
            if user_id_internal is None:
                print(f"User {user_id} not found in training data. Skipping evaluation.")
                continue
            engagement_level_row = df_modcloth_clean[df_modcloth_clean["user_id"] == user_id]
            if engagement_level_row.empty:
                 print(f"User {user_id} not found in df_rent_clean for engagement level. Skipping.")
                 continue
            engagement_level = engagement_level_row["user_activity"].values[0]
            engagement_levels_list.append(engagement_level)
            scores = model.predict(user_id_internal, np.arange(interactions.shape[1]))
            top_k_indices = np.argsort(-scores)[:k]
            top_k_items_predicted = [train_user_item_matrix.columns[i] for i in top_k_indices]
            relevance_scores = [1 if item == actual_item else 0 for item in top_k_items_predicted]
            ndcg_scores.append(ndcg_at_k(relevance_scores, k))

            hr_scores.append(hr_at_k([actual_item], top_k_items_predicted, k))
            mrr_scores.append(mrr_at_k([actual_item], top_k_items_predicted, k))
            cv_scores.append(coefficient_of_variance(np.array(relevance_scores)))
        ndcg = np.mean(ndcg_scores) if ndcg_scores else 0
        hr = np.mean(hr_scores) if hr_scores else 0
        mrr = np.mean(mrr_scores) if mrr_scores else 0
        cv = np.mean(cv_scores) if cv_scores else 0
        ndcg_low = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "Low"
        ]
        ndcg_moderate_high = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "High"
        ]
        cv_low = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "Low"
        ]
        cv_moderate_high = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "High"
        ]
        hr_low = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        hr_moderate_high = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "High"
        ]
        mrr_low = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        mrr_moderate_high = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "High"
        ]

        ucv_ndcg = calculate_ucv(ndcg_low, ndcg_moderate_high)

        ucv_hr = calculate_ucv(hr_low, hr_moderate_high)
        ucv_mrr = calculate_ucv(mrr_low, mrr_moderate_high)
        cv_low_mean = np.mean(cv_low) if cv_low else 0
        cv_moderate_high_mean = np.mean(cv_moderate_high) if cv_moderate_high else 0
        hr_low_mean = np.mean(hr_low) if hr_low else 0
        hr_moderate_high_mean = np.mean(hr_moderate_high) if hr_moderate_high else 0
        mrr_low_mean = np.mean(mrr_low) if mrr_low else 0
        mrr_moderate_high_mean = np.mean(mrr_moderate_high) if mrr_moderate_high else 0
        di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_moderate_high) if ndcg_moderate_high else np.inf
        gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_moderate_high)
        ndcg_low = np.mean(ndcg_low) if ndcg_low else 0
        ndcg_moderate_high = np.mean(ndcg_moderate_high) if ndcg_moderate_high else 0
        di_hr = calculate_disparate_impact(hr_low, hr_moderate_high) if hr_moderate_high else np.inf
        gru_hr = calculate_group_recommender_unfairness(hr_low, hr_moderate_high)
        di_mrr = calculate_disparate_impact(mrr_low, mrr_moderate_high) if mrr_moderate_high else np.inf
        gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_moderate_high)
        round_metrics[round_num + 1] = {
            "ndcg@k": ndcg,
            "hr@k": hr,
            "mrr@k": mrr,
            "DI_NDCG": di_ndcg,
            "GRU_NDCG": gru_ndcg,
            "ndcg_low": ndcg_low,
            "ndcg_moderate_high": ndcg_moderate_high,
            "CV": cv,
            "cv_low": cv_low_mean,
            "cv_moderate_high": cv_moderate_high_mean,
            "HR_LOW": hr_low_mean,
            "HR_MODERATE_HIGH": hr_moderate_high_mean,
            "MRR_LOW": mrr_low_mean,
            "MRR_MODERATE_HIGH": mrr_moderate_high_mean,
            "UCV_NDCG": ucv_ndcg,
            "UCV_HR": ucv_hr,
            "UCV_MRR": ucv_mrr,
            "DI_HR": di_hr,
            "GRU_HR": gru_hr,
            "DI_MRR": di_mrr,
            "GRU_MRR": gru_mrr
        }

        print(f"Metrics for round {round_num + 1}: {round_metrics[round_num + 1]}")


        if round_num == 0 and user_id_to_print in user_id_map:
            user_index = user_id_map[user_id_to_print]
            scores = model.predict(user_index, np.arange(interactions.shape[1]))
            top_items_indices = np.argsort(-scores)[:k]
            top_items_actual_ids = [train_user_item_matrix.columns[i] for i in top_items_indices]
            print(
                f"Top {k} recommendations for user {user_id_to_print}: {top_items_actual_ids}"
            )
        elif round_num == 0 and user_id_to_print not in user_id_map:
            print(
                f"User {user_id_to_print} not found in training data mapping. Skipping recommendations."
            )

    return round_metrics

interactions = csr_matrix(train_user_item_matrix.values)

round_results = evaluate_lightfm_multiple_rounds(interactions, test_df, df_modcloth_clean)

print("\nOverall Results:")
print(round_results)

Training Round 1...
User 759686 not found in training data. Skipping evaluation.
User 633700 not found in training data. Skipping evaluation.
User 847972 not found in training data. Skipping evaluation.
User 554236 not found in training data. Skipping evaluation.
User 867579 not found in training data. Skipping evaluation.
User 634356 not found in training data. Skipping evaluation.
User 902075 not found in training data. Skipping evaluation.
User 716249 not found in training data. Skipping evaluation.
User 901112 not found in training data. Skipping evaluation.
User 167789 not found in training data. Skipping evaluation.
User 608089 not found in training data. Skipping evaluation.
User 373717 not found in training data. Skipping evaluation.
User 382674 not found in training data. Skipping evaluation.
User 688594 not found in training data. Skipping evaluation.
User 89438 not found in training data. Skipping evaluation.
User 397724 not found in training data. Skipping evaluation.
User 

{1: {'ndcg@k': np.float64(0.023029307258231557), 'hr@k': np.float64(0.04809619238476954), 'mrr@k': np.float64(0.014946559786239147), 'DI_NDCG': np.float64(1.4778814623636856), 'GRU_NDCG': np.float64(0.009581807245977642), 'ndcg_low': np.float64(0.029632401379896006), 'ndcg_moderate_high': np.float64(0.020050594133918364), 'CV': np.float64(0.09619238476953908), 'cv_low': np.float64(0.11281224818694602), 'cv_moderate_high': np.float64(0.08869501999272991), 'HR_LOW': np.float64(0.05640612409347301), 'HR_MODERATE_HIGH': np.float64(0.044347509996364956), 'MRR_LOW': np.float64(0.02078968573730862), 'MRR_MODERATE_HIGH': np.float64(0.012310674906094754), 'UCV_NDCG': np.float64(4.438319338649441), 'UCV_HR': np.float64(4.366083550530732), 'UCV_MRR': np.float64(4.6124678917020905), 'DI_HR': np.float64(1.271911863779871), 'GRU_HR': np.float64(0.012058614097108053), 'DI_MRR': np.float64(1.6887527203740949), 'GRU_MRR': np.float64(0.008479010831213865)}, 2: {'ndcg@k': np.float64(0.021655243444802148), 'hr@k': np.float64(0.04433867735470942), 'mrr@k': np.float64(0.014307782231128923), 'DI_NDCG': np.float64(1.611799830483096), 'GRU_NDCG': np.float64(0.011131550442012954), 'ndcg_low': np.float64(0.029326309393193337), 'ndcg_moderate_high': np.float64(0.018194758951180383), 'CV': np.float64(0.08867735470941884), 'cv_low': np.float64(0.11281224818694602), 'cv_moderate_high': np.float64(0.07778989458378771), 'HR_LOW': np.float64(0.05640612409347301), 'HR_MODERATE_HIGH': np.float64(0.038894947291893855), 'MRR_LOW': np.float64(0.020413644910018798), 'MRR_MODERATE_HIGH': np.float64(0.011553374530473767), 'UCV_NDCG': np.float64(4.622166768507222), 'UCV_HR': np.float64(4.530500620725936), 'UCV_MRR': np.float64(4.8448774469366835), 'DI_HR': np.float64(1.4502172652443388), 'GRU_HR': np.float64(0.017511176801579154), 'DI_MRR': np.float64(1.7668989139211866), 'GRU_MRR': np.float64(0.00886027037954503)}, 3: {'ndcg@k': np.float64(0.022192185883416222), 'hr@k': np.float64(0.04534068136272545), 'mrr@k': np.float64(0.014679358717434868), 'DI_NDCG': np.float64(1.225991274950039), 'GRU_NDCG': np.float64(0.004686026838629655), 'ndcg_low': np.float64(0.025421459388694324), 'ndcg_moderate_high': np.float64(0.02073543255006467), 'CV': np.float64(0.0906813627254509), 'cv_low': np.float64(0.09669621273166801), 'cv_moderate_high': np.float64(0.08796801163213377), 'HR_LOW': np.float64(0.048348106365834004), 'HR_MODERATE_HIGH': np.float64(0.04398400581606689), 'MRR_LOW': np.float64(0.01786193929626645), 'MRR_MODERATE_HIGH': np.float64(0.01324366896885981), 'UCV_NDCG': np.float64(4.639134903641675), 'UCV_HR': np.float64(4.549364487524867), 'UCV_MRR': np.float64(4.849305358356572), 'DI_HR': np.float64(1.0992201703504905), 'GRU_HR': np.float64(0.0043641005497671165), 'DI_MRR': np.float64(1.34871532490473), 'GRU_MRR': np.float64(0.004618270327406639)}}