In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
file_path = 'renttherunway_final_data.json' 
df_rent = pd.read_json(file_path, lines=True)

# Preprocessing

In [3]:
df_rent_clean = df_rent.dropna(subset=["user_id", "item_id", "rating","age"]).drop_duplicates()

In [4]:
###############################################
# 3. Explicit Feedback
###############################################

df_rent_clean['interaction'] = df_rent_clean['rating']

In [5]:
def calculate_sparsity(df):
    """Calculates sparsity for each numerical feature in the DataFrame."""
    numerical_features = df.select_dtypes(include=np.number).columns
    sparsity_results = {}

    for feature in numerical_features:
        sparsity = df[feature].isnull().sum() / len(df) 
        sparsity_results[feature] = sparsity

    return sparsity_results


sparsity_dict = calculate_sparsity(df_rent_clean)
print(sparsity_dict)

{'user_id': 0.0, 'item_id': 0.0, 'rating': 0.0, 'size': 0.0, 'age': 0.0, 'interaction': 0.0}


In [6]:
def preprocess_data(df, sparsity_threshold=0.50):
    """Preprocesses the data by removing numerical features with high sparsity."""
    for col in df.select_dtypes(include=np.number).columns:
        sparsity = df[col].isnull().sum() / len(df)
        if sparsity > sparsity_threshold:
            print(f"Dropping sparse feature: {col} (Sparsity: {sparsity:.2f})")
            df = df.drop(col, axis=1)
    return df

In [7]:
df_rent_clean = preprocess_data(df_rent_clean, sparsity_threshold=0.50)


user_interaction_counts = df_rent_clean.groupby('user_id')['rating'].count()
users_with_enough_interactions = user_interaction_counts[user_interaction_counts >= 2].index
df_rent_clean = df_rent_clean[df_rent_clean["user_id"].isin(users_with_enough_interactions)]
print(f"Number of users with more than 2 interactions: {len(users_with_enough_interactions)}")

Number of users with more than 2 interactions: 33544


In [9]:
median_interaction_strength = df_rent_clean.groupby('user_id')['interaction'].sum().median()

df_rent_clean['user_activity'] = df_rent_clean.groupby('user_id')['interaction'].transform('sum').map(
    lambda x: 'Low' if x < median_interaction_strength else 'High'
)

print(df_rent_clean['user_activity'].value_counts())

user_activity
High    99471
Low     20515
Name: count, dtype: int64


In [10]:

df_rent_clean = df_rent_clean.sort_values('review_date')


train_size = int(0.6 * len(df_rent_clean)) # 60 %
val_size = int(0.2 * len(df_rent_clean))


train_df = df_rent_clean[:train_size]
val_df = df_rent_clean[train_size:train_size + val_size]
test_df = df_rent_clean[train_size + val_size:]


print("Train set shape:", train_df.shape)
print("Validation set shape:", val_df.shape)
print("Test set shape:", test_df.shape)

Train set shape: (71991, 17)
Validation set shape: (23997, 17)
Test set shape: (23998, 17)


In [11]:
###############################################
# 3. User Item Matrix -Train
###############################################

train_user_item_matrix = train_df.pivot_table(
    index="user_id",
    columns="item_id",
    values="interaction",
    aggfunc="sum",
    fill_value=0
)




#Definition effectiveness

In [12]:
###############################################
# 3. Utility/Ranking metrics
###############################################
def dcg_at_k_recursive(relevance_scores, k, b=2):
    """
    Calculate the Discounted Cumulative Gain at rank k recursively.

    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :param b: The base of the logarithm (typically 2).
    :return: DCG at rank k.
    """
    
    k = min(k, len(relevance_scores))

    if k == 0:
        return 0.0

    if k < b:
        return np.sum([rel / np.log2(idx + 2) for idx, rel in enumerate(relevance_scores[:k])])

    else:
        dcg_k_minus_1 = dcg_at_k_recursive(relevance_scores, k - 1, b)
        rel_k = relevance_scores[k - 1] 
        return dcg_k_minus_1 + (rel_k / np.log2(k + 1))

def dcg_at_k(relevance_scores, k, b=2):
    """
    Calculate the Discounted Cumulative Gain at rank k (non-recursive).
    """
    dcg = 0
    for idx, rel in enumerate(relevance_scores[:k]):
        dcg += rel / np.log2(idx + 2)
    return dcg
def idcg_at_k(relevance_scores, k):
    """
    Calculate the Ideal Discounted Cumulative Gain at rank k.
    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :return: Ideal DCG at rank k.
    """
    relevance_scores = sorted(relevance_scores, reverse=True)
    return dcg_at_k(relevance_scores, k)

def ndcg_at_k(relevance_scores, k):
    """
    Calculate the Normalized Discounted Cumulative Gain at rank k.
    :param relevance_scores: A list or array of relevance scores for the ranked items.
    :param k: The rank at which to stop (k items to evaluate).
    :return: nDCG at rank k.
    """
    dcg = dcg_at_k_recursive(relevance_scores, k)
    idcg = idcg_at_k(relevance_scores, k)
    if idcg == 0:
        return 0
    return dcg / idcg


In [13]:
def hr_at_k(actual, predicted, k=5):
    """
    Computes Hit Rate (HR) at rank k.

    Args:
        actual: list of relevant items
        predicted: ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        Hit Rate at rank k.
    """

    for item in predicted[:k]:  
        if item in actual:
            return 1 
    return 0


def hit_rate_at_k(actual, predicted, k=5):
    """
    Computes Mean Hit Rate (HR) at rank k for all users.

    Args:
        actual: list of relevant items for each user
        predicted: list of predicted ranked items for each user
        k: rank cutoff (default is 5)

    Returns:
        HR at rank k.
    """

    if not actual or len(predicted) == 0:
        return 0.0

    hr_scores = []
    for user_actual, user_predicted in zip(actual, predicted):
        hr = hr_at_k(user_actual, user_predicted, k)
        hr_scores.append(hr)

    return np.mean(hr_scores)

In [14]:
def rr_at_k(actual, predicted, k=5):
    """
    Computes Reciprocal Rank at rank k.

    Args:
        actual: the relevant item(s)
        predicted: the ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        Reciprocal Rank at rank k.
    """

    for i, item in enumerate(predicted[:k]):
        if item == actual:
            return 1 / (i + 1) 
    return 0.0  


def mrr_at_k(actual, predicted, k=5):
    """
    Computes Mean Reciprocal Rank (MRR) at rank k.

    Args:
        actual: list of relevant items (can be multiple)
        predicted: ranked list of items
        k: rank cutoff (default is 5)

    Returns:
        MRR at rank k.
    """

    if actual is None or len(predicted) == 0:
        return 0.0


    rr_scores = []
    for item in actual:
        rr = rr_at_k(item, predicted, k)
        rr_scores.append(rr)

   
    return np.mean(rr_scores)



#DEfinition Fairness metrics

In [15]:
def calculate_disparate_impact(protected_outcomes, privileged_outcomes):
    """
    Args:
        protected_outcomes: List of binary outcomes (1=favorable) for the protected group.
        privileged_outcomes: List of binary outcomes for the privileged group.
    Returns:
        Disparate impact ratio.
    """
    protected_rate = np.mean(protected_outcomes)
    privileged_rate = np.mean(privileged_outcomes)

    if privileged_rate == 0:
        return np.inf  

    return protected_rate / privileged_rate


def calculate_group_recommender_unfairness(group1_metrics, group2_metrics):
  """
    Calculates the absolute difference in mean metrics between two groups.
    This metric quantifies the unfairness of a recommender system by examining
    the absolute difference in average performance between different user groups.

    Args:
        group1_metrics (list or numpy.ndarray): A list or numpy array of metrics for group 1.
        group2_metrics (list or numpy.ndarray): A list or numpy array of metrics for group 2.

    Returns:
        float: The absolute difference in mean metrics between the two groups.
  """
  return np.abs(np.mean(group1_metrics) - np.mean(group2_metrics))






In [16]:


def coefficient_of_variance(group):
    """Calculates the coefficient of variance for a group."""
    return np.std(group) / np.mean(group) if np.mean(group) != 0 else 0
def coefficient_of_variation(arr):
    mean_val = np.mean(arr)
    if mean_val == 0:
        return 0
    return np.std(arr) / mean_val

def calculate_ucv(metric_low_group, metric_high_group):
    
    cv_low = coefficient_of_variation(metric_low_group) if len(metric_low_group) > 0 else 0
    cv_high = coefficient_of_variation(metric_high_group) if len(metric_high_group) > 0 else 0
    return (cv_low + cv_high) / 2


# MEtrics

In [17]:
pip install lightfm # install lightfm in terminal


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.11 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [18]:
import numpy as np
from lightfm import LightFM
from scipy.sparse import coo_matrix # sparse matrix
from lightfm.evaluation import precision_at_k, recall_at_k

interactions = coo_matrix(train_user_item_matrix.values)


model = LightFM(loss='warp')
 # https://making.lyst.com/lightfm/docs/lightfm.html


model.fit(interactions, epochs=30, num_threads=2)

train_precision = precision_at_k(model, interactions, k=5).mean()
train_recall = recall_at_k(model, interactions, k=5).mean()


print(f"Train Precision at k=5: {train_precision}")
print(f"Train Recall at k=5: {train_recall}")


user_id = 55
scores = model.predict(user_id, np.arange(interactions.shape[1]))

#descending order
top_items = np.argsort(-scores)

print("Top 10 recommended items for user 55:", top_items[:10])



Train Precision at k=5: 0.09307190775871277
Train Recall at k=5: 0.3034202631760367
Top 10 recommended items for user 55: [  82 1795 1552 2844    1 2859  356  159 2253 2063]


In [19]:
#Best parameters: {'item_alpha': 1e-06, 'learning_rate': 0.01, 'loss': 'warp', 'no_components': 50, 'user_alpha': 1e-05}, Best AUC: 0.775144100189209
#<lightfm.lightfm.LightFM at 0x7b035c2baf10>

best_model = LightFM(loss='warp', no_components=50, learning_rate=0.01, item_alpha=1e-06, user_alpha=0.0)
best_model.fit(interactions, epochs=30, num_threads=2)


<lightfm.lightfm.LightFM at 0x1599e2210>

In [20]:
test_items = test_df.groupby("user_id")["item_id"].first().to_dict()

def calculate_metrics_for_user(user, actual_item, recs, group):
    relevance_scores = [1 if item == actual_item else 0 for item in recs]
    ndcg = ndcg_at_k(relevance_scores, k=6)
    hr = hr_at_k([actual_item], recs, k=5)
    mrr = mrr_at_k([actual_item], recs, k=5)
    cv = coefficient_of_variance(relevance_scores)
    return [user, ndcg, hr, mrr, cv, group]



metrics_lightfm = []
for user_id in train_user_item_matrix.index:
    actual_item = test_items.get(user_id, None)
    if actual_item is None:
        continue

    
    engagement_level = df_rent_clean[df_rent_clean["user_id"] == user_id][
        "user_activity"
    ].values[0]
    try:
        user_id_internal = train_user_item_matrix.index.get_loc(user_id)
    except KeyError:
        
        print(f"User {user_id} not found in training data. Skipping.")
        continue

   
    scores = best_model.predict(user_id_internal, np.arange(interactions.shape[1])) 
    top_items = np.argsort(-scores)
    recs = [train_user_item_matrix.columns[i] for i in top_items]

    
    metrics_lightfm.append(
        calculate_metrics_for_user(user_id, actual_item, recs, engagement_level)
    )

metrics_lightfm_df = pd.DataFrame(
    metrics_lightfm, columns=["User", "NDCG@K", "HR@K", "MRR@K","CV", "Engagement Group"]
)
metrics_lightfm_df['User'] = pd.to_numeric(metrics_lightfm_df['User'], errors='coerce')


grouped_metrics_lightfm = metrics_lightfm_df.groupby("Engagement Group").agg(
    {"NDCG@K": "mean", "HR@K": "mean", "MRR@K": "mean", "CV":"mean"}
)
print("\nLightFM Metrics by User Engagement Level:")
grouped_metrics_lightfm



LightFM Metrics by User Engagement Level:


Unnamed: 0_level_0,NDCG@K,HR@K,MRR@K,CV
Engagement Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
High,0.012781,0.020487,0.00835,71.879668
Low,0.029863,0.045179,0.02105,72.809042


In [21]:
def calculate_average_metrics(metrics_df):
  """
  Calculates the average of NDCG, HR, and MRR from a DataFrame.

  Args:
    metrics_df: A pandas DataFrame with 'NDCG@K', 'HR@K', and 'MRR@K' columns.

  Returns:
    A dictionary containing the average NDCG, HR, and MRR.
  """
  average_ndcg = metrics_df['NDCG@K'].mean()
  average_hr = metrics_df['HR@K'].mean()
  average_mrr = metrics_df['MRR@K'].mean()
  average_cv = metrics_df['CV'].mean()
  return {
      'Average NDCG@K': average_ndcg,
      'Average HR@K': average_hr,
      'Average MRR@K': average_mrr,
      'Average CV': average_cv
  }


overall_avg_metrics_lightfm = calculate_average_metrics(metrics_lightfm_df)
print("\nOverall Average LightFM Metrics:")
overall_avg_metrics_lightfm


Overall Average LightFM Metrics:


{'Average NDCG@K': 0.015596212795900101,
 'Average HR@K': 0.024555555555555556,
 'Average MRR@K': 0.010442592592592592,
 'Average CV': 72.03280793268206}

In [22]:

ndcg_low = grouped_metrics_lightfm.loc["Low", "NDCG@K"]
ndcg_moderate_high = grouped_metrics_lightfm.loc["High", "NDCG@K"]


di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_moderate_high)
gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_moderate_high)

cv_low_ndcg = coefficient_of_variance(ndcg_low)
cv_moderate_high_ndcg = coefficient_of_variance(ndcg_moderate_high)


print(f"\nLightFM - NDCG Metrics:")
print(f"Disparate Impact: {di_ndcg}")
print(f"Group Recommender Unfairness: {gru_ndcg}")
print(f"Coefficient of Variance Low: {cv_low_ndcg}")
print(f"Coefficient of Variance High: {cv_moderate_high_ndcg}")


hr_low = grouped_metrics_lightfm.loc["Low", "HR@K"]
hr_moderate_high = grouped_metrics_lightfm.loc["High", "HR@K"]


di_hr = calculate_disparate_impact(hr_low, hr_moderate_high)
gru_hr = calculate_group_recommender_unfairness(hr_low, hr_moderate_high)

cv_low_hr = coefficient_of_variance(hr_low)
cv_moderate_high_hr = coefficient_of_variance(hr_moderate_high)


print(f"\nLightFM - HR@K Metrics:")
print(f"Disparate Impact: {di_hr}")
print(f"Group Recommender Unfairness: {gru_hr}")
print(f"Coefficient of Variance Low: {cv_low_hr}")
print(f"Coefficient of Variance High: {cv_moderate_high_hr}")



mrr_low = grouped_metrics_lightfm.loc["Low", "MRR@K"]
mrr_moderate_high = grouped_metrics_lightfm.loc["High", "MRR@K"]

di_mrr = calculate_disparate_impact(mrr_low, mrr_moderate_high)
gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_moderate_high)

cv_low_mrr = coefficient_of_variance(mrr_low)
cv_moderate_high_mrr = coefficient_of_variance(mrr_moderate_high)


print(f"\nLightFM - MRR@K Metrics:")
print(f"Disparate Impact: {di_mrr}")
print(f"Group Recommender Unfairness: {gru_mrr}")
print(f"Coefficient of Variance Low: {cv_low_mrr}")
print(f"Coefficient of Variance High: {cv_moderate_high_mrr}")



LightFM - NDCG Metrics:
Disparate Impact: 2.336459514210217
Group Recommender Unfairness: 0.017081941766557376
Coefficient of Variance Low: 0.0
Coefficient of Variance High: 0.0

LightFM - HR@K Metrics:
Disparate Impact: 2.2052482244660263
Group Recommender Unfairness: 0.024691795472631107
Coefficient of Variance Low: 0.0
Coefficient of Variance High: 0.0

LightFM - MRR@K Metrics:
Disparate Impact: 2.5209304315970447
Group Recommender Unfairness: 0.012699711776405637
Coefficient of Variance Low: 0.0
Coefficient of Variance High: 0.0


# CAN CRASH

In [44]:
import numpy as np
from lightfm import LightFM
from scipy.sparse import csr_matrix  # sparse matrix
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.cross_validation import random_train_test_split
from sklearn.metrics import ndcg_score
import pandas as pd 


def evaluate_lightfm_multiple_rounds(
    interactions, test_df, df_rent_clean, num_rounds=3, k=5, epochs=30, user_id_to_print=55
):
    """
    Evaluates LightFM model with multiple training rounds, fairness metrics, and prints recommendations for a user.

    Args:
        interactions: The user-item interaction matrix (sparse).
        test_df: The test DataFrame.
        df_rent_clean: The cleaned main DataFrame with user activity.
        num_rounds: The number of training rounds.
        k: The value of k for precision@k, recall@k metrics.
        epochs: The number of epochs for each training round.
        user_id_to_print: The user ID for which recommendations will be printed.

    Returns:
        A dictionary containing metrics for each round.
    """

    train_interactions, test_interactions = random_train_test_split(
        interactions, test_percentage=0.2, random_state=42
    )

    round_metrics = {}
    model = LightFM(loss='warp', no_components=50, learning_rate=0.1, item_alpha=1e-05, user_alpha=1e-06)


    test_items = test_df.groupby("user_id")["item_id"].first().to_dict()


    user_id_map = {user: i for i, user in enumerate(train_user_item_matrix.index)}


    for round_num in range(num_rounds):
        print(f"Training Round {round_num + 1}...")
        model.fit(train_interactions, epochs=epochs)

        test_user_ids = test_df['user_id'].unique()

        ndcg_scores = []
        hr_scores = []
        mrr_scores = []
        cv_scores = []
        engagement_levels_list = []

        for user_id in test_user_ids:
            actual_item = test_items.get(user_id, None)
            if actual_item is None:
                continue


            user_id_internal = user_id_map.get(user_id, None)
            if user_id_internal is None:
                
                print(f"User {user_id} not found in training data. Skipping evaluation.")
                continue

            engagement_level_row = df_rent_clean[df_rent_clean["user_id"] == user_id]
            if engagement_level_row.empty:
                 print(f"User {user_id} not found in df_rent_clean for engagement level. Skipping.")
                 continue
            engagement_level = engagement_level_row["user_activity"].values[0]
            engagement_levels_list.append(engagement_level)


            scores = model.predict(user_id_internal, np.arange(interactions.shape[1]))
            top_k_indices = np.argsort(-scores)[:k]
            top_k_items_predicted = [train_user_item_matrix.columns[i] for i in top_k_indices]

            relevance_scores = [1 if item == actual_item else 0 for item in top_k_items_predicted]

            
            ndcg_scores.append(ndcg_at_k(relevance_scores, k))

            hr_scores.append(hr_at_k([actual_item], top_k_items_predicted, k))
            mrr_scores.append(mrr_at_k([actual_item], top_k_items_predicted, k))
            cv_scores.append(coefficient_of_variance(np.array(relevance_scores))) 


        ndcg = np.mean(ndcg_scores) if ndcg_scores else 0
        hr = np.mean(hr_scores) if hr_scores else 0
        mrr = np.mean(mrr_scores) if mrr_scores else 0
        cv = np.mean(cv_scores) if cv_scores else 0


        ndcg_low = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "Low"
        ]
        ndcg_moderate_high = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "High" 
        ]
    
       

        cv_low = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "Low"
        ]
        cv_moderate_high = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "High"
        ]
        hr_low = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        hr_moderate_high = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "High" 
        ]
        mrr_low = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        mrr_moderate_high = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "High" 
        ]
        ucv_ndcg = calculate_ucv(ndcg_low, ndcg_moderate_high)
        
        ucv_hr = calculate_ucv(hr_low, hr_moderate_high)
        ucv_mrr = calculate_ucv(mrr_low, mrr_moderate_high)
        cv_low_mean = np.mean(cv_low) if cv_low else 0
        cv_moderate_high_mean = np.mean(cv_moderate_high) if cv_moderate_high else 0
        hr_low_mean = np.mean(hr_low) if hr_low else 0
        hr_moderate_high_mean = np.mean(hr_moderate_high) if hr_moderate_high else 0
        mrr_low_mean = np.mean(mrr_low) if mrr_low else 0
        mrr_moderate_high_mean = np.mean(mrr_moderate_high) if mrr_moderate_high else 0
        di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_moderate_high) if ndcg_moderate_high else np.inf 
        gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_moderate_high)
        ndcg_low = np.mean(ndcg_low) if ndcg_low else 0
        ndcg_moderate_high = np.mean(ndcg_moderate_high) if ndcg_moderate_high else 0
        di_hr = calculate_disparate_impact(hr_low, hr_moderate_high) if hr_moderate_high else np.inf 
        gru_hr = calculate_group_recommender_unfairness(hr_low, hr_moderate_high)
        di_mrr = calculate_disparate_impact(mrr_low, mrr_moderate_high) if mrr_moderate_high else np.inf
        gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_moderate_high)

        
        round_metrics[round_num + 1] = {
            "ndcg@k": ndcg,
            "hr@k": hr,
            "mrr@k": mrr,
            "ndcg_low": ndcg_low,
            "ndcg_moderate_high": ndcg_moderate_high,
            "DI_NDCG": di_ndcg,
            "GRU_NDCG": gru_ndcg,
            "CV": cv,
            "cv_low": cv_low_mean,
            "cv_moderate_high": cv_moderate_high_mean,
            "HR_LOW": hr_low_mean,
            "HR_MODERATE_HIGH": hr_moderate_high_mean,
            "MRR_LOW": mrr_low_mean,
            "MRR_MODERATE_HIGH": mrr_moderate_high_mean,
            "UCV_NDCG": ucv_ndcg,
            "UCV_HR": ucv_hr,
            "UCV_MRR": ucv_mrr,
            "DI_HR": di_hr,
            "GRU_HR": gru_hr,
            "DI_MRR": di_mrr,
            "GRU_MRR": gru_mrr
        }

        print(f"Metrics for round {round_num + 1}: {round_metrics[round_num + 1]}")


        if round_num == 0 and user_id_to_print in user_id_map:
            user_index = user_id_map[user_id_to_print]
            scores = model.predict(user_index, np.arange(interactions.shape[1]))
            top_items_indices = np.argsort(-scores)[:k]
            top_items_actual_ids = [train_user_item_matrix.columns[i] for i in top_items_indices]
            print(
                f"Top {k} recommendations for user {user_id_to_print}: {top_items_actual_ids}"
            )
        elif round_num == 0 and user_id_to_print not in user_id_map:
            print(
                f"User {user_id_to_print} not found in training data mapping. Skipping recommendations."
            )

    return round_metrics

interactions = csr_matrix(train_user_item_matrix.values)


round_results = evaluate_lightfm_multiple_rounds(interactions, test_df, df_rent_clean)

print("\nOverall Results:")
print(round_results)

Training Round 1...
User 696430 not found in training data. Skipping evaluation.
User 987390 not found in training data. Skipping evaluation.
User 405715 not found in training data. Skipping evaluation.
User 436933 not found in training data. Skipping evaluation.
User 522905 not found in training data. Skipping evaluation.
User 751887 not found in training data. Skipping evaluation.
User 854188 not found in training data. Skipping evaluation.
User 818665 not found in training data. Skipping evaluation.
User 750491 not found in training data. Skipping evaluation.
User 387463 not found in training data. Skipping evaluation.
User 160450 not found in training data. Skipping evaluation.
User 523458 not found in training data. Skipping evaluation.
User 791921 not found in training data. Skipping evaluation.
User 214592 not found in training data. Skipping evaluation.
User 24811 not found in training data. Skipping evaluation.
User 418034 not found in training data. Skipping evaluation.
User 

Overall Results:
{1: {'ndcg@k': 0.006912527105497238, 'hr@k': 0.012555555555555556, 'mrr@k': 0.005074074074074074, 'ndcg_low': 0.014818835626908363, 'ndcg_moderate_high': 0.005352721925604633, 'DI_NDCG': 2.7684673018455848, 'GRU_NDCG': 0.009466113701303729, 'CV': 0.025111111111111115, 'cv_low': 0.049898853674983146, 'cv_moderate_high': 0.02022083277903419, 'HR_LOW': 0.024949426837491573, 'HR_MODERATE_HIGH': 0.010110416389517094, 'MRR_LOW': 0.011485727129692064, 'MRR_MODERATE_HIGH': 0.0038091437186820983, 'UCV_NDCG': 8.485734784068576, 'UCV_HR': 8.073164067254087, 'UCV_MRR': 9.302230135492294, 'DI_HR': 2.46769528338716, 'GRU_HR': 0.014839010447974478, 'DI_MRR': 3.0153042200429074, 'GRU_MRR': 0.0076765834110099655}, 2: {'ndcg@k': 0.006990195329679597, 'hr@k': 0.012666666666666666, 'mrr@k': 0.005133333333333333, 'ndcg_low': 0.016578119201138313, 'ndcg_moderate_high': 0.00509863072925745, 'DI_NDCG': 3.2514845811461037, 'GRU_NDCG': 0.011479488471880864, 'CV': 0.025333333333333333, 'cv_low': 0.05799055967633176, 'cv_moderate_high': 0.0188905148330451, 'HR_LOW': 0.02899527983816588, 'HR_MODERATE_HIGH': 0.00944525741652255, 'MRR_LOW': 0.012530905821532929, 'MRR_MODERATE_HIGH': 0.0036738947275065407, 'UCV_NDCG': 8.395601220599916, 'UCV_HR': 8.013838740969508, 'UCV_MRR': 9.1545963220025, 'DI_HR': 3.069824204837928, 'GRU_HR': 0.01955002242164333, 'DI_MRR': 3.410796103577418, 'GRU_MRR': 0.008857011094026389}, 3: {'ndcg@k': 0.007339389669820055, 'hr@k': 0.013222222222222222, 'mrr@k': 0.005433333333333334, 'ndcg_low': 0.018527898893130255, 'ndcg_moderate_high': 0.005132051745359629, 'DI_NDCG': 3.6102322837806673, 'GRU_NDCG': 0.013395847147770625, 'CV': 0.026444444444444444, 'cv_low': 0.06608226567768037, 'cv_moderate_high': 0.01862445124384728, 'HR_LOW': 0.033041132838840186, 'HR_MODERATE_HIGH': 0.00931222562192364, 'MRR_LOW': 0.013823331085637222, 'MRR_MODERATE_HIGH': 0.003778102966609019, 'UCV_NDCG': 8.340071381915592, 'UCV_HR': 7.862043426342337, 'UCV_MRR': 9.27305729462702, 'DI_HR': 3.548145650708024, 'GRU_HR': 0.023728907216916548, 'DI_MRR': 3.6588021046033457, 'GRU_MRR': 0.010045228119028204}}

In [37]:
print(f"train_user_item_matrix.shape: {train_user_item_matrix.shape}")


train_user_item_matrix.shape: (27439, 5389)


In [38]:
print(f"df_rent_clean head:\n{df_rent_clean.head()}")


df_rent_clean head:
          fit  user_id bust size  item_id  weight  rating     rented for  \
125033    fit   581157       34b   126335  118lbs     4.0          party   
83094   large   623100       34d   125564  120lbs     2.0  formal affair   
131558    fit   284739       36c   145906  170lbs    10.0  formal affair   
54303   small   395409       36c   144714  170lbs     6.0  formal affair   
140548    fit   946927       34c   160612  130lbs    10.0       vacation   

                                              review_text  body type  \
125033  The dress is gorgeous but the gathering in the...     petite   
83094   This dress just didn't work for me.  There was...  hourglass   
131558  I wore this dress for an inaugural ball.  I go...       pear   
54303          Beautiful gown, i just wish it had fit me        pear   
140548  This dress fit me beautifully, I received both...   athletic   

                                   review_summary category height  size   age  \
125033   

In [45]:
import numpy as np
from lightfm import LightFM
from scipy.sparse import coo_matrix # sparse matrix
from lightfm.evaluation import precision_at_k, recall_at_k


interactions = coo_matrix(train_user_item_matrix.values)


model = LightFM(loss='bpr')
 # https://making.lyst.com/lightfm/docs/lightfm.html


model.fit(interactions, epochs=30, num_threads=2)

train_precision = precision_at_k(model, interactions, k=5).mean()
train_recall = recall_at_k(model, interactions, k=5).mean()


print(f"Train Precision at k=5: {train_precision}")
print(f"Train Recall at k=5: {train_recall}")


user_id = 55
scores = model.predict(user_id, np.arange(interactions.shape[1]))


top_items = np.argsort(-scores)

print("Top 10 recommended items for user 55:", top_items[:10])

KeyboardInterrupt: 

In [None]:
test_items = test_df.groupby("user_id")["item_id"].first().to_dict()

def calculate_metrics_for_user(user, actual_item, recs, group):
    relevance_scores = [1 if item == actual_item else 0 for item in recs]
    ndcg = ndcg_at_k(relevance_scores, k=6)
    hr = hr_at_k([actual_item], recs, k=5)
    mrr = mrr_at_k([actual_item], recs, k=5)
    cv = coefficient_of_variance(relevance_scores)
    return [user, ndcg, hr, mrr, cv, group]



metrics_lightfm = []
for user_id in train_user_item_matrix.index:  
    
    actual_item = test_items.get(user_id, None)
    if actual_item is None:
        continue

    
    engagement_level = df_rent_clean[df_rent_clean["user_id"] == user_id][
        "user_activity"
    ].values[0]
    try:
        user_id_internal = train_user_item_matrix.index.get_loc(user_id)
    except KeyError:
        
        print(f"User {user_id} not found in training data. Skipping.")
        continue

   
    scores = best_model.predict(user_id_internal, np.arange(interactions.shape[1])) 
    top_items = np.argsort(-scores)
    recs = [train_user_item_matrix.columns[i] for i in top_items]

   
    metrics_lightfm.append(
        calculate_metrics_for_user(user_id, actual_item, recs, engagement_level)
    )

metrics_lightfm_df = pd.DataFrame(
    metrics_lightfm, columns=["User", "NDCG@K", "HR@K", "MRR@K","CV", "Engagement Group"]
)
metrics_lightfm_df['User'] = pd.to_numeric(metrics_lightfm_df['User'], errors='coerce')


grouped_metrics_lightfm = metrics_lightfm_df.groupby("Engagement Group").agg(
    {"NDCG@K": "mean", "HR@K": "mean", "MRR@K": "mean", "CV":"mean"}
)
print("\nLightFM Metrics by User Engagement Level:")
grouped_metrics_lightfm


In [None]:


ndcg_low = grouped_metrics_lightfm.loc["Low", "NDCG@K"]
ndcg_high = grouped_metrics_lightfm.loc["High", "NDCG@K"]


di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_high)
gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_high)

print(f"\nLightFM - BPR - NDCG Metrics:")
print(f"Disparate Impact: {di_ndcg}")
print(f"Group Recommender Unfairness: {gru_ndcg}")

hr_low = grouped_metrics_lightfm.loc["Low", "HR@K"]
hr_high = grouped_metrics_lightfm.loc["High", "HR@K"]


di_hr = calculate_disparate_impact(hr_low, hr_high)
gru_hr = calculate_group_recommender_unfairness(hr_low, hr_high)

print(f"\nLightFM - BPR - HR@K Metrics:")
print(f"Disparate Impact: {di_hr}")
print(f"Group Recommender Unfairness: {gru_hr}")


mrr_low = grouped_metrics_lightfm.loc["Low", "MRR@K"]
mrr_high = grouped_metrics_lightfm.loc["High", "MRR@K"]


di_mrr = calculate_disparate_impact(mrr_low, mrr_high)
gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_high)

print(f"\nLightFM - BPR - MRR@K Metrics:")
print(f"Disparate Impact: {di_mrr}")
print(f"Group Recommender Unfairness: {gru_mrr}")

In [46]:
import numpy as np
from lightfm import LightFM
from scipy.sparse import csr_matrix  # sparse matrix
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from lightfm.cross_validation import random_train_test_split
from sklearn.metrics import ndcg_score
import pandas as pd # Import pandas


def evaluate_lightfm_multiple_rounds(
    interactions, test_df, df_rent_clean, num_rounds=3, k=5, epochs=30, user_id_to_print=55
):
    """
    Evaluates LightFM model with multiple training rounds, fairness metrics, and prints recommendations for a user.

    Args:
        interactions: The user-item interaction matrix (sparse).
        test_df: The test DataFrame.
        df_rent_clean: The cleaned main DataFrame with user activity.
        num_rounds: The number of training rounds.
        k: The value of k for precision@k, recall@k metrics.
        epochs: The number of epochs for each training round.
        user_id_to_print: The user ID for which recommendations will be printed.

    Returns:
        A dictionary containing metrics for each round.
    """

    train_interactions, test_interactions = random_train_test_split(
        interactions, test_percentage=0.2, random_state=42
    )

    round_metrics = {}
    model = LightFM(loss='bpr', no_components=50, learning_rate=0.1, item_alpha=1e-05, user_alpha=1e-06)


    test_items = test_df.groupby("user_id")["item_id"].first().to_dict()


    user_id_map = {user: i for i, user in enumerate(train_user_item_matrix.index)}


    for round_num in range(num_rounds):
        print(f"Training Round {round_num + 1}...")
        model.fit(train_interactions, epochs=epochs) 

       
        test_user_ids = test_df['user_id'].unique()

        ndcg_scores = []
        hr_scores = []
        mrr_scores = []
        cv_scores = []
        engagement_levels_list = []

        for user_id in test_user_ids:
            actual_item = test_items.get(user_id, None)
            if actual_item is None:
                continue

            user_id_internal = user_id_map.get(user_id, None)
            if user_id_internal is None:
               
                print(f"User {user_id} not found in training data. Skipping evaluation.")
                continue


            engagement_level_row = df_rent_clean[df_rent_clean["user_id"] == user_id]
            if engagement_level_row.empty:
                 print(f"User {user_id} not found in df_rent_clean for engagement level. Skipping.")
                 continue
            engagement_level = engagement_level_row["user_activity"].values[0]
            engagement_levels_list.append(engagement_level)


            scores = model.predict(user_id_internal, np.arange(interactions.shape[1]))
            top_k_indices = np.argsort(-scores)[:k]
            top_k_items_predicted = [train_user_item_matrix.columns[i] for i in top_k_indices]

            relevance_scores = [1 if item == actual_item else 0 for item in top_k_items_predicted]

           
            ndcg_scores.append(ndcg_at_k(relevance_scores, k))


            hr_scores.append(hr_at_k([actual_item], top_k_items_predicted, k))
            mrr_scores.append(mrr_at_k([actual_item], top_k_items_predicted, k))
            cv_scores.append(coefficient_of_variance(np.array(relevance_scores))) 

 
        ndcg = np.mean(ndcg_scores) if ndcg_scores else 0
        hr = np.mean(hr_scores) if hr_scores else 0
        mrr = np.mean(mrr_scores) if mrr_scores else 0
        cv = np.mean(cv_scores) if cv_scores else 0



        ndcg_low = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "Low"
        ]
        ndcg_moderate_high = [
            ndcg_scores[i]
            for i in range(len(ndcg_scores))
            if engagement_levels_list[i] == "High"
        ]
        cv_low = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "Low"
        ]
        cv_moderate_high = [
            cv_scores[i]
            for i in range(len(cv_scores))
            if engagement_levels_list[i] == "High"
        ]
        hr_low = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        hr_moderate_high = [
            hr_scores[i]
            for i in range(len(hr_scores))
            if engagement_levels_list[i] == "High"
        ]
        mrr_low = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "Low"
        ]
        mrr_moderate_high = [
            mrr_scores[i]
            for i in range(len(mrr_scores))
            if engagement_levels_list[i] == "High"
        ]

        ucv_ndcg = calculate_ucv(ndcg_low, ndcg_moderate_high)
        
        ucv_hr = calculate_ucv(hr_low, hr_moderate_high)
        ucv_mrr = calculate_ucv(mrr_low, mrr_moderate_high)
        cv_low_mean = np.mean(cv_low) if cv_low else 0
        cv_moderate_high_mean = np.mean(cv_moderate_high) if cv_moderate_high else 0
        hr_low_mean = np.mean(hr_low) if hr_low else 0
        hr_moderate_high_mean = np.mean(hr_moderate_high) if hr_moderate_high else 0
        mrr_low_mean = np.mean(mrr_low) if mrr_low else 0
        mrr_moderate_high_mean = np.mean(mrr_moderate_high) if mrr_moderate_high else 0
        di_ndcg = calculate_disparate_impact(ndcg_low, ndcg_moderate_high) if ndcg_moderate_high else np.inf 
        gru_ndcg = calculate_group_recommender_unfairness(ndcg_low, ndcg_moderate_high)
        ndcg_low = np.mean(ndcg_low) if ndcg_low else 0
        ndcg_moderate_high = np.mean(ndcg_moderate_high) if ndcg_moderate_high else 0
        di_hr = calculate_disparate_impact(hr_low, hr_moderate_high) if hr_moderate_high else np.inf
        gru_hr = calculate_group_recommender_unfairness(hr_low, hr_moderate_high)
        di_mrr = calculate_disparate_impact(mrr_low, mrr_moderate_high) if mrr_moderate_high else np.inf
        gru_mrr = calculate_group_recommender_unfairness(mrr_low, mrr_moderate_high)
        round_metrics[round_num + 1] = {
            "ndcg@k": ndcg,
            "hr@k": hr,
            "mrr@k": mrr,
            "DI_NDCG": di_ndcg,
            "GRU_NDCG": gru_ndcg,
            "ndcg_low": ndcg_low,
            "ndcg_moderate_high": ndcg_moderate_high,
            "CV": cv,
            "cv_low": cv_low_mean,
            "cv_moderate_high": cv_moderate_high_mean,
            "HR_LOW": hr_low_mean,
            "HR_MODERATE_HIGH": hr_moderate_high_mean,
            "MRR_LOW": mrr_low_mean,
            "MRR_MODERATE_HIGH": mrr_moderate_high_mean,
            "UCV_NDCG": ucv_ndcg,
            "UCV_HR": ucv_hr,
            "UCV_MRR": ucv_mrr,
            "DI_HR": di_hr,
            "GRU_HR": gru_hr,
            "DI_MRR": di_mrr,
            "GRU_MRR": gru_mrr
        }

        print(f"Metrics for round {round_num + 1}: {round_metrics[round_num + 1]}")


        if round_num == 0 and user_id_to_print in user_id_map: 
            user_index = user_id_map[user_id_to_print]
            scores = model.predict(user_index, np.arange(interactions.shape[1]))
            top_items_indices = np.argsort(-scores)[:k]
            top_items_actual_ids = [train_user_item_matrix.columns[i] for i in top_items_indices]
            print(
                f"Top {k} recommendations for user {user_id_to_print}: {top_items_actual_ids}"
            )
        elif round_num == 0 and user_id_to_print not in user_id_map:
            print(
                f"User {user_id_to_print} not found in training data mapping. Skipping recommendations."
            )

    return round_metrics

interactions = csr_matrix(train_user_item_matrix.values)

round_results = evaluate_lightfm_multiple_rounds(interactions, test_df, df_rent_clean)

print("\nOverall Results:")
print(round_results)

Training Round 1...
User 696430 not found in training data. Skipping evaluation.
User 987390 not found in training data. Skipping evaluation.
User 405715 not found in training data. Skipping evaluation.
User 436933 not found in training data. Skipping evaluation.
User 522905 not found in training data. Skipping evaluation.
User 751887 not found in training data. Skipping evaluation.
User 854188 not found in training data. Skipping evaluation.
User 818665 not found in training data. Skipping evaluation.
User 750491 not found in training data. Skipping evaluation.
User 387463 not found in training data. Skipping evaluation.
User 160450 not found in training data. Skipping evaluation.
User 523458 not found in training data. Skipping evaluation.
User 791921 not found in training data. Skipping evaluation.
User 214592 not found in training data. Skipping evaluation.
User 24811 not found in training data. Skipping evaluation.
User 418034 not found in training data. Skipping evaluation.
User 

Overall Results:
{1: {'ndcg@k': 0.0032436810153349226, 'hr@k': 0.0064444444444444445, 'mrr@k': 0.002194444444444444, 'DI_NDCG': 1.9488763145771328, 'GRU_NDCG': 0.0026616873421238326, 'ndcg_low': 0.0054667814320843505, 'ndcg_moderate_high': 0.002805094089960518, 'CV': 0.012888888888888889, 'cv_low': 0.02157788267026298, 'cv_moderate_high': 0.011174670746308367, 'HR_LOW': 0.01078894133513149, 'HR_MODERATE_HIGH': 0.0055873353731541835, 'MRR_LOW': 0.0037199370645088783, 'MRR_MODERATE_HIGH': 0.0018934858764578066, 'UCV_NDCG': 11.64644070136195, 'UCV_HR': 11.458065367879275, 'UCV_MRR': 12.075506405521077, 'DI_HR': 1.9309636194329385, 'GRU_HR': 0.005201605961977307, 'DI_MRR': 1.964597207066504, 'GRU_MRR': 0.0018264511880510717}, 2: {'ndcg@k': 0.0030591744490191834, 'hr@k': 0.006111111111111111, 'mrr@k': 0.0020703703703703704, 'DI_NDCG': 1.6424746662995122, 'GRU_NDCG': 0.0017772884662253896, 'ndcg_low': 0.004543605271309878, 'ndcg_moderate_high': 0.0027663168050844887, 'CV': 0.012222222222222223, 'cv_low': 0.01753202966958867, 'cv_moderate_high': 0.011174670746308367, 'HR_LOW': 0.008766014834794335, 'HR_MODERATE_HIGH': 0.0055873353731541835, 'MRR_LOW': 0.003158013036637447, 'MRR_MODERATE_HIGH': 0.0018557935346547824, 'UCV_NDCG': 12.307826946650248, 'UCV_HR': 11.987267544517136, 'UCV_MRR': 13.054980436172727, 'DI_HR': 1.5689079407892623, 'GRU_HR': 0.0031786794616401517, 'DI_MRR': 1.7017049459787592, 'GRU_MRR': 0.0013022195019826645}, 3: {'ndcg@k': 0.002938381990438287, 'hr@k': 0.006, 'mrr@k': 0.0019444444444444444, 'DI_NDCG': 0.9818839325837447, 'GRU_NDCG': 5.339130597340272e-05, 'ndcg_low': 0.0028937883852158345, 'ndcg_moderate_high': 0.002947179691189237, 'CV': 0.012, 'cv_low': 0.012137559002022926, 'cv_moderate_high': 0.011972861513901822, 'HR_LOW': 0.006068779501011463, 'HR_MODERATE_HIGH': 0.005986430756950911, 'MRR_LOW': 0.0018655877725331535, 'MRR_MODERATE_HIGH': 0.0019600017737572612, 'UCV_NDCG': 13.074922174816814, 'UCV_HR': 12.841696301937258, 'UCV_MRR': 13.673085520268357, 'DI_HR': 1.0137559002022927, 'GRU_HR': 8.23487440605522e-05, 'DI_MRR': 0.95182963480532, 'GRU_MRR': 9.441400122410775e-05}}

In [43]:
metrics = ["NDCG@K", "HR@K", "MRR@K"]

for metric in metrics:
    low_metric = grouped_metrics_lightfm.loc["Low", metric]
    high_metric = grouped_metrics_lightfm.loc["High", metric]

    di = calculate_disparate_impact([low_metric], [high_metric]) 
    gru = calculate_group_recommender_unfairness([low_metric], [high_metric])

    print(f"\nLightFM - {metric} Metrics:")
    print(f"Disparate Impact: {di}")
    print(f"Group Recommender Unfairness: {gru}")


LightFM - NDCG@K Metrics:
Disparate Impact: 2.336459514210217
Group Recommender Unfairness: 0.017081941766557376

LightFM - HR@K Metrics:
Disparate Impact: 2.2052482244660263
Group Recommender Unfairness: 0.024691795472631107

LightFM - MRR@K Metrics:
Disparate Impact: 2.5209304315970447
Group Recommender Unfairness: 0.012699711776405637
