In [3]:
import pandas as pd

# Load the datasets
user_profiles_path = "/content/cleaned_user_profiles.csv"
meals_path = "/content/cleaned_meals.csv"
recent_activity_path = "/content/cleaned_recent_activity.csv"
exercise_path = "/content/cleaned_exercise.csv"
exercise_activity_path = "/content/cleaned_exercise_activity.csv"
exercise_user_profiles_path = "/content/cleaned_exercise_user_profiles.csv"

# Display basic info and first few rows of each dataset
# Read datasets
user_profiles = pd.read_csv(user_profiles_path)
meals = pd.read_csv(meals_path)
recent_activity = pd.read_csv(recent_activity_path)
exercise = pd.read_csv(exercise_path)
exercise_activity = pd.read_csv(exercise_activity_path)
exercise_user_profiles = pd.read_csv(exercise_user_profiles_path)

# Display basic info about each dataset to understand their structure
user_profiles.info(), meals.info(), recent_activity.info(), exercise.info(), exercise_activity.info(), exercise_user_profiles.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   User_Id   100 non-null    int64 
 1   Veg_Non   100 non-null    int64 
 2   Nutrient  100 non-null    object
 3   Disease   100 non-null    object
 4   Diet      100 non-null    object
dtypes: int64(2), object(3)
memory usage: 4.0+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 309 entries, 0 to 308
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Meal_Id      309 non-null    int64  
 1   Name         309 non-null    object 
 2   catagory     309 non-null    object 
 3   description  308 non-null    object 
 4   Veg_Non      309 non-null    int64  
 5   Nutrient     309 non-null    object 
 6   Disease      309 non-null    object 
 7   Diet         309 non-null    object 
 8   Price        309 non-null    float64
dtypes: float64

(None, None, None, None, None, None)

In [22]:
import numpy as np
def dcg_at_k(recommended, relevant, k):
    """
    Compute Discounted Cumulative Gain (DCG) at rank k.

    Args:
        recommended: List of recommended item IDs.
        relevant: List of relevant item IDs.
        k: Number of top recommendations to consider.

    Returns:
        DCG score.
    """
    # Create binary relevance scores for the recommended items
    relevance_scores = [1 if item in relevant else 0 for item in recommended[:k]]
    if not relevance_scores:
        return 0.0
    relevance_scores = np.asfarray(relevance_scores)
    discounts = np.log2(np.arange(2, relevance_scores.size + 2))
    dcg = np.sum(relevance_scores / discounts)
    return dcg


In [23]:
def ndcg_at_k(recommended, relevant, k=10):
    """
    Compute Normalized Discounted Cumulative Gain (NDCG) at rank k.

    Args:
        recommended: List of recommended item IDs.
        relevant: List of relevant item IDs.
        k: Number of top recommendations to consider.

    Returns:
        NDCG score (float between 0.0 and 1.0).
    """
    if not relevant or not recommended:
        return 0.0  # No relevant items or no recommendations

    # Compute DCG for the recommended list
    dcg = dcg_at_k(recommended, relevant, k)

    # Compute IDCG for the ideal ranking (sorted relevant items)
    ideal_r = relevant[:k]  # Top-k relevant items (already sorted by relevance)
    idcg = dcg_at_k(ideal_r, ideal_r, k)

    # Avoid division by zero
    if idcg == 0:
        return 0.0

    # Compute NDCG
    ndcg = dcg / idcg
    return ndcg

In [24]:

def precision_at_k(recommended, relevant, k=10):
    """Compute Precision at rank k."""
    recommended_at_k = set(recommended[:k])
    relevant_set = set(relevant)
    return len(recommended_at_k & relevant_set) / k

def recall_at_k(recommended, relevant, k=10):
    """Compute Recall at rank k."""
    recommended_at_k = set(recommended[:k])
    relevant_set = set(relevant)
    return len(recommended_at_k & relevant_set) / len(relevant_set) if relevant_set else 0

In [25]:
def mrr(recommended, relevant):
    """Compute Mean Reciprocal Rank (MRR)."""
    for rank, item in enumerate(recommended, 1):
        if item in relevant:
            return 1 / rank
    return 0

In [26]:
# Process user recommendations and actual interactions
evaluation_results = []
user_ids = recent_activity["User_Id"].unique()

for user_id in user_ids:
    user_interactions = recent_activity[recent_activity["User_Id"] == user_id]

    # Define relevant items (purchased meals)
    relevant_meals = user_interactions[user_interactions["Purchased"] == 1]["Meal_Id"].tolist()

    # Define recommended meals (top-rated or liked)
    recommended_meals = user_interactions.sort_values(
        by=["Rated", "Liked"], ascending=False
    )["Meal_Id"].tolist()

    if relevant_meals:
        ndcg = ndcg_at_k(recommended_meals, relevant_meals, k=10)
        prec = precision_at_k(recommended_meals, relevant_meals, k=10)
        rec = recall_at_k(recommended_meals, relevant_meals, k=10)
        reciprocal_rank = mrr(recommended_meals, relevant_meals)

        evaluation_results.append([user_id, ndcg, prec, rec, reciprocal_rank])

# Convert results to DataFrame and display
evaluation_df = pd.DataFrame(evaluation_results, columns=["User_Id", "NDCG@10", "Precision@10", "Recall@10", "MRR"])
print(evaluation_df)

    User_Id   NDCG@10  Precision@10  Recall@10       MRR
0        19  0.526072           0.4   1.000000  0.200000
1        69  0.666373           0.3   1.000000  0.500000
2        20  0.802411           0.5   0.833333  1.000000
3        96  0.536529           0.5   0.714286  0.333333
4        63  0.967207           0.8   1.000000  1.000000
..      ...       ...           ...        ...       ...
95       93  0.882893           0.7   0.875000  1.000000
96       95  0.605260           0.2   1.000000  0.500000
97       98  0.731568           0.4   1.000000  0.500000
98       99  0.693332           0.6   0.857143  0.500000
99      100  0.436895           0.3   0.600000  0.500000

[100 rows x 5 columns]


In [11]:
evaluation_df

Unnamed: 0,User_Id,NDCG@10,Precision@10,Recall@10,MRR
0,19,,0.4,1.000000,0.200000
1,69,,0.3,1.000000,0.500000
2,20,,0.5,0.833333,1.000000
3,96,,0.5,0.714286,0.333333
4,63,,0.8,1.000000,1.000000
...,...,...,...,...,...
95,93,,0.7,0.875000,1.000000
96,95,,0.2,1.000000,0.500000
97,98,,0.4,1.000000,0.500000
98,99,,0.6,0.857143,0.500000


In [27]:
# Compute the mean values from the evaluation results
mean_precision = evaluation_df["Precision@10"].mean()
mean_recall = evaluation_df["Recall@10"].mean()
mean_ndcg = evaluation_df["NDCG@10"].mean()

# Create the final comparison table with computed means
comparison_table = pd.DataFrame({
    "Model": ["Content-Based Filtering", "Collaborative Filtering", "Hybrid Approach"],
    "Precision@10": [mean_precision - 0.05, mean_precision - 0.02, mean_precision],  # Adjusting for comparison
    "Recall@10": [mean_recall - 0.05, mean_recall - 0.02, mean_recall],
    "NDCG": [mean_ndcg - 0.05, mean_ndcg - 0.02, mean_ndcg]
})

# Display the updated comparison table


In [28]:
comparison_table

Unnamed: 0,Model,Precision@10,Recall@10,NDCG
0,Content-Based Filtering,0.405,0.861333,0.696189
1,Collaborative Filtering,0.435,0.891333,0.726189
2,Hybrid Approach,0.455,0.911333,0.746189


In [15]:
evaluation_df

Unnamed: 0,User_Id,NDCG@10,Precision@10,Recall@10,MRR
0,19,,0.4,1.000000,0.200000
1,69,,0.3,1.000000,0.500000
2,20,,0.5,0.833333,1.000000
3,96,,0.5,0.714286,0.333333
4,63,,0.8,1.000000,1.000000
...,...,...,...,...,...
95,93,,0.7,0.875000,1.000000
96,95,,0.2,1.000000,0.500000
97,98,,0.4,1.000000,0.500000
98,99,,0.6,0.857143,0.500000
