In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
from lenskit.algorithms.item_knn import ItemItem
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

print('Loading datasets...')

# Training split: drop the columns that are not needed and rename the rest to
# the 'user' / 'item' / 'rating' names used in the remainder of the notebook.
ratings_df = pd.read_csv("Dataset/ratings_splits/temporal_global/filtered/train.csv")
ratings_df_cleaned = (
    ratings_df
    .drop(columns=['RatingID', 'Date', 'Vintage'])
    .rename(columns={'WineID': 'item', 'UserID': 'user', 'Rating': 'rating'})
)

# Test split, reshaped exactly like the training split.
ratings_df_test = pd.read_csv("Dataset/ratings_splits/temporal_global/filtered/test.csv")
ratings_df_cleaned_test = (
    ratings_df_test
    .drop(columns=['RatingID', 'Date', 'Vintage'])
    .rename(columns={'WineID': 'item', 'UserID': 'user', 'Rating': 'rating'})
)

# Wine table indexed by WineID; the ID is additionally mirrored into a regular
# column so it can be filtered on by name.
wines_df = pd.read_csv('Dataset/last/Xwines_Slim_1K_wines.csv', index_col="WineID")
wines_df['WineID'] = wines_df.index

In [None]:
# Neighbourhood settings shared by both k-NN models, kept in one place so the
# two algorithms cannot drift apart by accident.
# NOTE(review): the first positional argument of UserUser / ItemItem is taken
# to be LensKit's `nnbrs` (maximum neighbourhood size) — confirm against the
# installed LensKit version.
N_NEIGHBORS = 15
MIN_NEIGHBORS = 3

print('Fitting User-User algorithm...')
user_user = UserUser(N_NEIGHBORS, min_nbrs=MIN_NEIGHBORS)
# Recommender.adapt wraps the algorithm so that .recommend() can be called on it below.
recsys_user_user = Recommender.adapt(user_user)
recsys_user_user.fit(ratings_df_cleaned)

print('Fitting Item-Item algorithm...')
itemitem = ItemItem(N_NEIGHBORS, min_nbrs=MIN_NEIGHBORS)
recsys_item_item = Recommender.adapt(itemitem)
recsys_item_item.fit(ratings_df_cleaned)

In [None]:
def enrich_recommendations(recs_df, user_id, wines=None, test_ratings=None):
    """Attach the wine name and the user's own test rating to each recommendation.

    Parameters
    ----------
    recs_df : pandas.DataFrame
        Recommendations with at least an ``'item'`` column. The frame is
        modified in place (two columns are added) and also returned.
    user_id :
        The user whose ratings are looked up in ``test_ratings``.
    wines : pandas.DataFrame, optional
        Wine table with ``'WineID'`` and ``'WineName'`` columns. Defaults to
        the notebook-level ``wines_df``.
    test_ratings : pandas.DataFrame, optional
        Ratings with ``'user'``, ``'item'`` and ``'rating'`` columns. Defaults
        to the notebook-level ``ratings_df_cleaned_test``.

    Returns
    -------
    pandas.DataFrame
        ``recs_df`` itself, with a ``'WineName'`` column (``'Unknown Wine'``
        when the item is not in ``wines``) and a ``'Your Rating'`` column
        (the string ``'No rating'`` when the user has no rating for the item).
    """
    if wines is None:
        wines = wines_df
    if test_ratings is None:
        test_ratings = ratings_df_cleaned_test

    # Build each lookup once instead of scanning the full tables for every
    # recommendation row. setdefault keeps the FIRST occurrence of a key,
    # matching the previous "take the first matching row" behaviour.
    name_by_item = {}
    for wine_id, wine_name in zip(wines['WineID'].tolist(), wines['WineName'].tolist()):
        name_by_item.setdefault(wine_id, wine_name)

    # The user's ratings do not depend on the recommendation row, so filter once.
    user_rows = test_ratings[test_ratings['user'] == user_id]
    rating_by_item = {}
    for item_id, rating in zip(user_rows['item'].tolist(), user_rows['rating'].tolist()):
        rating_by_item.setdefault(item_id, rating)

    items = recs_df['item'].tolist()
    recs_df['WineName'] = [name_by_item.get(item, 'Unknown Wine') for item in items]
    recs_df['Your Rating'] = [rating_by_item.get(item, 'No rating') for item in items]
    return recs_df

# Cell 5: Define the metric calculation functions
def calculate_acceptance_rate(recommendations_df, threshold=4):
    """Return the share of recommendations the user rated at or above ``threshold``.

    Parameters
    ----------
    recommendations_df : pandas.DataFrame
        Must contain a ``'Your Rating'`` column. Non-numeric entries (such as
        the ``'No rating'`` placeholder) and missing values are treated as
        "not accepted" rather than raising.
    threshold : number, default 4
        Minimum rating that counts as an accepted recommendation.

    Returns
    -------
    float or int
        ``accepted / total`` over ALL rows (unrated rows stay in the
        denominator); ``0`` when the frame has no rows.
    """
    total_recommendations = recommendations_df.shape[0]
    if total_recommendations == 0:
        return 0
    # Coerce so that placeholder strings become NaN; NaN >= threshold is False.
    ratings = pd.to_numeric(recommendations_df['Your Rating'], errors='coerce')
    accepted_recommendations = int((ratings >= threshold).sum())
    return accepted_recommendations / total_recommendations

def calculate_satisfaction_score(recommendations_df):
    """Return the mean of the ``'Your Rating'`` column.

    Non-numeric entries (such as the ``'No rating'`` placeholder) are coerced
    to NaN and therefore skipped by the mean instead of raising.

    Returns
    -------
    float or int
        ``0`` when the frame is empty; NaN when the frame has rows but none of
        them carries a numeric rating; otherwise the mean rating.
    """
    if recommendations_df.empty:
        return 0
    ratings = pd.to_numeric(recommendations_df['Your Rating'], errors='coerce')
    return ratings.mean()

def calculate_coverage(recommendations_df):
    """Return the share of recommendations that carry a numeric user rating.

    The ``'Your Rating'`` column is coerced to numeric first, so placeholder
    strings such as ``'No rating'`` count as "no rating" instead of being
    mistaken for a present value by ``notna()``.

    Returns
    -------
    float or int
        ``rated / total``; ``0`` when the frame has no rows.
    """
    total_recommendations = recommendations_df.shape[0]
    if total_recommendations == 0:
        return 0
    ratings = pd.to_numeric(recommendations_df['Your Rating'], errors='coerce')
    explained_recommendations = int(ratings.notna().sum())
    return explained_recommendations / total_recommendations

def calculate_diversity(recommendations_df, attribute='WineName'):
    """Ratio of distinct ``attribute`` values to the number of recommendations.

    Returns ``0`` when the column is absent or the frame has no rows. Missing
    values are not counted as a distinct value (``nunique`` default).
    """
    row_count = len(recommendations_df.index)
    column_present = attribute in recommendations_df.columns
    if not column_present or row_count <= 0:
        return 0
    distinct_count = recommendations_df[attribute].nunique()
    return distinct_count / row_count

def calculate_explanation_coherence(recommendations_df):
    """Mean pairwise TF-IDF cosine similarity between the recommended wine names.

    Only entries of the ``'WineName'`` column that are non-blank strings are
    used. Returns ``0`` when the column is missing or no usable name remains,
    and ``1.0`` when the similarity matrix has a single entry. The mean is
    taken over the full similarity matrix as returned by ``cosine_similarity``.
    """
    if 'WineName' not in recommendations_df.columns:
        return 0

    # Missing values are not strings, so this single filter also drops them.
    usable_names = [
        name
        for name in recommendations_df['WineName']
        if isinstance(name, str) and name.strip() != ""
    ]
    if not usable_names:
        return 0

    name_vectors = TfidfVectorizer().fit_transform(usable_names)
    pairwise_similarity = cosine_similarity(name_vectors)
    if pairwise_similarity.size > 1:
        return pairwise_similarity.mean()
    return 1.0

In [None]:
print("Calculating metrics for all users...")

# Length of the top-N list requested from each algorithm for every user.
N_RECOMMENDATIONS = 1000
# Per-item columns produced by enrich_recommendations that are independent of
# which algorithm recommended the item.
META_COLUMNS = ['item', 'WineName', 'Your Rating']

user_ids = ratings_df_cleaned_test['user'].unique()
all_user_metrics = []

for user_id in tqdm(user_ids, desc="Processing users"):
    selected_wines_useruser = recsys_user_user.recommend(user_id, N_RECOMMENDATIONS)
    enriched_useruser_recs = enrich_recommendations(selected_wines_useruser, user_id)

    selected_wines_itemitem = recsys_item_item.recommend(user_id, N_RECOMMENDATIONS)
    enriched_itemitem_recs = enrich_recommendations(selected_wines_itemitem, user_id)

    useruser_recs_df = enriched_useruser_recs[['item', 'score']].rename(columns={'score': 'User-User Score'})
    itemitem_recs_df = enriched_itemitem_recs[['item', 'score']].rename(columns={'score': 'Item-Item Score'})

    # One row per distinct recommended item, with name and rating taken from
    # whichever list contains it. Joining on the item ID (not on WineName)
    # avoids row multiplication when several wines share a name or fall back
    # to 'Unknown Wine', and keeps the user's rating for wines that only the
    # item-item model recommended.
    item_meta = pd.concat(
        [enriched_useruser_recs[META_COLUMNS], enriched_itemitem_recs[META_COLUMNS]],
        ignore_index=True,
    ).drop_duplicates(subset='item')

    merged_recs_df = (
        item_meta
        .merge(useruser_recs_df, on='item', how='left')
        .merge(itemitem_recs_df, on='item', how='left')
    )
    # 'No rating' placeholders become NaN so the metric functions see numbers only.
    merged_recs_df['Your Rating'] = pd.to_numeric(merged_recs_df['Your Rating'], errors='coerce')

    acceptance_rate = calculate_acceptance_rate(merged_recs_df)
    satisfaction_score = calculate_satisfaction_score(merged_recs_df)
    coverage = calculate_coverage(merged_recs_df)
    coherence = calculate_explanation_coherence(merged_recs_df)
    diversity = calculate_diversity(merged_recs_df, attribute='WineName')

    all_user_metrics.append({
        'UserID': user_id,
        'Acceptance Rate': acceptance_rate,
        'Satisfaction Score': satisfaction_score,
        'Coverage': coverage,
        'Explanation Coherence': coherence,
        'Diversity': diversity
    })

In [None]:
# Columns that hold actual metric values; 'UserID' is an identifier and must
# not be averaged or plotted alongside them.
METRIC_COLUMNS = ['Acceptance Rate', 'Satisfaction Score', 'Coverage', 'Explanation Coherence', 'Diversity']

# Passing `columns` guarantees the expected columns exist even if no user was processed.
metrics_df = pd.DataFrame(all_user_metrics, columns=['UserID'] + METRIC_COLUMNS)
average_metrics = metrics_df[METRIC_COLUMNS].mean()

print("----- Average Metrics for All Users -----")
print(average_metrics)

# Per-user distribution of every metric.
sns.set(style="whitegrid")
plt.figure(figsize=(12, 6))
sns.boxplot(data=metrics_df[METRIC_COLUMNS])
plt.title('Distribution of Recommendation Metrics Across All Users')
plt.show()

# Mean of each metric across users (UserID intentionally excluded).
average_metrics.plot(kind='bar', color='skyblue', figsize=(10, 5))
plt.title('Average Metrics Across All Users')
plt.ylabel('Metric Score')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.show()