# Hybrid Recommender System: SVD + Sentiment Blending


In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split


In [2]:
# --- Step 1: Load Feedback + Sentiment + Ratings ---
df = pd.read_csv("/kaggle/input/recommender-system-data/preprocessed_feedback.csv")
print(f"✅ Loaded feedback data: {df.shape}")

# Aggregate average sentiment per trainer
trainer_sentiment = df.groupby("trainer_id")["vader_score"].mean().reset_index()
trainer_sentiment.columns = ["trainer_id", "avg_sentiment"]


✅ Loaded feedback data: (38444, 17)


In [3]:
# --- Step 2: Prepare and Train Base SVD Model ---
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['learner_id', 'trainer_id', 'rating']], reader)
trainset = data.build_full_trainset()

model = SVD()
model.fit(trainset)
print("✅ SVD model trained.")


✅ SVD model trained.


In [4]:
# --- Step 3: Define Hybrid Recommendation Logic ---
def get_hybrid_recommendations(algo, learner_id, all_trainers, rated_trainers, sentiment_df,
                                weight_rating=0.7, weight_sentiment=0.3, top_n=5):
    """
    Hybrid ranking: combines predicted rating + trainer's avg sentiment score
    """
    sentiment_map = dict(zip(sentiment_df['trainer_id'], sentiment_df['avg_sentiment']))
    candidates = [t for t in all_trainers if t not in rated_trainers]

    scored = []
    for trainer in candidates:
        pred_rating = algo.predict(learner_id, trainer).est
        sentiment = sentiment_map.get(trainer, 0.0)
        hybrid_score = (weight_rating * pred_rating) + (weight_sentiment * sentiment * 5)  # normalize sentiment
        scored.append((trainer, hybrid_score))

    return sorted(scored, key=lambda x: x[1], reverse=True)[:top_n]


In [5]:
# --- Step 4: Generate Hybrid Recommendations ---
learner_id = df['learner_id'].iloc[0]
rated_trainers = df[df['learner_id'] == learner_id]['trainer_id'].tolist()
all_trainers = df['trainer_id'].unique().tolist()

hybrid_top = get_hybrid_recommendations(
    model, learner_id, all_trainers, rated_trainers,
    trainer_sentiment, weight_rating=0.6, weight_sentiment=0.4, top_n=5
)

# Display results
print(f"🔁 Top 5 Hybrid Recommendations for Learner {learner_id}:")
for trainer, score in hybrid_top:
    print(f"Trainer: {trainer}, Hybrid Score: {score:.2f}")


🔁 Top 5 Hybrid Recommendations for Learner 1:
Trainer: USA, Hybrid Score: 3.88
Trainer: France, Hybrid Score: 3.88
Trainer: Germany, Hybrid Score: 3.88
Trainer: Canada, Hybrid Score: 3.88


In [7]:
# --- Step 5: Save Hybrid Output ---
pd.DataFrame(hybrid_top, columns=["trainer_id", "hybrid_score"])\
  .to_csv(f"/kaggle/working/hybrid_recommendations_{learner_id}.csv", index=False)
print("📁 Hybrid recommendations saved.")

📁 Hybrid recommendations saved.
