In [1]:
# Import libraries for data manipulation and TF-IDF
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the processed dataset
df = pd.read_csv('../data/processed/stress_relief_data.csv')
# Verify columns and first few rows
print('Columns:', df.columns.tolist())
print('First 5 rows:\n', df.head())

Columns: ['user_id', 'mood', 'stress_level', 'activity', 'feedback']
First 5 rows:
    user_id     mood stress_level    activity  feedback
0        2     calm          low       music         0
1        9     calm          low  meditation         1
2       44     calm          low  meditation         1
3       30  anxious         high     walking         0
4        3     calm          low       music         0


In [3]:
# Prepare data for content-based filtering
# Combine mood and activity into a 'profile' for each row
df['profile'] = df['mood'] + ' ' + df['activity']
# Filter rows where feedback=1 (liked activities)
df_liked = df[df['feedback'] == 1]
# Create TF-IDF matrix for mood+activity profiles
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_liked['profile'])
# Print shape of TF-IDF matrix
print('TF-IDF matrix shape:', tfidf_matrix.shape)

TF-IDF matrix shape: (72, 7)


In [5]:
# Function to recommend activities based on mood
def content_based_recommend(mood, tfidf, tfidf_matrix, df_liked, n=3):
    # Transform input mood to TF-IDF vector
    mood_vector = tfidf.transform([mood])
    # Compute cosine similarity between mood and all profiles
    similarities = cosine_similarity(mood_vector, tfidf_matrix).flatten()
    # Get indices of top n similar profiles
    top_indices = similarities.argsort()[-n:][::-1]
    # Return corresponding activities
    recommended_activities = df_liked.iloc[top_indices]['activity'].tolist()
    return recommended_activities

# Test recommendations for a sample mood
sample_mood = 'anxious'
recommended = content_based_recommend(sample_mood, tfidf, tfidf_matrix, df_liked)
print(f'Recommended activities for mood \"{sample_mood}\":', recommended)

Recommended activities for mood "anxious": ['meditation', 'meditation', 'meditation']
