In [35]:
# Import libraries for data manipulation (pandas) and the bandit's linear algebra (NumPy)
import pandas as pd
import numpy as np

In [36]:
# Read the preprocessed stress-relief dataset into a DataFrame
df = pd.read_csv('../data/processed/stress_relief_data.csv')
# Sanity check: list the column names, then preview the leading rows
column_names = df.columns.tolist()
print('Columns:', column_names)
head_preview = df.head()
print('First 5 rows:\n', head_preview)

Columns: ['user_id', 'mood', 'stress_level', 'activity', 'feedback']
First 5 rows:
    user_id     mood stress_level    activity  feedback
0      433     calm          low  journaling         0
1     1140  anxious         high  journaling         1
2      934  anxious         high  meditation         1
3      681  anxious         high  meditation         1
4     1623     calm          low       music         1


In [37]:
# LinUCB Bandit Implementation
class LinUCBBandit:
    """LinUCB contextual bandit that keeps one linear model per arm.

    For every arm the bandit stores a ``context_dim x context_dim`` matrix
    ``A`` (identity plus the sum of outer products of the contexts observed
    for that arm) and a vector ``b`` (sum of reward-weighted contexts).
    The arm's score for a context ``x`` is::

        theta = A^-1 b
        ucb   = theta . x + alpha * sqrt(x^T A^-1 x)

    Attributes:
        arms: List of arm identifiers, in the order they were supplied.
        context_dim: Number of features expected in every context vector.
        alpha: Multiplier on the exploration term of the score.
        A: Dict mapping arm -> (context_dim, context_dim) float matrix.
        b: Dict mapping arm -> (context_dim,) float vector.
    """

    def __init__(self, arms, context_dim, alpha=1.0):
        """Create a bandit with untrained per-arm statistics.

        Args:
            arms: Iterable of hashable arm identifiers (e.g. activity names).
            context_dim: Number of features in a context vector.
            alpha: Exploration parameter; larger values favour arms whose
                statistics are less certain for the given context.
        """
        # Materialize once: a generator would otherwise be exhausted by the
        # first comprehension below and leave ``b`` (and ``arms``) empty.
        self.arms = list(arms)
        self.context_dim = context_dim
        self.alpha = alpha
        # Identity start keeps A invertible before any observation is made.
        self.A = {arm: np.identity(context_dim) for arm in self.arms}
        self.b = {arm: np.zeros(context_dim) for arm in self.arms}

    def _as_context(self, context):
        """Return ``context`` as a flat float array of length ``context_dim``.

        Accepts lists, tuples, and row/column arrays. Raises ``ValueError``
        when the number of features is wrong, so a mis-sized context can
        never be broadcast into ``A`` or ``b``.
        """
        x = np.asarray(context, dtype=float).ravel()
        if x.shape[0] != self.context_dim:
            raise ValueError(
                f"context has {x.shape[0]} features, expected {self.context_dim}"
            )
        return x

    def select_arm(self, context):
        """Return the arm with the highest upper confidence bound.

        Args:
            context: Array-like with ``context_dim`` features.

        Returns:
            The selected arm identifier. When several arms share the top
            score, the one listed earliest in ``arms`` is returned.

        Raises:
            ValueError: If ``context`` has the wrong number of features, or
                if the bandit has no arms.
        """
        x = self._as_context(context)
        ucb_values = {}
        for arm in self.arms:
            A_inv = np.linalg.inv(self.A[arm])
            theta = A_inv.dot(self.b[arm])
            # Predicted reward for this context.
            exploit = theta.dot(x)
            # Quadratic form x^T A^-1 x; clamp at zero so floating-point
            # round-off can never feed a negative value to sqrt.
            spread = x.dot(A_inv).dot(x)
            explore = self.alpha * np.sqrt(max(spread, 0.0))
            ucb_values[arm] = exploit + explore
        return max(ucb_values, key=ucb_values.get)

    def update(self, arm, context, reward):
        """Fold one observed (context, reward) pair into an arm's statistics.

        Args:
            arm: Identifier of the arm that was played.
            context: Array-like with ``context_dim`` features.
            reward: Numeric reward observed for playing ``arm``.

        Raises:
            ValueError: If ``context`` has the wrong number of features.
            KeyError: If ``arm`` is not one of the bandit's arms.
        """
        # Validate before touching any state so a bad call leaves A and b intact.
        x = self._as_context(context)
        self.A[arm] += np.outer(x, x)
        self.b[arm] += reward * x

# Prepare context features
def get_context(mood, stress_level):
    """Encode a (mood, stress_level) pair as a 4-element 0/1 vector.

    Feature order is: mood == 'anxious', mood == 'calm',
    stress_level == 'high', stress_level == 'low'. A value that matches
    none of its two labels contributes zeros for both of its slots.

    Returns:
        A 1-D integer NumPy array of length 4.
    """
    # (observed value, label) pairs, listed in output feature order
    indicator_spec = (
        (mood, 'anxious'),
        (mood, 'calm'),
        (stress_level, 'high'),
        (stress_level, 'low'),
    )
    flags = [1 if observed == label else 0 for observed, label in indicator_spec]
    return np.array(flags)

# Example usage (uncomment to run; it builds a fresh, untrained bandit, so no dataset is needed):
# arms = ['meditation', 'music', 'journaling', 'breathing', 'walking']
# bandit = LinUCBBandit(arms, context_dim=4, alpha=1.0)
# ctx = get_context('anxious', 'high')
# chosen = bandit.select_arm(ctx)
# print('Chosen arm:', chosen)

In [38]:
# Set up a bandit over the five candidate activities
activities = ['meditation', 'music', 'journaling', 'breathing', 'walking']
context_dim = 4  # two mood indicators (anxious, calm) + two stress indicators (high, low)
bandit = LinUCBBandit(activities, context_dim, alpha=1.0)

# Simulated interaction loop, seeded so the run is repeatable
np.random.seed(42)
n_interactions = 100
for _ in range(n_interactions):
    # Draw the user's mood first, then the stress level; the order of these
    # draws determines the seeded random stream, so it must not be swapped.
    mood = np.random.choice(['anxious', 'calm'])
    stress_level = np.random.choice(['high', 'low'])
    context = get_context(mood, stress_level)
    # Let the bandit pick an activity for this context
    arm = bandit.select_arm(context)
    # Dataset rows recorded for the same mood, stress level and activity
    is_match = (df['mood'] == mood) & (df['stress_level'] == stress_level) & (df['activity'] == arm)
    relevant_rows = df[is_match]
    if relevant_rows.empty:
        # No matching history: fall back to a random 0/1 reward (1 with probability 0.7)
        reward = float(np.random.choice([0, 1], p=[0.3, 0.7]))
    else:
        # Reward is the average recorded feedback; a NaN average counts as 0.0
        reward = float(relevant_rows['feedback'].mean())
        reward = 0.0 if np.isnan(reward) else reward
    # Feed the observation back into the chosen arm's statistics
    bandit.update(arm, context, reward)

# Ask the trained bandit for a recommendation on one sample context
test_mood = 'anxious'
test_stress_level = 'low'
test_context = get_context(test_mood, test_stress_level)
recommended_activity = bandit.select_arm(test_context)
recommendation_label = f"Recommended activity for mood '{test_mood}' and stress_level '{test_stress_level}':"
print(recommendation_label, recommended_activity)

Recommended activity for mood 'anxious' and stress_level 'low': walking
