In [1]:
import random
import math

class User:
    """Per-user tally of reward and impressions, keyed by content id."""

    def __init__(self, user_id):
        """Create a user with the given id and an empty preference table."""
        # Maps content_id -> (summed_reward, impression_count).
        self.preferences = {}
        self.user_id = user_id

    def update_preference(self, content_id, reward):
        """Add ``reward`` to the content's running sum and count one impression."""
        # setdefault seeds a (0, 0) entry the first time a content id is seen.
        summed, shown = self.preferences.setdefault(content_id, (0, 0))
        self.preferences[content_id] = (summed + reward, shown + 1)

    def get_preference(self, content_id):
        """Return ``(summed_reward, impression_count)``; ``(0, 0)`` if unseen."""
        if content_id in self.preferences:
            return self.preferences[content_id]
        return (0, 0)

class Content:
    """A recommendable item: an identifier plus a category label."""

    def __init__(self, content_id, category):
        """Store the item's identifier and its category."""
        self.content_id, self.category = content_id, category
            
class UCB1Bandit:
    """Pick content for a user with the UCB1 upper-confidence-bound rule.

    Each option is scored as its mean observed reward plus an exploration
    bonus ``exploration_constant * sqrt(ln(N) / n)``, where ``n`` is the
    option's impression count and ``N`` is the impression count summed over
    the options on offer.
    """

    def __init__(self, exploration_constant=1.0):
        """Store the multiplier applied to the exploration bonus."""
        self.exploration_constant = exploration_constant

    def choose_content(self, user, content_options):
        """Return the option with the highest UCB1 score for ``user``.

        Args:
            user: Object whose ``get_preference(content_id)`` returns a
                ``(total_reward, impressions)`` pair.
            content_options: Iterable of objects exposing ``content_id``.
                One-shot iterables (e.g. generators) are accepted, and the
                items do not need to be hashable.

        Returns:
            The highest-scoring option. Options with zero impressions score
            infinity, so they are picked before any option already shown;
            ties go to the earliest option.

        Raises:
            ValueError: If ``content_options`` is empty.
        """
        # Materialise once: the options are walked more than once below, which
        # would silently exhaust a generator.
        candidates = list(content_options)
        if not candidates:
            raise ValueError("content_options must contain at least one item")

        # One lookup per option, reused for both the total and the score.
        stats = [user.get_preference(item.content_id) for item in candidates]
        total_impressions = sum(impressions for _, impressions in stats)

        def score(stat):
            reward, impressions = stat
            if impressions == 0:
                # Never shown: force it to be tried first.
                return float('inf')
            bonus = math.sqrt(math.log(total_impressions) / impressions)
            return (reward / impressions) + self.exploration_constant * bonus

        scores = [score(stat) for stat in stats]
        # Select by position so options are never used as dict keys; max()
        # returns the first index among equal scores.
        best_index = max(range(len(candidates)), key=scores.__getitem__)
        return candidates[best_index]


# Demo run: two users, three content items, ten recommendation rounds each.
users = {uid: User(uid) for uid in (1, 2)}
contents = {
    cid: Content(cid, category)
    for cid, category in (("A", "News"), ("B", "Sports"), ("C", "Tech"))
}
bandit = UCB1Bandit()

for user_id, user in users.items():
    for _ in range(10):
        # Every round offers the full catalogue to the bandit.
        available_contents = [*contents.values()]
        chosen_content = bandit.choose_content(user, available_contents)

        # Simulated feedback: a reward drawn uniformly from [0, 1].
        reward = random.uniform(0, 1)
        print(f"User   {user_id} recommended: {chosen_content.content_id} Reward: {reward:.2f}")
        user.update_preference(chosen_content.content_id, reward)

# Final per-user tallies of (summed reward, impressions) per content id.
for user_id in users:
    print(f"User   {user_id} Preferences: {users[user_id].preferences}")

User   1 recommended: A Reward: 0.68
User   1 recommended: B Reward: 0.41
User   1 recommended: C Reward: 0.37
User   1 recommended: A Reward: 0.23
User   1 recommended: B Reward: 0.41
User   1 recommended: C Reward: 0.61
User   1 recommended: C Reward: 0.55
User   1 recommended: A Reward: 0.59
User   1 recommended: B Reward: 0.03
User   1 recommended: C Reward: 0.64
User   2 recommended: A Reward: 0.65
User   2 recommended: B Reward: 0.49
User   2 recommended: C Reward: 0.91
User   2 recommended: C Reward: 0.75
User   2 recommended: A Reward: 0.65
User   2 recommended: B Reward: 0.91
User   2 recommended: C Reward: 0.77
User   2 recommended: B Reward: 0.03
User   2 recommended: A Reward: 0.93
User   2 recommended: C Reward: 0.55
User   1 Preferences: {'A': (1.4987143149877502, 3), 'B': (0.8395154224215566, 3), 'C': (2.179171477665609, 4)}
User   2 Preferences: {'A': (2.2281265474070246, 3), 'B': (1.4289665031629828, 3), 'C': (2.9835297322622503, 4)}
