# In [4]:
import numpy as np

class Bandit:
    """Per-dimension, per-bin slope estimator with a confidence width.

    For every content dimension ``k`` and score bin ``j`` the bandit keeps
    the running sums ``sum(x**2)`` and ``sum(x*y)`` of the scores ``x`` that
    fell into that bin and the severities ``y`` observed for them.  From
    these it derives ``beta_parameters[k][j] = sum(x*y) / sum(x**2)`` (the
    least-squares slope through the origin) and a non-negative width
    ``uncertainties[k][j] = sqrt(log(1/delta) / sum(x**2))``.

    Args:
        num_bins: Number of score bins per dimension.
        d: Number of content dimensions.
        bin_boundaries: Optional sorted sequence of ``num_bins + 1`` bin
            edges.  When omitted, the module-level ``bin_boundaries`` is
            looked up each time parameters are updated (the original
            behaviour).

    Raises:
        ValueError: If ``bin_boundaries`` is given but does not contain
            exactly ``num_bins + 1`` edges.
    """

    def __init__(self, num_bins, d, bin_boundaries=None):
        self.num_bins = num_bins
        self.d = d
        if bin_boundaries is not None:
            bin_boundaries = np.asarray(bin_boundaries, dtype=float)
            if bin_boundaries.shape != (num_bins + 1,):
                raise ValueError(
                    "bin_boundaries must contain num_bins + 1 edges"
                )
        self.bin_boundaries = bin_boundaries
        self.beta_parameters = np.zeros((d, num_bins))
        # Width starts at 1 for every cell that has not been observed yet.
        self.uncertainties = np.ones((d, num_bins))
        self.time_step = 0
        self.sum_xt_sq = np.zeros((d, num_bins))
        self.sum_xt_yt = np.zeros((d, num_bins))

    def _bin_edges(self):
        """Return the bin edges: the instance's own, else the module global."""
        if self.bin_boundaries is not None:
            return self.bin_boundaries
        # Backward compatibility: callers that construct Bandit(num_bins, d)
        # rely on a module-level ``bin_boundaries`` being defined.
        return np.asarray(bin_boundaries, dtype=float)

    def update_parameters(self, max_risk_score, true_severity):
        """Fold one reviewed observation into the per-bin estimates.

        Args:
            max_risk_score: Sequence of at least ``d`` scores, one per
                dimension.
            true_severity: Sequence of at least ``d`` observed severities,
                aligned with ``max_risk_score``.

        Scores outside ``[edges[0], edges[-1])`` are ignored.  Bins are
        half-open ``[lo, hi)``, so a score that sits exactly on an interior
        edge belongs to the bin that starts there.
        """
        self.time_step += 1
        edges = self._bin_edges()

        scores = np.asarray(max_risk_score, dtype=float)
        # side="right" gives i with edges[i-1] <= x < edges[i]; the bin is i-1.
        bins = np.searchsorted(edges, scores, side="right") - 1

        for k in range(self.d):
            j = int(bins[k])
            if not 0 <= j < self.num_bins:
                continue
            x = scores[k]
            self.sum_xt_sq[k, j] += x ** 2
            self.sum_xt_yt[k, j] += x * true_severity[k]

        # Refresh every populated cell.  As in the original, a fresh delta is
        # drawn per cell on every update, so widths of untouched cells also
        # change between calls.
        for j in range(self.num_bins):
            for k in range(self.d):
                denom = self.sum_xt_sq[k, j]
                if denom == 0:
                    continue
                # random_sample() is in [0, 1); flip it to (0, 1] so that
                # log(1 / delta) is finite and non-negative.  The original
                # sqrt(log(delta) / denom) took the root of a negative number
                # and produced NaN.
                delta = 1.0 - np.random.random_sample()
                self.beta_parameters[k, j] = self.sum_xt_yt[k, j] / denom
                self.uncertainties[k, j] = np.sqrt(np.log(1.0 / delta) / denom)

class BanditAgent:
    """Chooses which pooled content item to review and feeds the bandit.

    Each content item is a length-``d`` sequence of scores.  The agent
    scores every item with optimistic per-bin slopes (estimate plus
    uncertainty) taken from its ``Bandit``, reviews the item with the
    highest scaled score and updates the bandit with the observed severity.

    Relies on a module-level ``bin_boundaries`` sequence holding
    ``num_bins + 1`` bin edges.

    Args:
        num_bins: Number of score bins per dimension.
        d: Number of content dimensions.
    """

    def __init__(self, num_bins, d):
        self.num_bins = num_bins
        self.d = d
        self.bandit = Bandit(num_bins, d)
        self.content_pool = []

    def scale_risk_score(self, content):
        """Return the largest scaled per-dimension score of ``content``."""
        return max(self.f(content))

    def f(self, content):
        """Scale each dimension of ``content`` by its bin's optimistic slope.

        For dimension ``k`` whose score falls in bin ``j`` (half-open
        ``[lo, hi)``), the result is
        ``(beta_parameters[k][j] + uncertainties[k][j]) * content[k]``.
        Dimensions whose score matches no bin stay at 0.

        Returns:
            A NumPy array of length ``d``.
        """
        result = np.zeros(self.d)
        for k in range(self.d):
            value = content[k]
            for j in range(self.num_bins):
                if bin_boundaries[j] <= value < bin_boundaries[j + 1]:
                    optimistic_slope = (
                        self.bandit.beta_parameters[k][j]
                        + self.bandit.uncertainties[k][j]
                    )
                    result[k] = optimistic_slope * value
        return result

    def update_bandit(self):
        """Review the highest-scoring pooled item and update the bandit.

        Does nothing when the pool is empty or when no item has a positive
        scaled score.
        """
        if not self.content_pool:
            return

        max_risk_score = -np.inf
        content_to_review = None

        for content in self.content_pool:
            risk_score = self.scale_risk_score(content)
            if risk_score > max_risk_score:
                max_risk_score = risk_score
                content_to_review = content

        if content_to_review is not None and max_risk_score > 0:
            # Manual review is simulated with a random severity per dimension.
            true_severity = np.random.uniform(0, 10, size=self.d)
            # Update with the item that was actually selected; the original
            # passed the loop variable, i.e. always the last item in the pool.
            self.bandit.update_parameters(content_to_review, true_severity)
            # NOTE(review): the reviewed item stays in content_pool, so it can
            # be selected again on later calls -- confirm whether it should be
            # removed once reviewed.

# Simulation setup: scores are drawn from [0, 10) and that range is split
# into equal-width bins.
num_bins = 5
bin_boundaries = np.linspace(start=0, stop=10, num=num_bins + 1)

d = 10  # number of dimensions per content item

num_iterations = 1000

# Create the BanditAgent instance
agent = BanditAgent(num_bins=num_bins, d=d)

# Main loop: each step adds one random content item to the pool, then lets
# the agent review its current top-scoring item.
for t in range(num_iterations):
    content = np.random.uniform(low=0, high=10, size=d)
    agent.content_pool.append(content)
    agent.update_bandit()


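# Residual notebook output -- a source line echoed by the cell's output
# (presumably a warning raised while evaluating it; not confirmed here):
#   self.uncertainties[k][j] = np.sqrt(np.log(delta)/ self.sum_xt_sq[k][j])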
