# UCB-Normal Algorithm

In [1]:
import numpy as np
import csv
import math

# Class BanditAlgorithm: Initialization

In [2]:
class BanditAlgorithm:
    """Collects the per-run results of one bandit algorithm and exports them.

    Attributes:
        name: Label of the algorithm (stored as given).
        results: One row per recorded run, laid out as
            ``[param, iteration, total_reward, suboptimal_arms_count,
            total_regret (rounded to 2 decimals), zeros_count, ones_count]``.
    """

    def __init__(self, name):
        self.name = name
        self.results = []

    def add_result(self, param, iteration, total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count):
        """Append the outcome of a single run; the regret is rounded to 2 decimals."""
        self.results.append([param, iteration, total_reward, suboptimal_arms_count, round(total_regret, 2), zeros_count, ones_count])

    def save_results_to_csv(self, filename):
        """Write a header row followed by every recorded run to ``filename``."""
        with open(filename, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Timestep', 'Iteration', 'Total Reward', 'Suboptimal Arms', 'Total Regret', 'Zeros Count', 'Ones Count'])
            writer.writerows(self.results)

    def calculate_average_results(self):
        """Average the recorded metrics per parameter value.

        Returns:
            A list with one entry per distinct ``param`` (in order of first
            appearance), each of the form ``[param, avg_total_reward,
            avg_suboptimal_arms, avg_regret, avg_zeros_count, avg_ones_count]``.
            An empty list is returned when nothing has been recorded.
        """
        sums = {}
        run_counts = {}
        for result in self.results:
            param = result[0]
            if param not in sums:
                sums[param] = [0, 0, 0, 0, 0]
                run_counts[param] = 0
            run_counts[param] += 1
            # Columns 2..6: total reward, suboptimal arms, regret, zeros, ones.
            for position, value in enumerate(result[2:7]):
                sums[param][position] += value

        # Divide by the number of runs actually recorded for each parameter
        # instead of assuming that there were exactly 100 of them.
        return [
            [param] + [total / run_counts[param] for total in totals]
            for param, totals in sums.items()
        ]

## Definition UCB-Normal

In [3]:
def UCB_normal_simulation(algorithm, arm_means, arm_variances, time_horizon):
    """Simulate one run of the UCB1-Normal index policy on Gaussian arms.

    Args:
        algorithm: Not used inside this function; kept so existing callers
            do not have to change.
        arm_means: True mean reward of each arm.
        arm_variances: True reward variance of each arm (the standard
            deviation passed to the sampler is its square root).
        time_horizon: Number of rounds to play.

    Returns:
        Tuple ``(total_reward, suboptimal_arms_count, total_regret,
        zeros_count, ones_count)`` where
        * ``suboptimal_arms_count`` counts the rounds in which the pulled arm
          was not the arm with the largest true mean,
        * ``total_regret`` sums ``max(arm_means) - arm_means[arm]`` over those
          rounds,
        * ``zeros_count`` / ``ones_count`` count the rounds whose reward was
          ``<=`` / ``>`` the true mean of the arm pulled in that round.
    """
    num_arms = len(arm_means)
    rewards = np.zeros(num_arms)
    counts = np.zeros(num_arms)
    sum_of_squares = np.zeros(num_arms)
    total_reward = 0
    suboptimal_arms_count = 0
    total_regret = 0
    zeros_count = 0
    ones_count = 0

    # Nothing to play: return the untouched accumulators without evaluating
    # log(time_horizon), which is undefined for a horizon below 1.
    if time_horizon < 1:
        return total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count

    # Loop-invariant quantities, computed once instead of in every round.
    best_arm = np.argmax(arm_means)
    best_mean = np.max(arm_means)
    # NOTE(review): the threshold is derived from the full horizon, not from
    # the current round t - confirm that this is the intended variant.
    min_pulls = math.ceil(8 * np.log(time_horizon))

    for t in range(1, time_horizon + 1):
        if np.any(counts < min_pulls):
            # Forced exploration: play the arm that has been pulled least.
            arm = np.argmin(counts)
        else:
            ucb_values = np.zeros(num_arms)
            for j in range(num_arms):
                if counts[j] > 1:
                    mean_reward = rewards[j] / counts[j]
                    # Floating-point cancellation can push the sum of squared
                    # deviations marginally below zero; clamp it so that the
                    # square root below can never become NaN.
                    squared_deviation = max(sum_of_squares[j] - counts[j] * (mean_reward ** 2), 0.0)
                    # Sample variance with the (n_j - 1) denominator used by
                    # the UCB1-Normal index.
                    variance = squared_deviation / (counts[j] - 1)
                    ucb_values[j] = mean_reward + np.sqrt(16 * variance * np.log(t - 1) / counts[j])
            arm = np.argmax(ucb_values)

        reward = np.random.normal(arm_means[arm], np.sqrt(arm_variances[arm]))
        counts[arm] += 1
        rewards[arm] += reward
        sum_of_squares[arm] += reward ** 2
        total_reward += reward

        # Binary bookkeeping: rewards are compared with the true mean of the
        # arm that was pulled in this round.
        if reward <= arm_means[arm]:
            zeros_count += 1
        else:
            ones_count += 1

        if arm != best_arm:
            suboptimal_arms_count += 1
            total_regret += best_mean - arm_means[arm]

    return total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count

## Run Simulation Function

In [4]:
def run_simulation(algorithm, parameters, arm_means, arm_variances, num_iterations=100):
    """Repeat the UCB-Normal simulation for every parameter and record the outcomes.

    Args:
        algorithm: Result collector; ``add_result`` is called once per run.
        parameters: Iterable of time horizons to simulate.
        arm_means: True mean reward of each arm.
        arm_variances: True reward variance of each arm.
        num_iterations: Number of independent repetitions per parameter.
            Defaults to 100, the value that used to be hard-coded here.
            Code that averages the recorded results must divide by the same
            number of repetitions.
    """
    for iteration in range(1, num_iterations + 1):
        for param in parameters:
            outcome = UCB_normal_simulation(algorithm, arm_means, arm_variances, param)
            # outcome = (total_reward, suboptimal_arms_count, total_regret,
            #            zeros_count, ones_count)
            algorithm.add_result(param, iteration, *outcome)


## UCB-Normal for different time horizons

In [5]:
# Seed NumPy's global random generator so that a fresh "Restart & Run All"
# reproduces the same result files.
SEED = 42
np.random.seed(SEED)

time_horizons = [2, 3, 100, 200, 2000, 10000, 20000, 40000, 60000, 80000, 100000]

# Example algorithms
algorithms = [
    BanditAlgorithm("4_UCB-Normal"),
]

# Example means and variances of the arms
# (each variance equals p * (1 - p) for the corresponding mean p)
arm_means = np.array([0.495, 0.5])
arm_variances = np.array([0.249975, 0.25])

# NOTE(review): machine-specific absolute path - adjust it (or make it
# configurable) before running the notebook on another machine.
results_path = r'C:/Users/canis/OneDrive\Dokumente/uni/uni-surface/FSS 2024/BA/bachelorarbeit_vrlfg/BA/github/BA_code/2_algorithms_results'

# Run the simulation and store the results
for algorithm in algorithms:
    run_simulation(algorithm, time_horizons, arm_means, arm_variances)
    # Join with "/" instead of the invalid escape sequence "\{"; Windows
    # accepts forward slashes as path separators.
    algorithm.save_results_to_csv(f'{results_path}/{algorithm.name}_results_subopt_ver3.csv')
    avg_results = algorithm.calculate_average_results()
    with open(f'{results_path}/{algorithm.name}_average_results_subopt_ver3.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Timestep', 'Average Total Reward', 'Average Suboptimal Arms', 'Average Regret', 'Average Zeros Count', 'Average Ones Count'])
        writer.writerows(avg_results)


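**Anmerkung zu `zeros_count` und `ones_count`:** Beide Zähler wurden hier modifiziert und beruhen auf dem Vergleich (< bzw. >) des Rewards mit dem Mittelwert (`arm_means`) des maximalen Arms. Dies ist analog zur Binomialverteilung in den anderen Algorithmen, bei denen `ones_count` ebenfalls nur einen Wert zählt, nämlich Reward = 1, der von beiden Armen erreicht werden kann (hier: ein Reward, der mindestens dem maximalen durchschnittlichen Reward entspricht), da keine eindeutige Zuordnung zu einem Arm möglich ist.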

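**Anmerkung zur Varianz:** Die Varianzen der Arme entsprechen der Varianz, die eine Binomial- bzw. Bernoulli-Verteilung mit dem jeweiligen Arm-Mittelwert $p$ hätte, also $\sigma^2 = p(1-p)$ (z. B. $0{,}495 \cdot 0{,}505 = 0{,}249975$).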