# In [7]:
import numpy as np
import csv
import math

class BanditAlgorithm:
    """Collect per-run results of a bandit algorithm and export them.

    Every stored record is a 7-tuple in the order
    (timestep, iteration, total reward, suboptimal arms, total regret,
    zeros count, ones count).
    """

    def __init__(self, name):
        """Create an empty result collector labelled with *name*."""
        self.results = []
        self.name = name

    def add_result(self, timestep, iteration, total_reward, suboptimal_arms, total_regret, zeros_count, ones_count):
        """Append one record to ``self.results``.

        The regret is rounded to two decimals and the zero/one counts are
        reduced with ``np.sum`` before being stored.
        """
        record = (
            timestep,
            iteration,
            total_reward,
            suboptimal_arms,
            round(total_regret, 2),
            np.sum(zeros_count),
            np.sum(ones_count),
        )
        self.results.append(record)

    def save_results_to_csv(self, filename):
        """Write all records to *filename* as CSV with a header row.

        Note that ``self.results`` is sorted in place by
        (iteration, timestep) before writing.
        """
        header = ['Timestep', 'Iteration', 'Total Reward', 'Suboptimal Arms', 'Total Regret', 'Zeros Count', 'Ones Count']
        self.results.sort(key=lambda row: (row[1], row[0]))
        with open(filename, mode='w', newline='') as handle:
            out = csv.writer(handle)
            out.writerow(header)
            out.writerows(self.results)

    def calculate_average_results(self):
        """Return per-timestep averages over all stored records.

        The result is a list of tuples
        (timestep, avg total reward, avg suboptimal arms, avg regret,
        avg zeros count, avg ones count), ordered by ascending timestep.
        """
        averaged = []
        for step in sorted({row[0] for row in self.results}):
            matching = [row for row in self.results if row[0] == step]
            n_rows = len(matching)

            # Running sums for columns 2..6, accumulated in record order.
            totals = [0, 0, 0, 0, 0]
            for row in matching:
                totals = [acc + value for acc, value in zip(totals, row[2:7])]

            means = [total / n_rows if n_rows > 0 else 0 for total in totals]
            averaged.append((step, *means))
        return averaged

def general_simulation(algorithm, arm_means, parameters, strategy_fn, *, num_iterations=100, **kwargs):
    """Run a strategy repeatedly and record cumulative statistics per horizon.

    For each iteration the strategy is simulated once up to the largest
    horizon in *parameters*; the statistics for every smaller horizon are then
    taken from the prefix of that same run.

    Args:
        algorithm: Object exposing ``add_result(timestep, iteration,
            total_reward, suboptimal_arms, total_regret, zeros_count,
            ones_count)``; one call is made per (iteration, horizon) pair.
        arm_means: Sequence of arm means; its length is passed to
            *strategy_fn* as the number of arms.
        parameters: Non-empty iterable of time horizons to evaluate.
        strategy_fn: Callable ``(arm_means, num_arms, max_time_horizon,
            **kwargs)`` returning a dict with the sliceable per-step entries
            ``"rewards"``, ``"suboptimal_arms"``, ``"regret"``,
            ``"zeros_count"`` and ``"ones_count"``.
        num_iterations: Number of independent runs (default 100, the value
            that used to be hard-coded). Iterations are numbered from 1.
        **kwargs: Forwarded unchanged to *strategy_fn*.

    Raises:
        ValueError: If *parameters* is empty (raised by ``max``).
    """
    max_time_horizon = max(parameters)
    num_arms = len(arm_means)

    for iteration in range(1, num_iterations + 1):
        results = strategy_fn(arm_means, num_arms, max_time_horizon, **kwargs)

        for param in parameters:
            # Cumulative statistics over the first `param` steps of this run.
            total_reward = np.sum(results["rewards"][:param])
            suboptimal_arms_count = np.sum(results["suboptimal_arms"][:param])
            total_regret = np.sum(results["regret"][:param])
            zeros_count = np.sum(results["zeros_count"][:param])
            ones_count = np.sum(results["ones_count"][:param])

            algorithm.add_result(
                param,
                iteration,
                total_reward,
                suboptimal_arms_count,
                total_regret,
                zeros_count,
                ones_count,
            )


class UCB1:
    """UCB1 arm selection with per-arm pull counts and running mean rewards."""

    def __init__(self):
        """Start with no arms; call ``initialize`` before use."""
        self.counts = []
        self.values = []

    def initialize(self, n_arms):
        """Reset pull counts and mean estimates for *n_arms* arms."""
        self.counts = [0] * n_arms
        self.values = [0.0] * n_arms

    def select_arm(self):
        """Return the index of the arm to pull next.

        Any arm that has never been pulled is returned first (lowest index
        wins). Otherwise the arm with the largest
        ``mean + sqrt(2 * ln(total pulls) / pulls of arm)`` is chosen, with
        ties resolved in favour of the lowest index.
        """
        for arm, pulls in enumerate(self.counts):
            if pulls == 0:
                return arm

        # Numerator of the exploration bonus is shared by all arms.
        log_term = 2 * math.log(sum(self.counts))
        scores = [
            mean + math.sqrt(log_term / pulls)
            for mean, pulls in zip(self.values, self.counts)
        ]
        return scores.index(max(scores))

    def update(self, chosen_arm, reward):
        """Record *reward* for *chosen_arm* and refresh its running mean."""
        self.counts[chosen_arm] += 1
        pulls = self.counts[chosen_arm]
        previous_mean = self.values[chosen_arm]
        self.values[chosen_arm] = ((pulls - 1) / pulls) * previous_mean + (1 / pulls) * reward

def UCB_simulation(arm_means, num_arms, total_steps):
    """Simulate one UCB1 run on Bernoulli arms and record per-step outcomes.

    Args:
        arm_means: Success probabilities of the arms, indexable by arm.
        num_arms: Number of arms handed to the UCB1 learner.
        total_steps: Number of pulls to simulate.

    Returns:
        A dict of arrays of length *total_steps*:
        ``"rewards"`` (the sampled 0/1 reward of each step),
        ``"suboptimal_arms"`` (1 where the pulled arm is not the arg-max arm),
        ``"regret"`` (largest arm mean minus the mean of the pulled arm),
        ``"zeros_count"`` and ``"ones_count"`` (indicators for a reward of
        0 and a non-zero reward respectively).
    """
    ucb = UCB1()
    ucb.initialize(num_arms)

    # The best arm does not change during the run, so look it up once.
    best_mean = np.max(arm_means)
    best_arm = np.argmax(arm_means)

    rewards = np.zeros(total_steps)
    suboptimal_arms = np.zeros(total_steps, dtype=int)
    regret = np.zeros(total_steps)
    zeros_count = np.zeros(total_steps, dtype=int)
    ones_count = np.zeros(total_steps, dtype=int)

    for t in range(total_steps):
        chosen_arm = ucb.select_arm()
        reward = np.random.binomial(1, arm_means[chosen_arm])
        ucb.update(chosen_arm, reward)

        rewards[t] = reward
        regret[t] = best_mean - arm_means[chosen_arm]
        if chosen_arm != best_arm:
            suboptimal_arms[t] = 1
        if reward == 0:
            zeros_count[t] = 1
        else:
            ones_count[t] = 1

    return {
        "rewards": rewards,
        "suboptimal_arms": suboptimal_arms,
        "regret": regret,
        "zeros_count": zeros_count,
        "ones_count": ones_count
    }

# Example time horizons at which cumulative statistics are recorded.
time_horizons = [2, 3, 100, 200, 2000, 10000, 20000, 40000, 60000, 80000, 100000]

# Example algorithms to evaluate.
algorithms = [BanditAlgorithm("3_UCB")]

# Example mean rewards of the arms.
arm_means = np.array([0.9, 0.8])

# Run the simulation and save the results.
results_path = r'C:/Users/canis/OneDrive/Dokumente/uni/uni-surface/FSS 2024/BA/bachelorarbeit_vrlfg/BA/github/BA_code/2_algorithms_results'
average_header = ['Timestep', 'Average Total Reward', 'Average Suboptimal Arms', 'Average Regret', 'Average Zeros Count', 'Average Ones Count']
for algorithm in algorithms:
    general_simulation(algorithm, arm_means, time_horizons, UCB_simulation)

    # Raw per-iteration records first, then the per-timestep averages.
    raw_csv = f'{results_path}/{algorithm.name}_new_results_opt_ver1.csv'
    algorithm.save_results_to_csv(raw_csv)

    avg_results = algorithm.calculate_average_results()
    average_csv = f'{results_path}/{algorithm.name}_average_new_results_opt_ver1.csv'
    with open(average_csv, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(average_header)
        writer.writerows(avg_results)
