# In [10]:
import numpy as np
import csv
import math

class BanditAlgorithm:
    """Collect per-run results of a bandit algorithm and export them.

    Every stored row has the layout
    ``[param, iteration, total_reward, suboptimal_arms_count, total_regret,
    zeros_count, ones_count]``.
    """

    def __init__(self, name):
        """Create an empty result collector labelled ``name``."""
        self.name = name
        self.results = []

    def add_result(self, param, iteration, total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count):
        """Append one result row; ``total_regret`` is rounded to 2 decimals."""
        self.results.append([
            param,
            iteration,
            total_reward,
            suboptimal_arms_count,
            round(total_regret, 2),
            zeros_count,
            ones_count,
        ])

    def save_results_to_csv(self, filename):
        """Write a header row followed by every stored row to ``filename``."""
        with open(filename, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Timestep', 'Iteration', 'Total Reward', 'Suboptimal Arms Count', 'Total Regret', 'Zeros Count', 'Ones Count'])
            writer.writerows(self.results)

    def calculate_average_results(self):
        """Average the stored metrics separately for each ``param`` value.

        Returns:
            A list with one entry per distinct ``param`` (in order of first
            appearance), each of the form ``[param, avg_total_reward,
            avg_suboptimal_arms_count, avg_total_regret, avg_zeros_count,
            avg_ones_count]``. An empty list is returned when no results
            have been added.
        """
        sums = {}
        row_counts = {}
        for result in self.results:
            param = result[0]
            if param not in sums:
                sums[param] = [0, 0, 0, 0, 0]
                row_counts[param] = 0
            row_counts[param] += 1
            totals = sums[param]
            # result[2:7] = total reward, suboptimal arms count, total regret,
            # zeros count, ones count.
            for index, value in enumerate(result[2:7]):
                totals[index] += value

        # Divide by the number of rows actually recorded for each parameter
        # rather than assuming a fixed number of iterations.
        return [
            [param] + [total / row_counts[param] for total in totals]
            for param, totals in sums.items()
        ]

def ETC_simulation(algorithm, arm_means, time_horizons, exploration_rounds=1000):
    """Simulate one Explore-Then-Commit (ETC) run on Bernoulli arms.

    During the first ``exploration_rounds * K`` steps the arms are pulled
    round-robin. Afterwards the arm with the highest empirical mean reward
    from the exploration phase is selected once and played for all
    remaining steps.

    Args:
        algorithm: Unused; kept so existing callers keep working.
        arm_means: Success probability of each arm (sequence or array).
        time_horizons: Time steps at which the cumulative statistics are
            recorded. The simulation runs up to ``max(time_horizons)``.
        exploration_rounds: Number of pulls per arm in the exploration
            phase.

    Returns:
        A dict mapping each entry of ``time_horizons`` to the tuple
        ``(total_reward, suboptimal_arms_count, total_regret, zeros_count,
        ones_count)`` accumulated up to and including that step. Horizons
        the loop never reaches (values below 1) keep the value ``None``.

    Raises:
        ValueError: If ``arm_means`` or ``time_horizons`` is empty, or if
            ``exploration_rounds`` is smaller than 1.
    """
    K = len(arm_means)
    if K == 0:
        raise ValueError("arm_means must contain at least one arm")
    if exploration_rounds < 1:
        # With no exploration pulls the empirical means would be 0 / 0.
        raise ValueError("exploration_rounds must be at least 1")

    max_time_horizon = max(time_horizons)
    exploration_steps = exploration_rounds * K

    # Loop invariants: the truly best arm and its mean never change.
    best_arm = np.argmax(arm_means)
    best_mean = np.max(arm_means)

    counts = np.zeros(K, dtype=int)
    rewards = np.zeros(K)

    total_reward = 0
    zeros_count = 0
    ones_count = 0
    suboptimal_arms_count = 0
    total_regret = 0

    committed_arm = None
    results = {t: None for t in time_horizons}

    for t in range(1, max_time_horizon + 1):
        if t <= exploration_steps:
            arm = (t - 1) % K
        else:
            if committed_arm is None:
                # Commit exactly once, based on the exploration data only.
                committed_arm = np.argmax(rewards / counts)
            arm = committed_arm

        reward = np.random.binomial(1, arm_means[arm])
        counts[arm] += 1
        rewards[arm] += reward
        total_reward += reward

        if reward == 0:
            zeros_count += 1
        else:
            ones_count += 1

        if arm != best_arm:
            suboptimal_arms_count += 1
            total_regret += best_mean - arm_means[arm]

        # Dict lookup instead of scanning the horizon list on every step.
        if t in results:
            results[t] = (total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count)

    return results

def run_simulation(algorithm, parameters, arm_means, n_iterations=100):
    """Run ``ETC_simulation`` repeatedly and record every outcome.

    Args:
        algorithm: Object whose ``add_result`` method receives one row per
            (iteration, parameter) pair.
        parameters: Time horizons passed to ``ETC_simulation``; one result
            row is stored for each of them in every iteration.
        arm_means: Arm success probabilities passed to ``ETC_simulation``.
        n_iterations: Number of independent simulation runs. Iterations are
            numbered starting at 1.
    """
    for iteration in range(1, n_iterations + 1):
        results = ETC_simulation(algorithm, arm_means, parameters)
        for param in parameters:
            total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count = results[param]
            algorithm.add_result(param, iteration, total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count)

# Time steps passed to run_simulation; one result row is stored per horizon.
time_horizons = [
    2, 3, 100, 200, 2000,
    10000, 20000, 40000, 60000, 80000, 100000,
]

# Example algorithms
algorithms = [BanditAlgorithm("1_ETC")]

arm_means = np.array([0.495, 0.5])

# Run the simulation and store the results
results_path = r'C:/Users/wolfe/Documents/BA/varianceinucbalgorithms/2_algorithms_results'
average_header = ['Timestep', 'Average Total Reward', 'Average Suboptimal Arms', 'Average Regret', 'Average Zeros Count', 'Average Ones Count']
for algorithm in algorithms:
    run_simulation(algorithm, time_horizons, arm_means)

    # Raw per-iteration rows first, then the per-horizon averages.
    algorithm.save_results_to_csv(f'{results_path}/{algorithm.name}_results_subopt_ver3.csv')

    avg_results = algorithm.calculate_average_results()
    average_csv_path = f'{results_path}/{algorithm.name}_average_results_subopt_ver3.csv'
    with open(average_csv_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(average_header)
        writer.writerows(avg_results)
