# In [1]:
import numpy as np
import csv
import math

class BanditAlgorithm:
    """Collect the per-run results of one bandit algorithm and export them.

    Attributes:
        name: Label of the algorithm (used by callers, e.g. in file names).
        results: List of rows, one per ``add_result`` call, in the layout
            ``[param, iteration, total_reward, suboptimal_arms_count,
            total_regret (rounded to 2 decimals), zeros_count, ones_count]``.
    """

    def __init__(self, name):
        self.name = name
        self.results = []

    def add_result(self, param, iteration, total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count):
        """Append one result row; ``total_regret`` is stored rounded to two decimals."""
        self.results.append([param, iteration, total_reward, suboptimal_arms_count, round(total_regret, 2), zeros_count, ones_count])

    def save_results_to_csv(self, filename):
        """Write a header line followed by every recorded row to ``filename``."""
        with open(filename, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Timestep', 'Iteration', 'Total Reward', 'Suboptimal Arms', 'Total Regret', 'Zeros Count', 'Ones Count'])
            writer.writerows(self.results)

    def calculate_average_results(self):
        """Average the recorded metrics per ``param``.

        Returns:
            A list with one entry per distinct ``param`` (in order of first
            appearance), each of the form ``[param, avg_total_reward,
            avg_suboptimal_arms, avg_regret, avg_zeros, avg_ones]``.
            An empty list is returned when nothing has been recorded.
        """
        sums = {}
        counts = {}
        for param, _iteration, *metrics in self.results:
            if param not in sums:
                sums[param] = [0, 0, 0, 0, 0]
                counts[param] = 0
            for position, value in enumerate(metrics):
                sums[param][position] += value
            counts[param] += 1

        # Divide by the number of rows actually recorded for each param
        # instead of assuming a fixed number of repetitions.
        return [
            [param] + [total / counts[param] for total in totals]
            for param, totals in sums.items()
        ]

def EUCBV_simulation(arm_means, total_steps):
    """Simulate one run of the EUCBV strategy on Bernoulli arms.

    Every arm is played once, then in each remaining step the active arm
    with the largest upper confidence bound is played. After each play,
    active arms whose upper bound lies below the largest lower bound among
    the active arms are eliminated. The exploration parameter ``delta_t`` is
    halved whenever the current round budget ``N_0`` is exhausted.

    Args:
        arm_means: Success probabilities of the arms; rewards are drawn with
            ``np.random.binomial(1, arm_means[arm])``.
        total_steps: Number of plays (time horizon). Must be at least the
            number of arms.

    Returns:
        A dict of arrays of length ``total_steps`` holding, for every step,
        the cumulative values so far: ``"total_rewards"``,
        ``"suboptimal_arms"`` (plays of a non-optimal arm), ``"regret"``,
        ``"zeros_counts"`` and ``"ones_counts"``.

    Raises:
        ValueError: If ``arm_means`` is empty or ``total_steps`` is smaller
            than the number of arms.
    """
    K = len(arm_means)
    if K == 0:
        raise ValueError("arm_means must contain at least one arm")
    if total_steps < K:
        raise ValueError("total_steps must be at least the number of arms")

    rho = 1 / 2
    psi = total_steps / (K**2)

    # Loop invariants used for the regret bookkeeping.
    optimal_arm = np.argmax(arm_means)
    optimal_mean = np.max(arm_means)

    T_k = np.zeros(K)  # Number of times each arm has been played
    X_k = np.zeros(K)  # Sum of rewards for each arm
    sum_of_squares = np.zeros(K)  # Sum of squared rewards for each arm

    zeros_count = 0
    ones_count = 0

    B_t = set(range(K))  # Arms that have not been eliminated yet
    delta_t = 1
    total_reward = 0
    suboptimal_arms_count = 0
    total_regret = 0

    regret = np.zeros(total_steps)
    suboptimal_arms = np.zeros(total_steps, dtype=int)
    total_rewards = np.zeros(total_steps)
    zeros_counts = np.zeros(total_steps, dtype=int)
    ones_counts = np.zeros(total_steps, dtype=int)

    def play_arm(arm, t):
        """Draw a reward for ``arm`` and record the cumulative statistics at step ``t``."""
        nonlocal zeros_count, ones_count, total_reward, suboptimal_arms_count, total_regret
        reward = np.random.binomial(1, arm_means[arm])
        T_k[arm] += 1
        X_k[arm] += reward
        sum_of_squares[arm] += reward**2
        total_reward += reward

        if reward == 0:
            zeros_count += 1
        else:
            ones_count += 1

        if arm != optimal_arm:
            suboptimal_arms_count += 1
            total_regret += optimal_mean - arm_means[arm]

        total_rewards[t] = total_reward
        regret[t] = total_regret
        suboptimal_arms[t] = suboptimal_arms_count
        zeros_counts[t] = zeros_count
        ones_counts[t] = ones_count

        return reward

    def arm_statistics(arm):
        """Return ``(empirical mean, confidence radius)`` of ``arm`` for the current ``delta_t``."""
        pulls = T_k[arm]
        mean = X_k[arm] / pulls
        variance = sum_of_squares[arm] / pulls - mean**2
        # Clamp at zero so that very short horizons cannot produce the square
        # root of a negative number.
        log_term = max(math.log(psi * total_steps * delta_t), 0.0)
        radius = math.sqrt((rho * (variance + 2) * log_term) / (4 * pulls))
        return mean, radius

    def upper_confidence_bound(arm):
        """Return the selection index (mean plus radius) of ``arm``."""
        mean, radius = arm_statistics(arm)
        return mean + radius

    # Initialisation: play every arm once.
    for arm in range(K):
        play_arm(arm, arm)

    M = int(math.floor(0.5 * math.log2(total_steps / math.exp(1))))
    m = 0
    n_0 = int(math.ceil(math.log(psi * total_steps * delta_t**2) / (2 * delta_t)))
    N_0 = K * n_0

    for t in range(K, total_steps):
        if len(B_t) == 1:
            # Only one candidate left: keep playing it.
            play_arm(next(iter(B_t)), t)
            continue

        selected_arm = max(B_t, key=upper_confidence_bound)
        play_arm(selected_arm, t)

        # Arm elimination: compare each active arm's upper bound with the
        # largest lower bound over all active arms. The arm attaining that
        # maximum can never be removed, so B_t never becomes empty.
        bounds = {arm: arm_statistics(arm) for arm in B_t}
        best_lower_bound = max(mean - radius for mean, radius in bounds.values())
        for arm, (mean, radius) in bounds.items():
            if mean + radius < best_lower_bound:
                B_t.remove(arm)

        # Round change: halve delta_t and grant the surviving arms a new budget.
        if t >= N_0 and m <= M:
            delta_t /= 2
            n_0 = int(math.ceil(math.log(psi * total_steps * delta_t**2) / (2 * delta_t)))
            N_0 = t + len(B_t) * n_0
            m += 1

    return {
        "total_rewards": total_rewards,
        "suboptimal_arms": suboptimal_arms,
        "regret": regret,
        "zeros_counts": zeros_counts,
        "ones_counts": ones_counts
    }

def general_simulation(algorithm, arm_means, parameters, strategy_fn, num_iterations=100):
    """Run ``strategy_fn`` repeatedly and record snapshots at several horizons.

    Each repetition simulates once up to ``max(parameters)`` steps and then
    reads the cumulative metrics at index ``param - 1`` for every ``param``.

    Args:
        algorithm: Object with an ``add_result(param, iteration, total_reward,
            suboptimal_arms_count, total_regret, zeros_count, ones_count)``
            method that receives one row per (iteration, param) pair.
        arm_means: Passed through unchanged to ``strategy_fn``.
        parameters: Time horizons (1-based step counts) at which to record.
        strategy_fn: Callable ``(arm_means, total_steps)`` returning a mapping
            with the keys ``"total_rewards"``, ``"suboptimal_arms"``,
            ``"regret"``, ``"zeros_counts"`` and ``"ones_counts"``, each
            indexable by step.
        num_iterations: Number of independent repetitions (default 100).
            Iterations are numbered starting at 1.
    """
    max_time_horizon = max(parameters)
    metric_keys = ("total_rewards", "suboptimal_arms", "regret", "zeros_counts", "ones_counts")

    for iteration in range(1, num_iterations + 1):
        results = strategy_fn(arm_means, max_time_horizon)

        for param in parameters:
            # Step ``param`` is stored at 0-based index ``param - 1``.
            snapshot = [results[key][param - 1] for key in metric_keys]
            algorithm.add_result(param, iteration, *snapshot)

# Example time horizons at which the cumulative metrics are recorded
time_horizons = [2, 3, 100, 200, 2000, 10000, 20000, 40000, 60000, 80000, 100000]

# Example algorithms
algorithms = [
    BanditAlgorithm("9_EUCBV"),
]

# Example arm means
arm_means = np.array([0.9, 0.8])

# Run the simulation and save the results
results_path = r'C:/Users/canis/OneDrive\Dokumente/uni/uni-surface/FSS 2024/BA/bachelorarbeit_vrlfg/BA/github/BA_code/2_algorithms_results'
for algorithm in algorithms:
    general_simulation(algorithm, arm_means, time_horizons, EUCBV_simulation)
    # The backslash separator is escaped explicitly: a bare "\{" inside an
    # f-string is an invalid escape sequence.
    algorithm.save_results_to_csv(f'{results_path}\\{algorithm.name}_new_results_opt_ver1.csv')
    avg_results = algorithm.calculate_average_results()
    with open(f'{results_path}\\{algorithm.name}_new_average_results_opt_ver1.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Timestep', 'Average Total Reward', 'Average Suboptimal Arms', 'Average Regret', 'Average Zeros Count', 'Average Ones Count'])
        writer.writerows(avg_results)
