# Explore-Then-Commit Algorithm

In [1]:
import csv
import numpy as np

## Class BanditAlgorithm: Initialization

In [2]:
class BanditAlgorithm:
    '''
    Collects the results of repeated bandit runs and exports them.

    Every stored result is a tuple
    (timestep, iteration, total_reward, suboptimal_arms, regret,
    zeros_count, ones_count).
    '''

    def __init__(self, name):
        '''
        Store the algorithm name and start with an empty result list.
        '''
        self.name = name
        self.results = []

    def add_result(self, timestep, iteration, total_reward, suboptimal_arms, regret, zeros_count, ones_count):
        '''
        Append the outcome of one run to the result list.

        The regret is rounded to two decimals; zeros_count and ones_count are
        reduced to their totals with np.sum before being stored.
        '''
        self.results.append((
            timestep,
            iteration,
            total_reward,
            suboptimal_arms,
            round(regret, 2),
            np.sum(zeros_count),
            np.sum(ones_count),
        ))

    def save_results_to_csv(self, filename):
        '''
        Write all stored results to a CSV file, one row per run.

        Note: self.results is sorted in place by (iteration, timestep) before
        writing, so the stored order changes as a side effect.
        '''
        self.results.sort(key=lambda x: (x[1], x[0]))
        # All written values are ASCII, so the explicit encoding only removes
        # the dependency on the platform default.
        with open(filename, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['Timestep', 'Iteration', 'Total Reward', 'Suboptimal Arms', 'Total Regret', 'Zeros Count', 'Ones Count'])
            writer.writerows(self.results)

    def calculate_average_results(self):
        '''
        Average the stored results per timestep.

        Returns a list of tuples
        (timestep, avg_total_reward, avg_suboptimal_arms, avg_regret,
        avg_zeros_count, avg_ones_count), ordered by ascending timestep.
        An empty result list yields an empty list.
        '''
        # Group in a single pass instead of rescanning all results once per
        # distinct timestep. Values are accumulated in stored order.
        run_counts = {}
        metric_sums = {}
        for result in self.results:
            timestep = result[0]
            if timestep not in metric_sums:
                run_counts[timestep] = 0
                metric_sums[timestep] = [0, 0, 0, 0, 0]
            run_counts[timestep] += 1
            sums = metric_sums[timestep]
            # Tuple positions 2..6 hold the five averaged metrics.
            for position, value in enumerate(result[2:7]):
                sums[position] += value

        avg_results = []
        for timestep in sorted(metric_sums):
            count = run_counts[timestep]
            averages = tuple(total / count for total in metric_sums[timestep])
            avg_results.append((timestep,) + averages)
        return avg_results

### ETC Definition

In [3]:
def explore_then_commit(arm_means, num_arms, total_steps, m):
    '''
    Explore-Then-Commit on Bernoulli arms.

    Input:
        arm_means:   success probability of each arm, indexable by arm number
        num_arms:    number of arms
        total_steps: number of time steps (horizon)
        m:           currently unused; kept so existing callers keep working.
                     The exploration phase always pulls each arm exactly once.
    Output (tuple):
        total reward collected over all time steps,
        number of pulls of a suboptimal arm,
        cumulative regret (sum of reward gaps) rounded to two decimals,
        array of length total_steps with 1 where the reward was 0,
        array of length total_steps with 1 where the reward was 1.

    NOTE(review): the commit phase selects the arm with the highest TRUE mean
    (argmax of arm_means), not the arm with the best reward observed during
    exploration. This is kept as is because changing it would alter the
    experiment results - confirm that this oracle behaviour is intended.
    '''

    # The arm with the highest true mean is the reference for the regret.
    optimal_arm = np.argmax(arm_means)
    optimal_mean = arm_means[optimal_arm]

    total_reward = 0
    suboptimal_arms_count = 0
    total_regret = 0
    zeros_count = np.zeros(total_steps)
    ones_count = np.zeros(total_steps)

    # Exploration phase: each arm is pulled once. The phase is capped at the
    # horizon so that total_steps < num_arms no longer indexes past the
    # per-step arrays.
    exploration_steps = min(num_arms, total_steps)
    for arm in range(exploration_steps):
        reward = np.random.binomial(1, arm_means[arm])
        total_reward += reward
        total_regret += optimal_mean - arm_means[arm]
        if arm != optimal_arm:
            suboptimal_arms_count += 1
        if reward == 0:
            zeros_count[arm] += 1
        else:
            ones_count[arm] += 1

    # Commit / exploitation phase: the committed arm never changes, so all
    # remaining rewards are drawn in one vectorised call.
    commit_steps = total_steps - exploration_steps
    if commit_steps > 0:
        committed_arm = optimal_arm
        rewards = np.random.binomial(1, arm_means[committed_arm], size=commit_steps)
        total_reward += int(rewards.sum())
        ones_count[exploration_steps:] = rewards
        zeros_count[exploration_steps:] = 1 - rewards
        total_regret += (optimal_mean - arm_means[committed_arm]) * commit_steps
        if committed_arm != optimal_arm:
            suboptimal_arms_count += commit_steps

    total_regret = round(total_regret, 2)

    return total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count


### Run Simulation Function

In [4]:
def run_simulation(algorithm, parameters, arm_means=None, num_iterations=100):
    '''
    Repeatedly run the ETC algorithm and store every outcome.

    Input:
        algorithm:      object whose add_result method receives each outcome
        parameters:     time horizons; each one is run once per iteration
        arm_means:      mean reward of each arm; defaults to [0.9, 0.8]
        num_iterations: number of repetitions; defaults to 100

    For every iteration (numbered from 1) and every horizon,
    explore_then_commit is called and its result is passed to
    algorithm.add_result together with the horizon and iteration number.
    '''

    if arm_means is None:
        arm_means = np.array([0.9, 0.8])  # default example arm means
    num_arms = len(arm_means)  # derived so it cannot disagree with arm_means
    m = num_arms  # number of exploration steps handed to the algorithm

    for iteration in range(1, num_iterations + 1):

        for param in parameters:

            total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count = explore_then_commit(
                arm_means, num_arms, param, m)

            # Record the outcome of this (horizon, iteration) pair
            algorithm.add_result(param, iteration, total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count)


### ETC for different time horizons

In [5]:
# Example time horizons to simulate
time_horizons = [2, 3, 100, 200, 2000, 10000, 20000, 40000, 60000, 80000, 100000]

# Algorithms to evaluate
algorithms = [BanditAlgorithm("1_ETC")]

# Directory that receives both CSV files of every algorithm
results_path = r'C:/Users/canis/OneDrive/Dokumente/uni/uni-surface/FSS 2024/BA/bachelorarbeit_vrlfg/BA/github/BA_code/2_algorithms_results'

# Run the simulation, then write the per-run and the averaged results
for algorithm in algorithms:
    run_simulation(algorithm, time_horizons)

    per_run_csv = f'{results_path}/{algorithm.name}_results_opt_ver1.csv'
    algorithm.save_results_to_csv(per_run_csv)

    avg_results = algorithm.calculate_average_results()
    average_csv = f'{results_path}/{algorithm.name}_average_results_opt_ver1.csv'
    with open(average_csv, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Timestep', 'Average Total Reward', 'Average Suboptimal Arms', 'Average Regret', 'Average Zeros Count', 'Average Ones Count'])
        writer.writerows(avg_results)