# Epsilon-Greedy Algorithm

In [6]:
import numpy as np
import csv

## Class BanditAlgorithm: Initialization

In [7]:
class BanditAlgorithm:
    """Collect per-run results of a bandit algorithm and export them.

    Every stored result is a 7-tuple
    ``(timestep, iteration, total_reward, suboptimal_arms, regret,
    zeros_count, ones_count)``.
    """

    def __init__(self, name):
        """Create an empty result store labelled with *name*."""
        self.name = name
        self.results = []

    def add_result(self, timestep, iteration, total_reward, suboptimal_arms, regret, zeros_count, ones_count):
        """Record the outcome of a single run.

        ``zeros_count`` and ``ones_count`` are reduced with ``np.sum`` before
        being stored, so each ends up as a single total per run.
        """
        self.results.append((
            timestep,
            iteration,
            total_reward,
            suboptimal_arms,
            regret,
            np.sum(zeros_count),
            np.sum(ones_count),
        ))

    def save_results_to_csv(self, filename):
        """Write every recorded result to *filename* as CSV with a header row.

        Rows are ordered by iteration, then by timestep. The sort happens in
        place, so ``self.results`` keeps that order after the call.
        """
        self.results.sort(key=lambda row: (row[1], row[0]))
        with open(filename, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['Timestep', 'Iteration', 'Total Reward', 'Suboptimal Arms', 'Regret', 'Zeros Count', 'Ones Count'])
            writer.writerows(self.results)

    def calculate_average_results(self):
        """Average the recorded metrics over all runs sharing a timestep.

        Returns:
            A list, sorted by timestep, of tuples
            ``(timestep, avg_total_reward, avg_suboptimal_arms, avg_regret,
            avg_zeros_count, avg_ones_count)``. Empty when nothing has been
            recorded.
        """
        sums = {}
        counts = {}
        # Single pass over the results: group by timestep instead of rescanning
        # the whole list once per distinct timestep. Values are accumulated in
        # storage order, so the sums match a per-timestep scan exactly.
        for timestep, _iteration, *metrics in self.results:
            running = sums.setdefault(timestep, [0] * len(metrics))
            for index, value in enumerate(metrics):
                running[index] += value
            counts[timestep] = counts.get(timestep, 0) + 1

        return [
            (timestep, *(total / counts[timestep] for total in sums[timestep]))
            for timestep in sorted(sums)
        ]

### epsilon-Greedy Definition

In [8]:

def epsilon_greedy(arm_means, num_arms, total_steps, epsilon):
    """Run one epsilon-greedy episode on Bernoulli arms.

    At each step, with probability ``epsilon`` an arm is drawn uniformly at
    random (exploration); otherwise the arm with the highest current value
    estimate is pulled (exploitation, ties resolved by ``np.argmax``).

    Args:
        arm_means: Success probability of each arm.
        num_arms: Number of arms to choose among.
        total_steps: Number of pulls in the episode.
        epsilon: Exploration probability.

    Returns:
        A tuple ``(total_reward, suboptimal_arms_count, total_regret,
        zeros_count, ones_count)`` where ``total_regret`` is rounded to one
        decimal and the two count arrays have length ``total_steps`` with a 1
        at each step whose reward was 0 / 1 respectively.
    """
    means = np.asarray(arm_means, dtype=float)
    # The best achievable mean never changes during the episode, so compute it
    # once instead of on every step.
    best_mean = means.max() if means.size else 0.0

    Q = np.zeros(num_arms)  # running value estimate per arm
    N = np.zeros(num_arms)  # number of pulls per arm
    total_reward = 0
    suboptimal_arms_count = 0
    total_regret = 0
    zeros_count = np.zeros(total_steps)
    ones_count = np.zeros(total_steps)

    for t in range(total_steps):
        if np.random.rand() < epsilon:
            # Exploration
            arm = np.random.choice(num_arms)
        else:
            # Exploitation
            arm = np.argmax(Q)

        arm_mean = means[arm]
        reward = np.random.binomial(1, arm_mean)
        total_reward += reward
        N[arm] += 1
        # Incremental mean update: no reward history needs to be kept.
        Q[arm] += (reward - Q[arm]) / N[arm]

        total_regret += best_mean - arm_mean
        # Compare means rather than indices so that, when several arms share
        # the best mean, pulling any of them is not counted as suboptimal
        # (consistent with the zero regret such a pull incurs).
        if arm_mean < best_mean:
            suboptimal_arms_count += 1
        if reward == 0:
            zeros_count[t] += 1
        else:
            ones_count[t] += 1

    total_regret = round(total_regret, 1)

    return total_reward, suboptimal_arms_count, total_regret, zeros_count, ones_count


### Run Simulation Function

In [9]:
def run_simulation(algorithm, parameters, *, arm_means=None, epsilon=0.1, num_iterations=100):
    """Run repeated epsilon-greedy episodes and record each outcome.

    For every iteration (numbered from 1) and every horizon in ``parameters``,
    one episode is run with ``epsilon_greedy`` and its result is stored via
    ``algorithm.add_result``.

    Args:
        algorithm: Result collector exposing ``add_result(timestep, iteration,
            total_reward, suboptimal_arms, regret, zeros_count, ones_count)``.
        parameters: Iterable of time horizons (number of steps per episode).
        arm_means: Mean reward of each arm; defaults to ``[0.9, 0.8]``.
        epsilon: Exploration probability passed to ``epsilon_greedy``.
        num_iterations: Number of independent repetitions per horizon.
    """
    if arm_means is None:
        arm_means = np.array([0.9, 0.8])  # Example mean rewards of arms
    else:
        arm_means = np.asarray(arm_means)
    num_arms = len(arm_means)

    for iteration in range(1, num_iterations + 1):
        for horizon in parameters:
            outcome = epsilon_greedy(arm_means, num_arms, horizon, epsilon)
            algorithm.add_result(horizon, iteration, *outcome)


### Epsilon-greedy for different time horizons

In [10]:
# Horizons (number of steps per episode) evaluated for every algorithm.
time_horizons = [2, 3, 100, 200, 2000, 10000, 20000, 40000, 60000, 80000, 100000]

algorithms = [BanditAlgorithm("EpsilonGreedy")]

for algorithm in algorithms:
    run_simulation(algorithm, time_horizons)

    # Raw per-run rows go to one file, per-horizon averages to another.
    algorithm.save_results_to_csv(f"{algorithm.name}_results.csv")
    avg_results = algorithm.calculate_average_results()

    with open(f"{algorithm.name}_average_results.csv", mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            'Timestep',
            'Average Total Reward',
            'Average Suboptimal Arms',
            'Average Regret',
            'Average Zeros Count',
            'Average Ones Count',
        ])
        writer.writerows(avg_results)