In [2]:
import random
from typing import Callable, List, Sequence, Tuple

import numpy as np

class Individual:
    """A candidate solution: a gene vector paired with its fitness score."""

    def __init__(self, genes: np.ndarray):
        """Store ``genes`` as given (no copy) and leave the fitness unset.

        Args:
            genes: Gene vector describing this candidate.
        """
        # ``fitness`` stays None until something assigns a score to it.
        self.genes, self.fitness = genes, None

class BanditArm:
    """Running statistics for a single arm of a multi-armed bandit."""

    def __init__(self, name: str):
        """Create an arm that has never been pulled.

        Args:
            name: Label identifying the arm.
        """
        self.name = name
        # Estimated value, pull count and cumulative reward all start at zero.
        self.value, self.pulls, self.total_reward = 0.0, 0, 0.0

In [4]:
class GeneticBandit:
    """Genetic algorithm whose operator rates are chosen by UCB-style bandits.

    Fitness is maximised. For every pair of offspring one crossover rate and
    one mutation rate are picked by two independent bandits (one arm per
    candidate rate). Both chosen arms are then rewarded with the non-negative
    improvement of the children's mean fitness over the parents' mean fitness.

    Attributes:
        pop_size: Number of individuals kept per generation.
        gene_length: Number of genes per individual.
        fitness_func: Callable mapping a gene vector to a score (higher is better).
        bounds: Per-gene ``(lower, upper)`` sampling limits.
        mutation_rates / crossover_rates: Candidate rates (private list copies).
        mutation_arms / crossover_arms: One ``BanditArm`` per candidate rate,
            in the same order as the corresponding rate list.
        population: Current list of ``Individual`` objects.
        best_individual / best_fitness: Best individual evaluated so far.
        c: Multiplier applied to the exploration bonus in ``_select_arm``.
    """

    def __init__(
        self,
        pop_size: int,
        gene_length: int,
        fitness_func: Callable,
        bounds: List[Tuple[float, float]],
        mutation_rates: Sequence[float] = (0.01, 0.05, 0.1),
        crossover_rates: Sequence[float] = (0.7, 0.8, 0.9),
    ):
        """Validate the configuration and sample the initial population.

        Args:
            pop_size: Individuals per generation; must be at least 1.
            gene_length: Genes per individual.
            fitness_func: Callable scoring a gene vector (higher is better).
            bounds: At least ``gene_length`` ``(lower, upper)`` pairs.
            mutation_rates: Candidate per-gene mutation probabilities.
            crossover_rates: Candidate crossover probabilities.

        Raises:
            ValueError: If ``pop_size < 1``, ``bounds`` has fewer entries than
                ``gene_length``, or either rate sequence is empty.
        """
        if pop_size < 1:
            raise ValueError("pop_size must be at least 1")
        if len(bounds) < gene_length:
            raise ValueError("bounds must contain one (lower, upper) pair per gene")

        # Copy the rates: the defaults are immutable tuples and a caller's list
        # must not be aliased, otherwise edits would leak between instances.
        self.mutation_rates = list(mutation_rates)
        self.crossover_rates = list(crossover_rates)
        if not self.mutation_rates or not self.crossover_rates:
            raise ValueError("mutation_rates and crossover_rates must not be empty")

        self.pop_size = pop_size
        self.gene_length = gene_length
        self.fitness_func = fitness_func
        self.bounds = bounds

        # Arm i of each bandit corresponds to rate i of the matching list.
        self.mutation_arms = [BanditArm(f"mutation_{rate}") for rate in self.mutation_rates]
        self.crossover_arms = [BanditArm(f"crossover_{rate}") for rate in self.crossover_rates]

        self.population = self._initialize_population()
        self.best_individual = None
        self.best_fitness = float('-inf')

        # Weight of the exploration term in the upper-confidence-bound score.
        self.c = 2.0

    def _initialize_population(self) -> "List[Individual]":
        """Return ``pop_size`` individuals with genes drawn uniformly within ``bounds``."""
        gene_bounds = self.bounds[:self.gene_length]
        return [
            Individual(
                np.array([random.uniform(lower, upper) for lower, upper in gene_bounds], dtype=float)
            )
            for _ in range(self.pop_size)
        ]

    def _select_arm(self, arms: "List[BanditArm]") -> "BanditArm":
        """Pick an arm by upper confidence bound.

        A random arm is returned while no arm has been pulled; afterwards any
        arm that is still unpulled is tried (first one in list order) before
        the UCB scores are compared. Ties go to the earliest arm.
        """
        total_pulls = sum(arm.pulls for arm in arms)
        if total_pulls == 0:
            return random.choice(arms)

        for arm in arms:
            if arm.pulls == 0:
                return arm

        log_total = np.log(total_pulls)
        ucb_values = [
            arm.total_reward / arm.pulls + self.c * np.sqrt((2 * log_total) / arm.pulls)
            for arm in arms
        ]
        return arms[int(np.argmax(ucb_values))]

    def _update_arm(self, arm: "BanditArm", reward: float):
        """Record one pull of ``arm`` with ``reward`` and refresh its mean value."""
        arm.pulls += 1
        arm.total_reward += reward
        arm.value = arm.total_reward / arm.pulls

    def _crossover(
        self, parent1: "Individual", parent2: "Individual", rate: float
    ) -> "Tuple[Individual, Individual]":
        """Uniform crossover applied with probability ``rate``.

        When crossover is skipped the two parent objects themselves are
        returned (not copies). Otherwise each gene is taken from either parent
        with equal probability and the two children are complementary.
        """
        if random.random() > rate:
            return parent1, parent2

        mask = np.random.random(self.gene_length) < 0.5
        child1_genes = np.where(mask, parent1.genes, parent2.genes)
        child2_genes = np.where(mask, parent2.genes, parent1.genes)
        return Individual(child1_genes), Individual(child2_genes)

    def _mutate(self, individual: "Individual", rate: float) -> "Individual":
        """Return a new individual with each gene resampled with probability ``rate``.

        The input individual is left untouched; a mutated gene is redrawn
        uniformly within its own bounds.
        """
        mutated_genes = individual.genes.copy()
        for i in range(self.gene_length):
            if random.random() < rate:
                lower, upper = self.bounds[i]
                mutated_genes[i] = random.uniform(lower, upper)
        return Individual(mutated_genes)

    def _tournament_select(self, tournament_size: int = 3) -> "Individual":
        """Return the fittest of ``tournament_size`` randomly sampled individuals.

        The tournament is clamped to the population size so that populations
        smaller than ``tournament_size`` do not make ``random.sample`` raise.
        """
        size = max(1, min(tournament_size, len(self.population)))
        tournament = random.sample(self.population, size)
        return max(tournament, key=lambda ind: ind.fitness)

    def _evaluate(self, individual: "Individual") -> float:
        """Score ``individual``, cache the result on it and update the best-so-far."""
        fitness = self.fitness_func(individual.genes)
        individual.fitness = fitness
        if fitness > self.best_fitness:
            self.best_fitness = fitness
            self.best_individual = individual
        return fitness

    def _breed_generation(self) -> "List[Individual]":
        """Produce the next population from the current, already scored, one."""
        offspring = []
        while len(offspring) < self.pop_size:
            parent1 = self._tournament_select()
            parent2 = self._tournament_select()

            crossover_arm = self._select_arm(self.crossover_arms)
            mutation_arm = self._select_arm(self.mutation_arms)
            crossover_rate = self.crossover_rates[self.crossover_arms.index(crossover_arm)]
            mutation_rate = self.mutation_rates[self.mutation_arms.index(mutation_arm)]

            child1, child2 = self._crossover(parent1, parent2, crossover_rate)
            child1 = self._mutate(child1, mutation_rate)
            child2 = self._mutate(child2, mutation_rate)

            # Scoring through _evaluate keeps best-so-far current for every
            # child, including those bred in the very last generation.
            child_mean = (self._evaluate(child1) + self._evaluate(child2)) / 2
            parent_mean = (parent1.fitness + parent2.fitness) / 2
            improvement = max(0.0, child_mean - parent_mean)

            # Both operators share credit for the pair they produced.
            self._update_arm(crossover_arm, improvement)
            self._update_arm(mutation_arm, improvement)

            offspring.extend([child1, child2])

        # Children are added in pairs, so an odd pop_size overshoots by one;
        # keep the fittest pop_size of them.
        offspring.sort(key=lambda ind: ind.fitness, reverse=True)
        return offspring[:self.pop_size]

    def optimize(self, generations: int) -> Tuple[np.ndarray, float]:
        """Evolve the population and return the best solution seen.

        Individuals are scored once and their fitness is cached; only those
        without a fitness yet (the initial population on the first call) are
        evaluated up front, so ``generations=0`` returns the best initial
        individual.

        Args:
            generations: Number of generations to breed.

        Returns:
            ``(genes, fitness)`` of the best individual evaluated so far.

        Raises:
            RuntimeError: If no evaluated individual had a fitness greater
                than ``-inf`` (for example every score was NaN or ``-inf``).
        """
        for individual in self.population:
            if individual.fitness is None:
                self._evaluate(individual)

        for _ in range(generations):
            self.population = self._breed_generation()

        if self.best_individual is None:
            raise RuntimeError(
                "no individual produced a fitness greater than -inf; "
                "check fitness_func for NaN or -inf results"
            )
        return self.best_individual.genes, self.best_fitness


In [3]:
def run():
    """Run the demo: search for the maximum of the negated 5-D sphere function.

    Returns:
        The ``(best_solution, best_fitness)`` pair reported by
        ``GeneticBandit.optimize`` after 100 generations.
    """
    n_dims = 5

    def negative_sphere(x):
        # Maximising -sum(x^2) is the same as minimising the sphere function.
        return -np.sum(x**2)

    search_box = [(-5.0, 5.0) for _ in range(n_dims)]

    ga = GeneticBandit(
        pop_size=50,
        gene_length=n_dims,
        fitness_func=negative_sphere,
        bounds=search_box,
        mutation_rates=[0.01, 0.05, 0.1],
        crossover_rates=[0.7, 0.8, 0.9],
    )

    top_genes, top_score = ga.optimize(generations=100)
    return top_genes, top_score



# Run the demo once and report the best candidate it found.
solution, fitness = run()
print(f"Best solution: {solution}", f"Best fitness: {fitness}", sep="\n")

Best solution: [-0.14321938 -0.04339658 -0.00618358 -0.00350005  0.02315659]
Best fitness: -0.0229817681432529
