In [12]:
!pip install numpy



In [13]:
import random

import numpy as np

class FitnessPredictor:
    """
    Simulates a Deep Learning model that predicts the functional fitness
    (e.g., expression level) of a synthetic DNA sequence.

    The "model" is a simple similarity score against a fixed target string:
    one point per position that matches the target, minus a penalty for any
    difference in length, normalised by the target length and clamped so the
    result is never negative.
    """

    def __init__(self, target_sequence='ATTGCAGCTA'):
        """
        Args:
            target_sequence: The sequence that receives the maximum fitness
                of 1.0. Candidates are compared against it position by
                position.
        """
        self.target_sequence = target_sequence
        # Base-to-index mapping; not read by the scoring code in this class.
        self.alphabet = {'A': 0, 'T': 1, 'C': 2, 'G': 3}

    def _calculate_score(self, sequence):
        """
        Score ``sequence`` against the target.

        Args:
            sequence: Candidate sequence (string).

        Returns:
            float: ``(matches - 0.5 * |len(sequence) - len(target)|) / len(target)``,
            clamped below at 0.0. Returns 0.0 when the target is empty, since
            there is nothing to match and the normalisation would divide by zero.
        """
        target = self.target_sequence
        if not target:
            return 0.0

        # zip() stops at the shorter string, so only overlapping positions count.
        matches = sum(base == wanted for base, wanted in zip(sequence, target))

        # Each base of length difference costs half a match.
        score = matches - abs(len(sequence) - len(target)) * 0.5

        # 0.0 (not 0) keeps the return type a float on the clamped path too.
        return max(0.0, score / len(target))

    def evaluate_fitness(self, sequence: str) -> float:
        """
        Public entry point: fitness of ``sequence``.

        Args:
            sequence: Candidate sequence; an empty or falsy value scores 0.0.

        Returns:
            float: Fitness, never negative; 1.0 for an exact match with the target.
        """
        if not sequence:
            return 0.0
        return self._calculate_score(sequence)

# Example Usage (testing)
if __name__ == '__main__':
    # Quick sanity check of the predictor against a short 8-base target.
    predictor = FitnessPredictor(target_sequence='ATGCATGC')
    print(f"Target Sequence: {predictor.target_sequence}")

    seq_high_fitness = 'ATGCATGT'  # differs from the target only in the last base
    seq_low_fitness = 'GGGGAAAA'   # agrees with the target at just two positions

    for candidate in (seq_high_fitness, seq_low_fitness):
        fitness = predictor.evaluate_fitness(candidate)
        print(f"Fitness of '{candidate}': {fitness:.4f}")

Target Sequence: ATGCATGC
Fitness of 'ATGCATGT': 0.8750
Fitness of 'GGGGAAAA': 0.2500


In [15]:

# NOTE: We assume FitnessPredictor class from the previous cell is already defined and in memory.

# --- Genetic Algorithm (GA) Implementation ---

class GeneticOptimizer:
    """
    Implements a Genetic Algorithm (GA) for DNA sequence optimization.
    Based on Algorithm 1.

    Each generation: score the population, keep the top ``elitism_count``
    sequences unchanged, pick parents by tournament selection, recombine
    them with one-point crossover and apply per-base mutation.
    """

    # Number of individuals drawn for each selection tournament.
    TOURNAMENT_SIZE = 3

    def __init__(self, predictor_class, seq_len=10, pop_size=50, max_generations=100,
                 mutation_rate=0.05, elitism_count=2, target_sequence='ATTGCAGCTA',
                 seed=None):
        """
        Initializes the GA parameters.

        Args:
            predictor_class: Class that is instantiated as
                ``predictor_class(target_sequence=...)`` and exposes
                ``evaluate_fitness(sequence)``.
            seq_len: Length of every generated sequence.
            pop_size: Number of sequences per generation (must be >= 1).
            max_generations: Number of generations to run.
            mutation_rate: Per-base probability of a substitution.
            elitism_count: How many top sequences are copied unchanged into
                the next generation.
            target_sequence: Target handed to the predictor. Defaults to the
                value that used to be hard-coded here.
            seed: Optional seed. When given, the optimizer uses its own
                ``random.Random(seed)`` so runs are reproducible; when
                ``None`` it uses the global ``random`` module, as before.

        Raises:
            ValueError: If ``pop_size`` is smaller than 1.
        """
        if pop_size < 1:
            raise ValueError("pop_size must be at least 1")

        predictor_instance = predictor_class(target_sequence=target_sequence)
        self.F = predictor_instance.evaluate_fitness
        self.alphabet = ['A', 'T', 'C', 'G']
        self.seq_len = seq_len
        self.pop_size = pop_size
        self.max_generations = max_generations
        self.mutation_rate = mutation_rate
        self.elitism_count = elitism_count
        self.population = []
        # The random module and random.Random share the method names used
        # below (choice, sample, randint, random), so either can stand in.
        self._rng = random if seed is None else random.Random(seed)

    def initialize_population(self):
        """Initializes population P_0 with ``pop_size`` random sequences of length ``seq_len``."""
        self.population = [''.join(self._rng.choice(self.alphabet)
                                   for _ in range(self.seq_len))
                           for _ in range(self.pop_size)]

    def evaluate_and_select(self):
        """
        Evaluates fitness and selects parents by tournament selection.

        Side effect: ``self.population`` is re-ordered from fittest to least fit.

        Returns:
            tuple: ``(parents, best_sequence, best_fitness)`` where ``parents``
            is a list of ``pop_size`` tournament winners.
        """
        # Score every sequence once; the tournaments below reuse these scores
        # instead of calling the predictor again for each candidate.
        scored = sorted(((seq, self.F(seq)) for seq in self.population),
                        key=lambda pair: pair[1], reverse=True)
        self.population = [seq for seq, _ in scored]
        best_sequence, best_fitness = scored[0]

        # A population smaller than the tournament size would make sample() raise.
        tournament_size = min(self.TOURNAMENT_SIZE, len(scored))

        parents = []
        for _ in range(self.pop_size):
            contenders = self._rng.sample(scored, tournament_size)
            winner = max(contenders, key=lambda pair: pair[1])
            parents.append(winner[0])

        return parents, best_sequence, best_fitness

    def crossover(self, parent_a: str, parent_b: str) -> tuple[str, str]:
        """
        Performs one-point crossover (recombination).

        The cut point is drawn from ``1 .. seq_len - 1`` so both children
        receive material from both parents. With ``seq_len < 2`` no such cut
        exists and the parents are returned unchanged.

        Returns:
            tuple[str, str]: The two children.
        """
        if self.seq_len < 2:
            return parent_a, parent_b

        split = self._rng.randint(1, self.seq_len - 1)
        child1 = parent_a[:split] + parent_b[split:]
        child2 = parent_b[:split] + parent_a[split:]
        return child1, child2

    def mutate(self, sequence: str) -> str:
        """
        Introduces random genetic alterations (mutation).

        Every position of ``sequence`` is replaced, with probability
        ``mutation_rate``, by a different base from the alphabet. The loop
        runs over the actual length of ``sequence`` so inputs whose length
        differs from ``seq_len`` are handled too.

        Returns:
            str: The (possibly) mutated sequence, same length as the input.
        """
        mutated_seq = list(sequence)
        for i, original_base in enumerate(sequence):
            if self._rng.random() < self.mutation_rate:
                alternatives = [b for b in self.alphabet if b != original_base]
                mutated_seq[i] = self._rng.choice(alternatives)
        return "".join(mutated_seq)

    def evolve(self):
        """
        The main evolutionary loop.

        Returns:
            tuple: ``(s_best, f_best)`` - the fittest sequence and its fitness
            as measured in the last evaluation. The offspring produced in the
            final generation are stored in ``self.population`` but are not
            scored. With ``max_generations < 1`` the initial random population
            is scored once and its best member is returned.
        """
        self.initialize_population()

        print("Starting AI-Driven Genetic Algorithm Optimization...")

        if self.max_generations < 1:
            # Nothing to evolve; still return a well-defined result.
            _, s_best, f_best = self.evaluate_and_select()
            return s_best, f_best

        for generation in range(1, self.max_generations + 1):
            parents, s_best, f_best = self.evaluate_and_select()

            # Elitism: the population is sorted best-first at this point.
            next_generation = list(self.population[:self.elitism_count])

            for i in range(self.pop_size // 2):
                parent_a = parents[i * 2]
                parent_b = parents[i * 2 + 1]

                child1, child2 = self.crossover(parent_a, parent_b)
                next_generation.extend([self.mutate(child1), self.mutate(child2)])

            # An odd pop_size with too few elites leaves the generation short;
            # top it up with mutated tournament winners.
            while len(next_generation) < self.pop_size:
                next_generation.append(self.mutate(self._rng.choice(parents)))

            # Elites plus children can exceed pop_size; surplus children are dropped.
            self.population = next_generation[:self.pop_size]

            if generation % 20 == 0 or generation == self.max_generations:
                print(f"Gen {generation:3d}: Best Fitness = {f_best:.4f}, Best Seq = {s_best}")

        return s_best, f_best

if __name__ == '__main__':
    # Seed the global RNG so that Restart Kernel -> Run All reproduces this run.
    SEED = 42
    random.seed(SEED)

    # Define parameters
    # Must equal the target sequence GeneticOptimizer gives its predictor
    # ('ATTGCAGCTA'); it is used here for the sequence length and the report.
    TARGET_SEQ = "ATTGCAGCTA"
    SEQUENCE_LENGTH = len(TARGET_SEQ)

    # 1. Instantiate the Genetic Optimizer, passing the FitnessPredictor class
    # NOTE: We assume the FitnessPredictor class is already defined in the cell above this one.
    ga = GeneticOptimizer(
        predictor_class=FitnessPredictor,  # Class name from previous cell
        seq_len=SEQUENCE_LENGTH,
        pop_size=100,
        max_generations=200,
        mutation_rate=0.01,
        elitism_count=5
    )

    # 2. Run the optimization
    optimized_sequence, final_fitness = ga.evolve()

    # 3. Report the best sequence found next to the known optimum
    print("\n--- Optimization Complete ---")
    print(f"Target Sequence (Conceptual Optimum): {TARGET_SEQ}")
    print(f"Optimized Sequence (S_best): {optimized_sequence}")
    print(f"Final Predicted Fitness: {final_fitness:.4f}")

Starting AI-Driven Genetic Algorithm Optimization...
Gen  20: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen  40: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen  60: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen  80: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen 100: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen 120: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen 140: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen 160: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen 180: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA
Gen 200: Best Fitness = 1.0000, Best Seq = ATTGCAGCTA

--- Optimization Complete ---
Target Sequence (Conceptual Optimum): ATTGCAGCTA
Optimized Sequence (S_best): ATTGCAGCTA
Final Predicted Fitness: 1.0000
