<a href="https://colab.research.google.com/github/jamessutton600613-png/GC/blob/main/WorkingUntitled176gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# First, ensure you have a compatible NVIDIA driver and CUDA toolkit installed.

# Uninstall any existing qiskit, qiskit-aer and numpy packages so the GPU-enabled stack installs cleanly
!pip uninstall -y qiskit-aer numpy qiskit

# Install the GPU versions (match your CUDA version, e.g., cupy-cuda12x)
!pip install 'qiskit<2.0' qiskit-aer-gpu cupy-cuda12x numpy pandas matplotlib tqdm

Found existing installation: numpy 2.0.2
Uninstalling numpy-2.0.2:
  Successfully uninstalled numpy-2.0.2
Collecting qiskit<2.0
  Downloading qiskit-1.4.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting qiskit-aer-gpu
  Downloading qiskit_aer_gpu-0.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.3 kB)
Collecting numpy
  Downloading numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting rustworkx>=0.15.0 (from qiskit<2.0)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting stevedore>=3.0.0 (from qiskit<2.0)
  Downloading stevedore-5.4.1-py3-none-any.whl.metadata (2.3 kB)
Collecting symengine<0.14,>=0.11 (from qiskit<2.0)
  Downloading symengine-0.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.me

In [None]:
import os
import sys
import cupy as cp  # Use CuPy for GPU arrays
import numpy as np # Keep NumPy for CPU-specific tasks and compatibility
import pandas as pd
import random
import pickle
from datetime import datetime
from google.colab import output
from tqdm.notebook import tqdm
from qiskit import QuantumCircuit, transpile
from qiskit_aer import AerSimulator
from google.colab import drive

# Mount Google Drive at /content/drive; the simulation writes its logs and
# checkpoints beneath this mount point.
drive.mount('/content/drive', force_remount=True)

class QuantumRandomGenerator:
    """
    Draws integer seeds by sampling a simulated quantum circuit on the GPU.

    Every seed is the result of a single shot of a circuit in which each
    qubit receives a Hadamard gate and is then measured.
    """

    def __init__(self, num_bits_precision=64):
        # Aer backend configured to execute on the GPU device.
        self.simulator = AerSimulator(device='GPU')
        # Number of qubits (and classical bits) measured for each seed.
        self.num_bits = num_bits_precision

    def get_seed(self):
        """Run the circuit for one shot and return the outcome as an int."""
        wires = range(self.num_bits)

        circuit = QuantumCircuit(self.num_bits, self.num_bits)
        circuit.h(wires)
        circuit.measure(wires, wires)

        # Execution of the transpiled circuit is handed to the GPU simulator.
        compiled = transpile(circuit, self.simulator)
        counts = self.simulator.run(compiled, shots=1).result().get_counts(0)

        # With one shot there is exactly one bitstring key in the counts.
        bitstring = next(iter(counts))
        return int(bitstring, 2)

class Environment:
    """
    Pre-computes the UV-intensity and temperature series read by the simulation.

    The series are built with CuPy and then stored on the host as
    ``uv_intensity`` and ``temperature``. ``time`` is the index of the
    current step; the getters wrap around with a modulo, so the stored
    series repeats once ``time`` exceeds its length.

    Args:
        rng: Random generator providing ``standard_normal`` and ``uniform``
            (a NumPy ``Generator`` satisfies this).
        days_per_cycle: Number of simulation steps that make up one day
            (``steps_per_year`` is ``365 * days_per_cycle``).
    """
    def __init__(self, rng, days_per_cycle=50):
        self.days_per_cycle, self.time = days_per_cycle, 0
        self.rng = rng
        self.steps_per_year = 365 * self.days_per_cycle

        # Generate cycles on the GPU
        uv_gpu, temp_gpu = self._generate_cycles_gpu(num_years=50)

        # IMPORTANT: Move data from GPU back to CPU for general access
        self.uv_intensity = uv_gpu.get()
        self.temperature = temp_gpu.get()

    def _generate_cycles_gpu(self, num_years):
        """
        Build the UV and temperature series and return them as CuPy arrays.

        UV is a half-rectified daily sine scaled by a seasonal amplitude and a
        per-step cloud-cover factor drawn from ``uniform(0.7, 1.0)``.
        Temperature is a seasonal baseline plus a daily swing, a smoothed
        weather-noise term and a per-day anomaly drawn from
        ``uniform(-1.5, 1.5)``, floored at 4.0.

        Args:
            num_years: Sizes the weather-noise series
                (``num_years * steps_per_year`` samples).

        Returns:
            Tuple ``(uv, temperature)`` of CuPy arrays, each
            ``steps_per_year`` long.

        NOTE(review): the loop below covers a single year, so only the first
        ``steps_per_year`` samples of the smoothed noise are consumed even
        though ``num_years`` years of noise are drawn. The draw is kept as-is
        so the random stream (and therefore every result) is unchanged.
        """
        total_steps = num_years * self.steps_per_year

        # Daily profiles, one value per step of a day.
        day_phase = cp.linspace(0, 2 * cp.pi, self.days_per_cycle)
        daily_uv_cycle = cp.sin(day_phase)
        daily_uv_cycle[daily_uv_cycle < 0] = 0  # no UV during the negative half of the sine
        daily_temp_swing = -4 * cp.cos(day_phase)

        # Random noise is drawn on the CPU, then smoothed on the GPU with a
        # 14-day moving average.
        base_weather_noise = cp.asarray(self.rng.standard_normal(total_steps))
        smoothing_window = cp.ones(14 * self.days_per_cycle) / (14 * self.days_per_cycle)
        weather_pattern = cp.convolve(base_weather_noise, smoothing_window, 'same') * 5.0

        # Seasonal profiles, one value per step of a year.
        year_phase = cp.linspace(0, 2 * cp.pi, self.steps_per_year)
        seasonal_temp_base = (15 - 10 * cp.cos(year_phase)).get()
        seasonal_amplitude_mod = (0.225 * cp.sin(year_phase) + 0.725).get()

        # Move everything the loop reads to the host ONCE. Indexing CuPy arrays
        # and calling .get() inside the loop costs a device-to-host transfer
        # for every element.
        daily_uv_host = daily_uv_cycle.get()
        daily_temp_swing_host = daily_temp_swing.get()
        weather_host = weather_pattern[:self.steps_per_year].get()

        full_uv, full_temp = [], []
        daily_temp_anomaly = 0
        for i in range(self.steps_per_year):
            step_in_day = i % self.days_per_cycle
            # A new temperature anomaly is drawn at the first step of each day.
            if step_in_day == 0:
                daily_temp_anomaly = self.rng.uniform(-1.5, 1.5)
            cloud_cover_factor = self.rng.uniform(0.7, 1.0)

            full_uv.append(daily_uv_host[step_in_day] * seasonal_amplitude_mod[i] * cloud_cover_factor)

            solar_temp = seasonal_temp_base[i] + daily_temp_swing_host[step_in_day] + weather_host[i] + daily_temp_anomaly
            full_temp.append(max(4.0, solar_temp))

        # Return final arrays on the GPU (created from CPU lists); the caller
        # moves them back to the host.
        return cp.array(full_uv), cp.array(full_temp)

    def get_current_uv(self):
        """Return the UV intensity at the current time index (wraps around)."""
        return self.uv_intensity[self.time % len(self.uv_intensity)]

    def get_current_temperature(self):
        """Return the temperature at the current time index (wraps around)."""
        return self.temperature[self.time % len(self.temperature)]

    def step(self):
        """Advance the environment clock by one simulation step."""
        self.time += 1

class Protoribosome:
    """
    One replicating RNA agent.

    Each step the agent forages nucleotides, converts some of them to
    triphosphates, takes and repairs UV damage, updates its status from its
    stop-codon content and strategy, and may return one offspring.

    Status values used by the class: 'ACTIVE', 'ARRESTED', 'INACTIVE'.
    Strategies distinguished by the class: 'cautious' and 'readthrough'.
    """

    _BASES = 'AUGC'
    _STOP_CODONS = frozenset(('UAA', 'UAG', 'UGA'))
    # Contribution of individual codons to the protection score.
    _CODON_WEIGHTS = {
        'UGG': 5.0, 'UAU': 3.0, 'UAC': 3.0, 'UUU': 1.5, 'UUC': 1.5,
        'UGU': 1.0, 'UGC': 1.0, 'AUG': 0.75, 'CAU': 0.5, 'CAC': 0.5,
    }

    def __init__(self, env, strategy, initial_sequence, rng, initial_mass=100.0):
        self.env = env
        self.strategy = strategy
        self.rna_sequence = list(initial_sequence)
        self.rng = rng
        self.rna_mass = initial_mass
        self.uv_protection_pool = 50.0
        self.rna_damage_level = 0.0
        self.location = 'shadow_zone'
        self.status = 'ACTIVE'

        # Nucleotide pools per base: mono-, di- and triphosphates.
        self.nmp_pool = dict.fromkeys(self._BASES, 20000)
        self.ndp_pool = dict.fromkeys(self._BASES, 10000)
        self.ntp_pool = dict.fromkeys(self._BASES, 500)

        # Cautious agents take less damage per unit of UV than all others.
        self.uv_damage_rate_per_uv = 1.0 if self.strategy == 'cautious' else 1.4
        self.repair_rate = 0.5
        self.uv_protection_factor = 0.01
        self.damage_tolerance_threshold = 3.0
        self.mutation_prob = 0.004

    def _codons(self):
        """Lazily yield the sequence in consecutive chunks of three bases."""
        seq = self.rna_sequence
        return ("".join(seq[start:start + 3]) for start in range(0, len(seq), 3))

    def calculate_protection_score(self):
        """Return 0.1 per complete codon plus the weights of known codons."""
        baseline = (len(self.rna_sequence) // 3) * 0.1
        bonus = sum(self._CODON_WEIGHTS.get(codon, 0) for codon in self._codons())
        return baseline + bonus

    def has_stop_codon(self):
        """Return True if any in-frame chunk is UAA, UAG or UGA."""
        for codon in self._codons():
            if codon in self._STOP_CODONS:
                return True
        return False

    def _forage_and_phosphorylate(self, current_uv):
        """Gain NMPs, promote one NMP per base to NDP, then NDPs to NTPs.

        The number of NDP->NTP attempts is int(energy / 5), where energy is
        uv_protection_pool * current_uv * 0.1; each attempt picks a base at
        random and only succeeds if that base has NDP left.
        """
        for base in self.nmp_pool:
            self.nmp_pool[base] += 10
            if self.nmp_pool[base] > 0:
                self.nmp_pool[base] -= 1
                self.ndp_pool[base] += 1

        energy = self.uv_protection_pool * current_uv * 0.1
        attempts = int(energy / 5)
        for _ in range(attempts):
            picked = self.rng.choice(list(self.ndp_pool.keys()))
            if self.ndp_pool[picked] > 0:
                self.ndp_pool[picked] -= 1
                self.ntp_pool[picked] += 1

    def _replicate_rna(self):
        """Return a new offspring, or None when replication is not possible.

        Replication requires status 'ACTIVE', rna_mass of at least 80 and
        enough NTPs of every base to copy the sequence. The parent keeps its
        mass; the offspring's mass is the sequence length. With probability
        mutation_prob one position of the offspring is overwritten with a
        randomly chosen base.
        """
        if self.status != 'ACTIVE':
            return None
        if self.rna_mass < 80:
            return None

        needed = {base: self.rna_sequence.count(base) for base in self._BASES}
        for base, count in needed.items():
            if self.ntp_pool[base] < count:
                return None

        # Pay for the copy.
        for base, count in needed.items():
            self.ntp_pool[base] -= count

        # The constructor stores its own list copy of the template, so later
        # mutation of the child never touches the parent's sequence.
        template = "".join(self.rna_sequence)
        child = Protoribosome(self.env, self.strategy, template, self.rng, len(self.rna_sequence))

        if self.rng.random() < self.mutation_prob:
            position = self.rng.integers(len(child.rna_sequence))
            child.rna_sequence[position] = self.rng.choice(list('AUGC'))

        return child

    def step(self, current_uv):
        """Advance the agent by one step; return an offspring or None."""
        if self.status == 'INACTIVE':
            return None

        self._forage_and_phosphorylate(current_uv)

        # Damage: UV attenuated by the protection pool (never below 1% of the
        # incoming UV), followed by a fixed amount of repair.
        shielding = self.uv_protection_pool * self.uv_protection_factor
        effective_uv = current_uv * max(0.01, 1 - shielding)
        damaged = self.rna_damage_level + effective_uv * self.uv_damage_rate_per_uv
        self.rna_damage_level = max(0, damaged - self.repair_rate)

        if self.rna_damage_level > self.damage_tolerance_threshold:
            self.status = 'INACTIVE'
            return None

        # Stop-codon handling depends on the strategy.
        if self.has_stop_codon():
            if self.strategy == 'cautious':
                self.status = 'ARRESTED'
            elif self.strategy == 'readthrough':
                self.status = 'INACTIVE'
                return None
        elif self.status == 'ARRESTED':
            self.status = 'ACTIVE'

        # Only active agents build protection and attempt replication.
        if self.status != 'ACTIVE':
            return None
        self.uv_protection_pool += 0.20 * self.calculate_protection_score()
        return self._replicate_rna()

class Colony:
    """
    A population of Protoribosome agents sharing one environment.

    Agents whose status becomes 'INACTIVE' are moved from
    ``active_population`` to ``inactive_population``. The active population
    is capped at ``max_population`` (10000) by shuffling and truncating.

    Args:
        env: Environment providing ``get_current_uv()`` and ``step()``.
        dna_template: Initial sequence given to every founding agent.
        initial_pop_size: Number of founding agents; even indices are
            'cautious', odd indices are 'readthrough'.
        steps_per_day: Stored on the colony; not otherwise used by it.
        rng: Random generator handed to every founding agent.
        shuffle_rng: Object with a ``shuffle(list)`` method used when capping.
        replica_number: Identifier stored for use in checkpoint file names.
    """
    def __init__(self, env, dna_template, initial_pop_size, steps_per_day, rng, shuffle_rng, replica_number):
        self.env, self.dna_template, self.steps_per_day = env, dna_template, steps_per_day
        self.max_population = 10000
        self.rng, self.shuffle_rng = rng, shuffle_rng
        self.replica_number = replica_number
        self.active_population = [
            Protoribosome(env, 'cautious' if i % 2 == 0 else 'readthrough', dna_template, rng)
            for i in range(initial_pop_size)
        ]
        self.inactive_population = []

    def step(self, current_step):
        """
        Advance every active agent by one step, then advance the environment.

        All agents see the UV value that was current when the step began.
        Survivors and any offspring form the next active population; agents
        that became 'INACTIVE' are archived. The environment clock is moved
        forward once at the end so that the next call reads the next UV value
        (without this the environment would stay at its first time index for
        the entire run).

        Args:
            current_step: Index of the simulation step; currently informational.
        """
        current_uv = self.env.get_current_uv()
        next_generation = []
        for agent in self.active_population:
            offspring = agent.step(current_uv)
            if agent.status != 'INACTIVE':
                next_generation.append(agent)
            else:
                self.inactive_population.append(agent)
            if offspring is not None:
                next_generation.append(offspring)

        # Enforce the population cap by keeping a random subset.
        if len(next_generation) > self.max_population:
            self.shuffle_rng.shuffle(next_generation)
            next_generation = next_generation[:self.max_population]
        self.active_population = next_generation

        # Move the environment clock forward exactly once per colony step.
        self.env.step()

    def get_aggregated_data(self):
        """
        Summarise the colony as a flat dict.

        Returns:
            Dict with, for each of 'cautious' and 'readthrough': the active
            count (key is the strategy name), ``<strategy>_avg_damage``,
            ``<strategy>_nmp_total``, ``<strategy>_ndp_total``,
            ``<strategy>_ntp_total`` and ``<strategy>_stop_count``; plus
            ``inactive``, the number of archived agents. Entries for a
            strategy with no active agents stay at zero.
        """
        data = {
            'cautious': 0, 'readthrough': 0, 'inactive': 0,
            'cautious_avg_damage': 0.0, 'readthrough_avg_damage': 0.0,
            'cautious_nmp_total': 0, 'cautious_ndp_total': 0, 'cautious_ntp_total': 0,
            'readthrough_nmp_total': 0, 'readthrough_ndp_total': 0, 'readthrough_ntp_total': 0,
            'cautious_stop_count': 0, 'readthrough_stop_count': 0
        }

        for strategy in ('cautious', 'readthrough'):
            members = [p for p in self.active_population if p.strategy == strategy]
            data[strategy] = len(members)
            if not members:
                continue  # keep the zero defaults and avoid dividing by zero
            data[f'{strategy}_avg_damage'] = sum(p.rna_damage_level for p in members) / len(members)
            data[f'{strategy}_nmp_total'] = sum(sum(p.nmp_pool.values()) for p in members)
            data[f'{strategy}_ndp_total'] = sum(sum(p.ndp_pool.values()) for p in members)
            data[f'{strategy}_ntp_total'] = sum(sum(p.ntp_pool.values()) for p in members)
            data[f'{strategy}_stop_count'] = sum(1 for p in members if p.has_stop_codon())

        data['inactive'] = len(self.inactive_population)
        return data

# Define save_checkpoint and load_latest_checkpoint here or ensure they are imported/defined in the same cell
def save_checkpoint(colony_object, replica_dir, step_count):
    """
    Pickle the colony to ``<replica_dir>/checkpoints/``.

    The file is named ``replica_{replica_number}_step_{step_count}.pkl``. The
    data is first written to a temporary ``.tmp`` sibling and then renamed
    into place, so an interrupted or failed dump never leaves a truncated
    ``.pkl`` file behind.

    Args:
        colony_object: Object to pickle; must expose ``replica_number``.
        replica_dir: Directory of the replica; ``checkpoints`` is created
            inside it if needed.
        step_count: Simulation step used in the file name.

    Errors raised while writing are reported with ``print`` and not
    re-raised.
    """
    checkpoint_dir = os.path.join(replica_dir, 'checkpoints')
    os.makedirs(checkpoint_dir, exist_ok=True)  # Ensure the directory exists

    checkpoint_filename = f"replica_{colony_object.replica_number}_step_{step_count}.pkl"
    checkpoint_path = os.path.join(checkpoint_dir, checkpoint_filename)
    # The '.tmp' suffix keeps partial files out of the 'replica_*.pkl' pattern.
    temp_path = checkpoint_path + '.tmp'

    try:
        with open(temp_path, 'wb') as f:
            pickle.dump(colony_object, f)
        os.replace(temp_path, checkpoint_path)
        print(f"Checkpoint saved successfully at step {step_count}: {checkpoint_path}")
    except Exception as e:
        print(f"Error saving checkpoint at step {step_count}: {e}")
    finally:
        # After a successful rename the temp file no longer exists; after a
        # failure or an interrupt this removes the partial file.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

def load_latest_checkpoint(replica_dir):
    """
    Load the newest readable checkpoint found under ``replica_dir``.

    Checkpoints are the ``replica_*.pkl`` files in
    ``<replica_dir>/checkpoints``; "newest" means the highest step number
    parsed from the ``..._step_{n}.pkl`` file name. If the newest file cannot
    be unpickled, the next newest is tried, and so on.

    Args:
        replica_dir: Directory of the replica.

    Returns:
        ``(colony_object, resume_step)`` where ``resume_step`` is the
        checkpoint's step plus one, or ``(None, 0)`` when no checkpoint
        could be loaded.

    NOTE: ``pickle.load`` can execute arbitrary code; only point this at
    checkpoint directories written by this simulation.
    """
    # Imported here so the function works even when the surrounding cell has
    # not imported glob at module level.
    import glob

    checkpoint_dir = os.path.join(replica_dir, 'checkpoints')
    if not os.path.exists(checkpoint_dir):
        print(f"No checkpoints directory found for replica in {replica_dir}")
        return None, 0

    checkpoint_files = glob.glob(os.path.join(checkpoint_dir, 'replica_*.pkl'))
    if not checkpoint_files:
        print(f"No checkpoint files found in {checkpoint_dir}")
        return None, 0

    def get_step_from_filename(filepath):
        """Parse the number after 'step' in the file name; -1 if absent."""
        filename = os.path.basename(filepath)
        try:
            parts = filename.replace('.pkl', '').split('_')
            return int(parts[parts.index('step') + 1])
        except (ValueError, IndexError) as e:
            print(f"Warning: Could not parse step from filename {filename}. Error: {e}")
            return -1  # sorts before every real (non-negative) step

    # Parse each name once, then try candidates from newest to oldest.
    ranked = sorted(
        ((get_step_from_filename(path), path) for path in checkpoint_files),
        reverse=True,
    )
    for step, path in ranked:
        print(f"Loading latest checkpoint: {path}")
        try:
            with open(path, 'rb') as f:
                colony_object = pickle.load(f)
        except Exception as e:
            # A damaged file must not discard older, valid checkpoints.
            print(f"Error loading checkpoint {path}: {e}")
            continue
        print("Checkpoint loaded successfully.")
        # The simulation should resume from the step *after* the checkpoint.
        return colony_object, step + 1

    return None, 0


def run_single_simulation(steps_per_day, sim_rng, shuffle_rng, dna_template, save_dir, replica_number):
    """
    Run (or resume) one replica and write its log to ``save_dir``.

    If a checkpoint exists under ``save_dir`` the pickled colony is resumed
    from the step after it; otherwise a new colony of 5000 agents is created.
    The run lasts at most three environment years and stops early when the
    active population is empty. Aggregated data is recorded every 10 steps
    and a checkpoint is saved every 1000 steps. The log is written to
    ``simulation_log.pkl`` even when the run is interrupted.

    Args:
        steps_per_day: Steps per simulated day for a newly created environment.
        sim_rng: Random generator for a newly created environment and colony.
        shuffle_rng: Shuffler for a newly created colony.
        dna_template: Founding sequence for a newly created colony.
        save_dir: Directory holding checkpoints and the simulation log.
        replica_number: Replica identifier (progress bar label, checkpoint names).

    Returns:
        ``(summary, log_df)``: a dict with 'Winner', 'Duration',
        'Final Cautious' and 'Final Reckless', and the log as a DataFrame.
    """
    colony, resume_step = load_latest_checkpoint(save_dir)

    if colony is None:
        print("No checkpoint found. Starting new simulation.")
        colony = Colony(Environment(sim_rng, steps_per_day), dna_template, 5000, steps_per_day, sim_rng, shuffle_rng, replica_number)
        start_step = 0
    else:
        print(f"Checkpoint loaded. Resuming simulation from step {resume_step}.")
        # The Environment object is part of the pickled Colony, so its state is restored.
        start_step = resume_step

    data_log = []
    max_run_steps = 3 * colony.env.steps_per_year
    checkpoint_interval = 1000

    # Last step actually executed. Pre-set so the summary is well defined even
    # when the loop body never runs (e.g. resuming an already finished run).
    last_step = max(start_step - 1, 0)

    progress_bar = tqdm(desc=f"Simulating Replica {replica_number}", total=max_run_steps, initial=start_step, leave=False)

    try:
        for step_count in range(start_step, max_run_steps):
            last_step = step_count
            colony.step(step_count)

            if step_count % 10 == 0:
                agg_data = colony.get_aggregated_data()
                agg_data['time'] = step_count
                agg_data['uv'] = colony.env.get_current_uv()
                agg_data['temp'] = colony.env.get_current_temperature()
                data_log.append(agg_data)

                c = agg_data.get('cautious', 0)
                r = agg_data.get('readthrough', 0)
                progress_bar.set_postfix_str(f"Cautious:{c}, Reckless:{r}")

            # Save checkpoint periodically
            if step_count > 0 and step_count % checkpoint_interval == 0:
                save_checkpoint(colony, save_dir, step_count)

            progress_bar.update(1)
            if len(colony.active_population) == 0:
                print("--- Population extinct. ---")
                break
    finally:
        progress_bar.close()
        # Persist the log even if the run was interrupted.
        log_df = pd.DataFrame(data_log)
        output_path = os.path.join(save_dir, 'simulation_log.pkl')
        # If resuming, load the existing log and append this segment to it.
        if start_step > 0 and os.path.exists(output_path):
            try:
                with open(output_path, 'rb') as f:
                    existing_data = pickle.load(f)
                existing_log_df = pd.DataFrame(existing_data['log'])
                # The earlier log may extend past the checkpoint that was
                # resumed; drop those rows so no time value appears twice.
                if 'time' in existing_log_df.columns:
                    existing_log_df = existing_log_df[existing_log_df['time'] < start_step]
                log_df = pd.concat([existing_log_df, log_df], ignore_index=True)
            except Exception as e:
                print(f"Error loading existing simulation log: {e}. Saving only current segment.")

        with open(output_path, 'wb') as f:
            pickle.dump({'log': log_df.to_dict('records')}, f)

    final_agg = colony.get_aggregated_data()
    final_c, final_r = final_agg.get('cautious', 0), final_agg.get('readthrough', 0)
    winner = "Cautious" if final_r == 0 and final_c > 0 else "Reckless" if final_c == 0 and final_r > 0 else "Tie/Extinction"
    summary = {'Winner':winner,'Duration':last_step,'Final Cautious':final_c,'Final Reckless':final_r}

    return summary, log_df


def main_orchestrator(num_replicas, steps_per_day, experiment_dir=None):
    """
    Run ``num_replicas`` simulations and print a summary table.

    Each replica gets its own ``Replica_{i}`` sub-directory and a seed drawn
    from ``QuantumRandomGenerator(29)``. The log DataFrame of the last replica
    is published as the module-level name ``last_simulation_df`` for plotting.

    Args:
        num_replicas: Number of replicas to run (numbered from 1).
        steps_per_day: Steps per simulated day, passed to each replica.
        experiment_dir: Directory that holds the ``Replica_{i}`` folders.
            When omitted, a new directory named after the current date and
            time is created under the Drive experiments folder. Pass the
            directory of an earlier run to resume it from its checkpoints; a
            freshly timestamped directory never contains any.
    """
    global last_simulation_df

    DNA_TEMPLATE = "AUGUGUUACUGG"
    all_summaries = []
    # DataFrame of the most recently finished replica.
    last_details_df = None

    if experiment_dir is None:
        base_save_dir = '/content/drive/My Drive/Colab Notebooks/Sim_Experiments'
        experiment_dir = os.path.join(base_save_dir, f"Experiment_{datetime.now().strftime('%Y-%m-%d_%H-%M')}")
    os.makedirs(experiment_dir, exist_ok=True)

    for i in range(1, num_replicas + 1):
        print(f"\n--- Processing Replica {i}/{num_replicas} ---")
        # A fresh seed is drawn for every replica. When a checkpoint is
        # loaded, the generators pickled with the colony are used instead of
        # the ones created here.
        q_rng = QuantumRandomGenerator(29)
        quantum_seed = q_rng.get_seed()
        sim_rng = np.random.default_rng(seed=quantum_seed)
        shuffle_rng = random.Random(quantum_seed)

        replica_dir = os.path.join(experiment_dir, f"Replica_{i}")
        os.makedirs(replica_dir, exist_ok=True)

        summary, details_df = run_single_simulation(steps_per_day, sim_rng, shuffle_rng, DNA_TEMPLATE, replica_dir, i)
        summary['Replica'] = i
        all_summaries.append(summary)
        last_details_df = details_df

        print(f"Data for Replica {i} saved in: {replica_dir}")

    print("\n\n" + "="*80 + "\n" + " EXPERIMENT SUMMARY ".center(80, "="))
    if all_summaries:
        summary_df = pd.DataFrame(all_summaries).set_index('Replica')
        print(summary_df.to_string())
    print("="*80)

    # Make the last details_df available in the global scope for plotting
    last_simulation_df = last_details_df

# Entry point: run two replicas with 10 simulation steps per simulated day.
if __name__ == "__main__":
    main_orchestrator(num_replicas=2, steps_per_day=10)

Mounted at /content/drive

--- Processing Replica 1/2 ---
No checkpoints directory found for replica in /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-14/Replica_1
No checkpoint found. Starting new simulation.


Simulating Replica 1:   0%|          | 0/10950 [00:00<?, ?it/s]

Checkpoint saved successfully at step 1000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-14/Replica_1/checkpoints/replica_1_step_1000.pkl
Checkpoint saved successfully at step 2000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-14/Replica_1/checkpoints/replica_1_step_2000.pkl


KeyboardInterrupt: 

# Task
Plot the population counts of 'cautious' and 'readthrough' protoribosomes over time from the simulation log data stored in the pickle file "/content/drive/My Drive/protoribosome_simulation_log.pkl". Also, plot the UV intensity and temperature over time from the same log file. Summarize the findings from the plots.

## Load simulation data

### Subtask:
Load the simulation log data from the pickle file saved in Google Drive.


**Reasoning**:
I need to load the simulation data from a pickle file. Since the exact path contains timestamps and replica numbers, I will need to list the directories to find the most recent experiment and replica to load.



In [None]:
import pickle
import os
import glob
import pandas as pd

# Define the base directory
base_dir = '/content/drive/My Drive/Colab Notebooks/Sim_Experiments/'


def _replica_index(path):
    """Return the numeric suffix of a 'Replica_<n>' directory, or -1 if there is none."""
    suffix = os.path.basename(os.path.normpath(path)).rpartition('_')[2]
    return int(suffix) if suffix.isdigit() else -1


# Find the most recent experiment directory. The names end in a zero-padded
# 'YYYY-MM-DD_HH-MM' timestamp, so plain string order is chronological order.
experiment_dirs = sorted(glob.glob(os.path.join(base_dir, 'Experiment_*')))
if not experiment_dirs:
    print("No experiment directories found.")
else:
    latest_experiment_dir = experiment_dirs[-1]
    print(f"Using latest experiment directory: {latest_experiment_dir}")

    # Find the latest replica directory within the latest experiment. Replica
    # numbers are not zero-padded, so they are ordered numerically; string
    # order would place 'Replica_10' before 'Replica_2'.
    replica_dirs = sorted(glob.glob(os.path.join(latest_experiment_dir, 'Replica_*')), key=_replica_index)
    if not replica_dirs:
        print("No replica directories found in the latest experiment.")
    else:
        latest_replica_dir = replica_dirs[-1]
        print(f"Using latest replica directory: {latest_replica_dir}")

        # Define the full path to the simulation log file
        log_file_path = os.path.join(latest_replica_dir, 'simulation_log.pkl')

        # Load the data from the pickle file
        try:
            with open(log_file_path, 'rb') as f:
                data = pickle.load(f)
            simulation_log = data['log']
            print("Simulation log loaded successfully.")

            # Convert the list of dictionaries to a pandas DataFrame
            df_log = pd.DataFrame(simulation_log)
            print("Simulation log converted to DataFrame.")
            display(df_log.head())

        except FileNotFoundError:
            print(f"Error: Simulation log file not found at {log_file_path}")
        except KeyError:
            print("Error: 'log' key not found in the pickle file.")
        except Exception as e:
            print(f"An error occurred while loading the data: {e}")

**Reasoning**:
The previous attempt to load the data failed because the file was not found at the expected path. Looking at the previous code execution, it seems the simulation was interrupted before the file was successfully saved. I need to rerun the simulation to generate the log file.



In [None]:
# Rerun the simulation (one replica, 10 steps per simulated day) to generate the log file
main_orchestrator(num_replicas=1, steps_per_day=10)


--- Processing Replica 1/1 ---


Simulating Replica:   0%|          | 0/10950 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Task
Implement a checkpointing mechanism in the simulation code that saves the full state of the `Colony` object periodically to a file in Google Drive and allows resuming a simulation from the latest checkpoint.

## Define checkpointing mechanism

### Subtask:
Define how and where to save the full state of the `Colony` object.


**Reasoning**:
Outline the process for saving and loading the checkpoint based on the subtask instructions.



In [None]:
# 1. Naming convention for checkpoint files:
# Checkpoint files will be named using the format:
# replica_{replica_number}_step_{step_count}.pkl
# This ensures unique names per replica and step.

# 2. Directory structure:
# Checkpoints will be stored within the existing replica directory structure:
# /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_YYYY-MM-DD_HH-MM/Replica_i/checkpoints/
# A 'checkpoints' subdirectory will be created within each replica directory.

# 3. Suitability of pickle:
# The `pickle` module is generally suitable for serializing complex Python objects like the Colony class,
# which contains other custom class instances (Protoribosome, Environment) and standard data structures (lists, dictionaries, NumPy arrays).
# It's important to ensure that all objects within the Colony are picklable.
# Given the current structure, pickle should work.

# 4. Process for saving a checkpoint:
def save_checkpoint(colony_object, replica_dir, step_count):
    """
    Pickle the colony to ``<replica_dir>/checkpoints/``.

    The file is named ``replica_{replica_number}_step_{step_count}.pkl``. The
    data is first written to a temporary ``.tmp`` sibling and then renamed
    into place, so an interrupted or failed dump never leaves a truncated
    ``.pkl`` file behind.

    Args:
        colony_object: Object to pickle; must expose ``replica_number``.
        replica_dir: Directory of the replica; ``checkpoints`` is created
            inside it if needed.
        step_count: Simulation step used in the file name.

    Errors raised while writing are reported with ``print`` and not
    re-raised.
    """
    checkpoint_dir = os.path.join(replica_dir, 'checkpoints')
    os.makedirs(checkpoint_dir, exist_ok=True)  # Ensure the directory exists

    checkpoint_filename = f"replica_{colony_object.replica_number}_step_{step_count}.pkl"
    checkpoint_path = os.path.join(checkpoint_dir, checkpoint_filename)
    # The '.tmp' suffix keeps partial files out of the 'replica_*.pkl' pattern.
    temp_path = checkpoint_path + '.tmp'

    try:
        with open(temp_path, 'wb') as f:
            pickle.dump(colony_object, f)
        os.replace(temp_path, checkpoint_path)
        print(f"Checkpoint saved successfully at step {step_count}: {checkpoint_path}")
    except Exception as e:
        print(f"Error saving checkpoint at step {step_count}: {e}")
    finally:
        # After a successful rename the temp file no longer exists; after a
        # failure or an interrupt this removes the partial file.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass


# 5. Process for loading a checkpoint:
def load_latest_checkpoint(replica_dir):
    """Load the most recent usable checkpoint stored for a replica.

    Looks in ``<replica_dir>/checkpoints`` for files named
    ``replica_{replica_number}_step_{step_count}.pkl`` and tries them from the
    highest step number downwards. A checkpoint that cannot be unpickled
    (for example a truncated file) is reported and skipped, so an older valid
    checkpoint is used instead. Files whose step number cannot be parsed are
    ignored.

    Args:
        replica_dir: Replica directory that contains the ``checkpoints`` folder.

    Returns:
        A ``(colony_object, resume_step)`` tuple. ``resume_step`` is the step
        after the one the checkpoint was saved at. ``(None, 0)`` is returned
        when there is no directory, no checkpoint file, or no loadable file.
    """
    # Local import: `glob` is not among the notebook's visible top-level imports.
    import glob

    checkpoint_dir = os.path.join(replica_dir, 'checkpoints')
    if not os.path.exists(checkpoint_dir):
        print(f"No checkpoints directory found for replica in {replica_dir}")
        return None, 0  # Return None and starting step 0

    # Find all checkpoint files in the directory
    checkpoint_files = glob.glob(os.path.join(checkpoint_dir, 'replica_*.pkl'))
    if not checkpoint_files:
        print(f"No checkpoint files found in {checkpoint_dir}")
        return None, 0  # Return None and starting step 0

    def get_step_from_filename(filepath):
        """Return the step encoded in the file name, or -1 if it cannot be parsed."""
        filename = os.path.basename(filepath)
        try:
            parts = filename.replace('.pkl', '').split('_')
            # The step number is the token right after the literal 'step'.
            return int(parts[parts.index('step') + 1])
        except (ValueError, IndexError) as e:
            print(f"Warning: Could not parse step from filename {filename}. Error: {e}")
            return -1

    # Parse each name once and order numerically, newest first
    # (a plain string sort would put step_900 after step_1000).
    candidates = sorted(
        ((get_step_from_filename(path), path) for path in checkpoint_files),
        reverse=True,
    )

    for step, path in candidates:
        if step < 0:
            continue  # Unparsable name: no trustworthy resume step.
        print(f"Loading latest checkpoint: {path}")
        try:
            # NOTE: pickle executes code on load; only open checkpoints written
            # by this notebook.
            with open(path, 'rb') as f:
                colony_object = pickle.load(f)
        except Exception as e:
            print(f"Error loading checkpoint {path}: {e}")
            continue  # Fall back to the next older checkpoint.
        print("Checkpoint loaded successfully.")
        # The simulation should resume from the step *after* the checkpoint was saved
        return colony_object, step + 1

    print(f"No loadable checkpoint found in {checkpoint_dir}")
    return None, 0  # Return None and starting step 0 if nothing could be loaded


## Implement periodic saving

### Subtask:
Add code within the `run_single_simulation` loop to save the `Colony` object's state to a checkpoint file at regular intervals.


**Reasoning**:
Add checkpointing logic to the `run_single_simulation` function to save the colony state periodically and update the Colony class to store the replica number.



In [None]:
class Colony:
    """A population of protoribosomes living in one shared environment.

    The colony tracks the individuals that are still active, the ones that
    have become inactive, and the replica number it belongs to (used when
    naming checkpoint files).
    """

    def __init__(self, env, dna_template, initial_pop_size, steps_per_day, rng, shuffle_rng, replica_number):
        """Create the founding population.

        Founders alternate between the 'cautious' strategy (even index) and
        the 'readthrough' strategy (odd index).
        """
        self.env = env
        self.dna_template = dna_template
        self.steps_per_day = steps_per_day
        self.max_population = 10000
        self.rng = rng
        self.shuffle_rng = shuffle_rng
        self.replica_number = replica_number

        founders = []
        for index in range(initial_pop_size):
            strategy = 'readthrough' if index % 2 else 'cautious'
            founders.append(Protoribosome(env, strategy, dna_template, rng))
        self.active_population = founders
        self.inactive_population = []

    def step(self, current_step):
        """Advance every active individual by one step.

        Individuals whose status is 'INACTIVE' after stepping are moved to the
        inactive list; any offspring returned by an individual joins the
        active population right after its parent. When the active population
        exceeds ``max_population`` it is shuffled with ``shuffle_rng`` and cut
        down to the cap. ``current_step`` is accepted but not used here.
        """
        uv_level = self.env.get_current_uv()
        survivors = []
        for member in self.active_population:
            child = member.step(uv_level)
            if member.status == 'INACTIVE':
                self.inactive_population.append(member)
            else:
                survivors.append(member)
            if child:
                survivors.append(child)

        self.active_population = survivors
        if len(survivors) > self.max_population:
            # Random cull: shuffle in place, then keep the first max_population.
            self.shuffle_rng.shuffle(survivors)
            self.active_population = survivors[:self.max_population]

    def get_aggregated_data(self):
        """Return per-strategy counts, mean damage, pool totals and stop counts.

        All statistics default to zero; a strategy with no active members
        keeps those defaults. 'inactive' is always the size of the inactive
        list.
        """
        # Key order is kept fixed because it determines the log's column order.
        stats = {
            'cautious': 0, 'readthrough': 0, 'inactive': 0,
            'cautious_avg_damage': 0.0, 'readthrough_avg_damage': 0.0,
            'cautious_nmp_total': 0, 'cautious_ndp_total': 0, 'cautious_ntp_total': 0,
            'readthrough_nmp_total': 0, 'readthrough_ndp_total': 0, 'readthrough_ntp_total': 0,
            'cautious_stop_count': 0, 'readthrough_stop_count': 0
        }
        stats['inactive'] = len(self.inactive_population)

        for strategy in ('cautious', 'readthrough'):
            members = [p for p in self.active_population if p.strategy == strategy]
            count = len(members)
            stats[strategy] = count
            if count == 0:
                continue
            stats[f'{strategy}_avg_damage'] = sum(p.rna_damage_level for p in members) / count
            stats[f'{strategy}_nmp_total'] = sum(sum(p.nmp_pool.values()) for p in members)
            stats[f'{strategy}_ndp_total'] = sum(sum(p.ndp_pool.values()) for p in members)
            stats[f'{strategy}_ntp_total'] = sum(sum(p.ntp_pool.values()) for p in members)
            stats[f'{strategy}_stop_count'] = sum(1 for p in members if p.has_stop_codon())

        return stats

def run_single_simulation(steps_per_day, rng, shuffle_rng, dna_template, save_dir, replica_number,
                          *, initial_pop_size=5000, checkpoint_interval=1000):
    """Run one replica, logging aggregated data and saving periodic checkpoints.

    The colony is stepped for at most three simulated years
    (``3 * env.steps_per_year``) or until the active population is empty.
    Aggregated statistics are recorded every 10 steps, and the whole colony is
    checkpointed every ``checkpoint_interval`` steps (never at step 0). The
    log is written to ``<save_dir>/simulation_log.pkl`` even when the loop is
    interrupted by an exception.

    Args:
        steps_per_day: Number of simulation steps per day.
        rng: Random generator passed to the environment and the colony.
        shuffle_rng: Generator the colony uses to shuffle when capping.
        dna_template: Template sequence handed to the colony.
        save_dir: Replica directory for the log and the checkpoints.
        replica_number: Replica identifier stored on the colony.
        initial_pop_size: Size of the founding population.
        checkpoint_interval: Steps between checkpoints; ``0`` or ``None``
            disables checkpointing.

    Returns:
        ``(summary, log_df)``: a dict with 'Winner', 'Duration',
        'Final Cautious' and 'Final Reckless', and the log as a DataFrame.
    """
    colony = Colony(Environment(rng, steps_per_day), dna_template, initial_pop_size,
                    steps_per_day, rng, shuffle_rng, replica_number)
    data_log = []
    max_run_steps = 3 * colony.env.steps_per_year
    # Pre-bind so the summary below is well-defined even if the loop never runs.
    step_count = 0

    progress_bar = tqdm(desc="Simulating Replica", total=max_run_steps, leave=False)
    try:
        for step_count in range(max_run_steps):
            colony.step(step_count)

            if step_count % 10 == 0:
                agg_data = colony.get_aggregated_data()
                agg_data['time'] = step_count
                agg_data['uv'] = colony.env.get_current_uv()
                agg_data['temp'] = colony.env.get_current_temperature()
                data_log.append(agg_data)

                c = agg_data.get('cautious', 0)
                r = agg_data.get('readthrough', 0)
                progress_bar.set_postfix_str(f"Cautious:{c}, Reckless:{r}")

            # Save checkpoint periodically; a falsy interval disables it and
            # avoids a modulo-by-zero.
            if checkpoint_interval and step_count > 0 and step_count % checkpoint_interval == 0:
                save_checkpoint(colony, save_dir, step_count)

            progress_bar.update(1)
            if not colony.active_population:
                print("--- Population extinct. ---")
                break
    finally:
        progress_bar.close()
        log_df = pd.DataFrame(data_log)
        # Save data even if interrupted
        output_path = os.path.join(save_dir, 'simulation_log.pkl')
        with open(output_path, 'wb') as f:
            pickle.dump({'log': log_df.to_dict('records')}, f)

    final_agg = colony.get_aggregated_data()
    final_c, final_r = final_agg.get('cautious', 0), final_agg.get('readthrough', 0)
    # A strategy wins only if it alone still has active members.
    if final_r == 0 and final_c > 0:
        winner = "Cautious"
    elif final_c == 0 and final_r > 0:
        winner = "Reckless"
    else:
        winner = "Tie/Extinction"
    summary = {'Winner': winner, 'Duration': step_count, 'Final Cautious': final_c, 'Final Reckless': final_r}

    return summary, log_df  # Return the DataFrame


def main_orchestrator(num_replicas, steps_per_day):
    """Run ``num_replicas`` independent simulations and print a summary table.

    Each replica gets its own seed from ``QuantumRandomGenerator`` and its own
    ``Replica_<i>`` directory inside a time-stamped experiment folder on the
    mounted Google Drive. The log DataFrame of the last replica is published
    as the module-level ``last_simulation_df`` for later plotting.

    Args:
        num_replicas: Number of replicas to run (numbered from 1).
        steps_per_day: Simulation steps per day, forwarded to each replica.
    """
    # Make the last details_df available in the global scope for plotting
    global last_simulation_df

    DNA_TEMPLATE = "AUGUGUUACUGG"
    all_summaries = []
    # Variable to store the last details_df for potential plotting
    last_details_df = None

    # The Drive mount exposes the user's files under 'My Drive'; the previous
    # '/content/drive/drive' path failed with OSError (Errno 95) in makedirs.
    base_save_dir = '/content/drive/My Drive/Colab Notebooks/Sim_Experiments'
    experiment_dir = os.path.join(base_save_dir, f"Experiment_{datetime.now().strftime('%Y-%m-%d_%H-%M')}")
    os.makedirs(experiment_dir, exist_ok=True)

    for i in range(1, num_replicas + 1):
        print(f"\n--- Processing Replica {i}/{num_replicas} ---")
        # One seed per replica drives both generators.
        q_rng = QuantumRandomGenerator(29)
        quantum_seed = q_rng.get_seed()
        sim_rng = np.random.default_rng(seed=quantum_seed)
        shuffle_rng = random.Random(quantum_seed)

        replica_dir = os.path.join(experiment_dir, f"Replica_{i}")
        os.makedirs(replica_dir, exist_ok=True)

        summary, details_df = run_single_simulation(steps_per_day, sim_rng, shuffle_rng, DNA_TEMPLATE, replica_dir, i) # Pass replica_number
        summary['Replica'] = i
        all_summaries.append(summary)
        last_details_df = details_df # Store the DataFrame from the last run

        print(f"Data for Replica {i} saved in: {replica_dir}")

    print("\n\n" + "="*80 + "\n" + " EXPERIMENT SUMMARY ".center(80, "="))
    if all_summaries:
        summary_df = pd.DataFrame(all_summaries).set_index('Replica')
        print(summary_df.to_string())
    print("="*80)

    last_simulation_df = last_details_df

if __name__ == "__main__":
    # save_checkpoint (called by run_single_simulation) must already be defined
    # in this session; it is expected to come from the earlier checkpointing cell.
    # Run a small experiment: 2 replicas with steps_per_day=10.
    main_orchestrator(num_replicas=2, steps_per_day=10)

OSError: [Errno 95] Operation not supported: '/content/drive/drive'

**Reasoning**:
The error indicates an issue with the path `/content/drive/drive`. This is likely a typo in the base directory path. It should be `/content/drive/My Drive/`. Correct the path and re-run the `main_orchestrator` function to initiate the simulation with checkpointing enabled.



In [None]:
# Correct the base directory path and re-run the orchestrator

def main_orchestrator(num_replicas, steps_per_day):
    """Run every replica of an experiment and print the combined summary.

    A time-stamped experiment folder is created under the Drive save
    location, each replica is simulated in its own ``Replica_<n>`` subfolder
    with a freshly drawn seed, and the last replica's log DataFrame is exposed
    as the global ``last_simulation_df`` for plotting.
    """
    # Published for plotting once all replicas have finished.
    global last_simulation_df

    DNA_TEMPLATE = "AUGUGUUACUGG"

    # Corrected base directory path
    base_save_dir = '/content/drive/My Drive/Colab Notebooks/Sim_Experiments'
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M')
    experiment_dir = os.path.join(base_save_dir, f"Experiment_{timestamp}")
    os.makedirs(experiment_dir, exist_ok=True)

    summaries = []
    latest_log_df = None
    for replica_id in range(1, num_replicas + 1):
        print(f"\n--- Processing Replica {replica_id}/{num_replicas} ---")

        # The same seed initialises both the NumPy and the stdlib generator.
        quantum_seed = QuantumRandomGenerator(29).get_seed()
        sim_rng = np.random.default_rng(seed=quantum_seed)
        shuffle_rng = random.Random(quantum_seed)

        replica_dir = os.path.join(experiment_dir, f"Replica_{replica_id}")
        os.makedirs(replica_dir, exist_ok=True)

        summary, latest_log_df = run_single_simulation(
            steps_per_day, sim_rng, shuffle_rng, DNA_TEMPLATE, replica_dir, replica_id
        )
        summary['Replica'] = replica_id
        summaries.append(summary)

        print(f"Data for Replica {replica_id} saved in: {replica_dir}")

    rule = "=" * 80
    print("\n\n" + rule + "\n" + " EXPERIMENT SUMMARY ".center(80, "="))
    if summaries:
        print(pd.DataFrame(summaries).set_index('Replica').to_string())
    print(rule)

    last_simulation_df = latest_log_df

# Re-run the experiment now that the save path points at 'My Drive':
# 2 replicas with steps_per_day=10.
main_orchestrator(num_replicas=2, steps_per_day=10)

NameError: name 'os' is not defined

In [None]:
# Re-run the orchestrator with the corrected path. This cell contains no
# imports itself; presumably the notebook's import cell (os, pickle, ...) was
# re-executed first, since the previous attempt failed with
# "NameError: name 'os' is not defined".
main_orchestrator(num_replicas=2, steps_per_day=10)


--- Processing Replica 1/2 ---
No checkpoints directory found for replica in /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_1
No checkpoint found. Starting new simulation.


Simulating Replica 1:   0%|          | 0/10950 [00:00<?, ?it/s]

Checkpoint saved successfully at step 1000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_1/checkpoints/replica_1_step_1000.pkl
Checkpoint saved successfully at step 2000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_1/checkpoints/replica_1_step_2000.pkl
Checkpoint saved successfully at step 3000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_1/checkpoints/replica_1_step_3000.pkl
Checkpoint saved successfully at step 4000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_1/checkpoints/replica_1_step_4000.pkl
Checkpoint saved successfully at step 5000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_1/checkpoints/replica_1_step_5000.pkl
Checkpoint saved successfully at step 6000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Re

Simulating Replica 2:   0%|          | 0/10950 [00:00<?, ?it/s]

Checkpoint saved successfully at step 1000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_2/checkpoints/replica_2_step_1000.pkl
Checkpoint saved successfully at step 2000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_2/checkpoints/replica_2_step_2000.pkl
Checkpoint saved successfully at step 3000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_2/checkpoints/replica_2_step_3000.pkl
Checkpoint saved successfully at step 4000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_2/checkpoints/replica_2_step_4000.pkl
Checkpoint saved successfully at step 5000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Replica_2/checkpoints/replica_2_step_5000.pkl
Checkpoint saved successfully at step 6000: /content/drive/My Drive/Colab Notebooks/Sim_Experiments/Experiment_2025-07-05_13-18/Re