In [14]:
import modal
from pathlib import Path
from torch.utils.data import DataLoader, Dataset
from transformers import DataCollatorWithPadding, pipeline
import os
import torch
import json
import base64
import numpy as np
import csv
import random
import csv
from deap import base, creator, tools, algorithms
import modal
import time
import logging
from concurrent.futures import ProcessPoolExecutor

ESM3_MODEL = "esm3-medium-2024-08"  # Define ESM3 Model
ESM3_TOKEN = ""  # Define Forge Token

## Scoring Functions

In [2]:


# Build image and dependencies. Will be cached after build
NanoBERT = (
    modal.Image.debian_slim(python_version="3.12").pip_install("transformers", "torch", "sentence-transformers", "datasets", "accelerate", "sentence-transformers",  "pandas", "google-cloud-bigquery==3.26.0")
    .apt_install("libopenblas-dev")
    .run_commands("mkdir -p /app/NB2_weights")
    .workdir("/app"))

    
app = modal.App(name="ga")






def log_sum_exp(logits):
    import base64
    import numpy as np
    """Applies the log-sum-exp trick for numerical stability"""
    # Accepts a 1-dim logits vector
    a_max = np.max(logits)
    return a_max + np.log(np.sum(np.exp(logits - a_max)))
def mean_pooling(embeddings, attention_mask):
    # Apply the attention mask to exclude padded tokens
    mask_expanded = attention_mask.unsqueeze(-1).expand(embeddings.size())
    sum_embeddings = torch.sum(embeddings * mask_expanded, 1)
    sum_mask = torch.clamp(mask_expanded.sum(1), min=1e-9)
    return sum_embeddings / sum_mask
def calculate_sequence_max_log_probability_2(logits, input_ids, tokenizer):
    import base64
    import numpy as np
    """
    Computes the sequence log-probability using the provided logits and input tokens.

    logits: Tensor of shape (sequence_length, vocab_size)
    input_ids: Tensor of token ids for the input sequence (sequence_length)
    tokenizer: Tokenizer for decoding tokens
    """
    total_log_probability = 0
    # Token IDs to exclude (special tokens)
    excluded_token_ids = {0, 1, 2, 3, 4}  # Exclude [PAD], [s], [/s], <unk>, <mask>
    for index, token_id in enumerate(input_ids):
        if token_id.item() in excluded_token_ids:
            # Skip special tokens
            continue
        token_logits = logits[index].cpu().numpy()  # Logits for the current token position
        if token_id == tokenizer.mask_token_id:
            # For masked positions, select the logit associated with the maximum prediction
            selected_logit = np.max(token_logits)
        elif token_id not in [0,2]:
            # For unmasked positions, select the logit associated with the original token
            selected_logit = token_logits[token_id.item()]

        # Normalize the selected logit against the logits distribution to compute the log probability
        log_probability = selected_logit - log_sum_exp(token_logits)
        total_log_probability += log_probability

    return total_log_probability





class TextDataset(Dataset):
    def __init__(self, texts):
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Return text, methods, and metadata for each entry
        return self.texts[idx]



@app.cls(
    image=NanoBERT,
    timeout=600,
    secrets=[
        modal.Secret.from_name("gcp-biolm-hackathon-bq-secret")
    ],  # Include the GCP BQ secret
)
class NBModel:
    @modal.build()  # add another step to the image build
    def download_model_to_folder(self):
        from huggingface_hub import snapshot_download

        os.makedirs("/app/nanoBERT", exist_ok=True)
        snapshot_download("tadsatlawa/nanoBERT", local_dir="/app/nanoBERT")

    @modal.enter()
    def setup(self):
        from transformers import pipeline, RobertaTokenizer, AutoModel,AutoModelForMaskedLM
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tokenizer = RobertaTokenizer.from_pretrained("/app/nanoBERT", return_tensors="pt")
        self.vocab = self.tokenizer.get_vocab()
        self.model = AutoModelForMaskedLM.from_pretrained("/app/nanoBERT").to(self.device)
        # Try loading BQ client


    @modal.method()
    def inference_logits(self, seq, batch_size = 256, write_to_bq=False):


        seqs = [seq]
        results = []
        dataset = TextDataset(seqs)


        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        
        for batch in dataloader:
            raw_texts= batch  # Unpack the text, methods, and metadata
        
            tokenized_batch = self.tokenizer(
                raw_texts,
                padding="longest",  # Pad the sequences to the longest in the batch
                truncation=True,  # Truncate sequences longer than the model's limit
                return_tensors="pt"  # Return as PyTorch tensors
            ).to(self.device)
        
            input_ids = tokenized_batch['input_ids']  # Tokenized and padded inputs
            attention_mask = tokenized_batch['attention_mask'] 


        # Perform a forward pass through the model
            with torch.no_grad():
                    outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
                    logits = outputs.logits  # (batch_size, sequence_length, vocab_size)
                    hidden_states = outputs.hidden_states[-1]  # Get the last layer of hidden states (embeddings)

            # Perform mean pooling on the hidden states to get sentence embeddings
            embeddings = mean_pooling(hidden_states, attention_mask)  # Get sentence embeddings via mean pooling
    
            #  Optionally move logits and embeddings back to CPU if needed for further processing
            logits = logits.cpu()
            embeddings = embeddings.cpu()
 
            # For each input in the batch, calculate the log probability for the sequence
            for batch_idx in range(input_ids.size(0)):  # Iterate over each sample in the batch
                input_sequence = input_ids[batch_idx]
                logits_for_sequence = logits[batch_idx]  # Get logits for the current sequence
                log_prob = calculate_sequence_max_log_probability_2(logits_for_sequence, input_sequence, self.tokenizer)
                seq_embedding = base64.b64encode(
                np.array(embeddings[batch_idx], dtype=np.float32).tobytes()
            ).decode("utf-8")

                result = {
                "sequence": raw_texts[batch_idx],

                "sequence_log_probability": log_prob,

            }
                results.append(result)

       
        return log_prob



In [3]:

def log_sum_exp(logits):
    import base64
    import numpy as np
    """Applies the log-sum-exp trick for numerical stability"""
    # Accepts a 1-dim logits vector
    a_max = np.max(logits)
    return a_max + np.log(np.sum(np.exp(logits - a_max)))

def calculate_sequence_max_log_probability(logits, input_ids):
    import base64
    import numpy as np
    """
    Computes the sequence log-probability using the provided logits and input tokens.

    logits: Tensor of shape (sequence_length, vocab_size)
    input_ids: Tensor of token ids for the input sequence (sequence_length)
    tokenizer: Tokenizer for decoding tokens
    """
    total_log_probability = 0
    # Token IDs to exclude (special tokens)
    allowed_token_ids = {5, 10, 17, 13, 23, 16,  6,  9, 21, 12,  4, 15, 20, 18, 14,  8, 11, 22, 19,  7}  # Only count AA tokens
    for index, token_id in enumerate(input_ids):
        if token_id.item() not in allowed_token_ids:
            # Skip special tokens
            continue
        token_logits = logits[index].cpu().numpy()
        selected_logit = token_logits[token_id.item()]

        # Normalize the selected logit against the logits distribution to compute the log probability
        log_probability = selected_logit - log_sum_exp(token_logits)
        total_log_probability += log_probability

    return total_log_probability





from esm.models.esm3 import ESM3
from esm.sdk.api import (
    ESM3InferenceClient,
    ESMProtein,
    ESMProteinError,
    ESMProteinTensor,
    GenerationConfig,
    LogitsConfig,
    LogitsOutput,
    SamplingConfig,
    SamplingTrackConfig,
)
from esm.sdk import client

# Try loading BQ client

        
            
        

def inference_logits(input):


    seq = input
    model = client(ESM3_MODEL, token=ESM3_TOKEN)
    import esm
    import time
    from esm.models.esm3 import ESM3
    from esm.sdk.api import (
        ESM3InferenceClient,
        ESMProtein,
        ESMProteinError,
        ESMProteinTensor,
        GenerationConfig,
        LogitsConfig,
        LogitsOutput,
        SamplingConfig,
        SamplingTrackConfig,
    )
    retries=0
    lock =True
    while retries < 5 and lock:
        try:

            protein = ESMProtein(sequence=seq)
            protein_tensor = model.encode(protein)
            logits_output = model.logits(protein_tensor, LogitsConfig(sequence=True))
            assert isinstance(
                logits_output, LogitsOutput
            ), f"LogitsOutput was expected but got {logits_output}"
            assert (
                logits_output.logits is not None and logits_output.logits.sequence is not None
            )
            lock=False  # Function succeeded
        except Exception as e:
            retries += 1
            print(f"Attempt {retries} failed: {e}. Retrying...")
            time.sleep(60)  # Wait before retrying
    if retries ==5:
        return None

    log_prob = calculate_sequence_max_log_probability(logits_output.logits.sequence, protein_tensor.sequence)
    

    result = {
    "sequence": seq,
    f"{ESM3_MODEL}": log_prob,
}
    print(result)
    return log_prob

In [13]:
# Define ESM3 parallel requests

def fetch_all_data(seqs):
    # Create a list of tasks for parallel execution
    with ProcessPoolExecutor() as executor:
        results =  executor.map(inference_logits, seqs)
    return list(results)


def esm3_main(seqs):
    # Call fetch_all_data to get the results in parallel
    results = fetch_all_data(seqs)
    return results

def score_esm3(seqs):
    n = 48 # number of requests per minute -2 
    request_batches = [seqs[i:i + n] for i in range(0, len(seqs), n)]
    full_results = []
    for batch in request_batches:
        
        esm3_results = esm3_main(batch)
        full_results.extend(esm3_results)
        time.sleep(60)  # per minute requests can probably lower depending on completion time
    return full_results 

In [5]:

async def score_nb(seqs):
    with modal.enable_output():
        with app.run():
            # Instantiate the Model class
            nb_model = NBModel()
            # Call the screen_sequences method
            result = [x async for x in nb_model.inference_logits.map.aio(seqs)]
            return result

In [6]:
initial_sequence = "QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS"

In [7]:
nb_score_intitial = await score_nb([initial_sequence])
nb_score_intitial

Output()

Output()

Output()

[-44.091416358947754]

In [8]:
esm3_score_initial = score_esm3([initial_sequence])
esm3_score_initial 

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -52.13194680213928}


[-52.13194680213928]

## GA

In [10]:


# Load the fill-mask model from Hugging Face
fill_mask = pipeline("fill-mask", model="tadsatlawa/nanoBERT", device="cuda",top_k=20)


# Define fitness based on two objectives
creator.create("FitnessMax", base.Fitness, weights=(1.0, 1.0))  # Maximize both scores
creator.create("Individual", list, fitness=creator.FitnessMax)

# Define DEAP toolbox
toolbox = base.Toolbox()

# Register crossover (two-point crossover from DEAP)
toolbox.register("mate", tools.cxTwoPoint)

# Register selection (use DEAP's selection method)
toolbox.register("select", tools.selTournament, tournsize=3)  # You can adjust the tournsize as needed


async def evaluate_population(population, generation, n):
    """Evaluate the population using both scoring functions, applying score_esm3 only every 3 generations."""
    sequences = [''.join(ind) for ind in population]
    print(sequences)  # For debugging, to see the sequences being evaluated
    
    # Parallel scoring using Modal Apps
    scores1 = await score_nb(sequences)  # Always run the first scoring function
    
    # Run the slow scoring function (score_esm3) only every n generations
    scores2 = score_esm3(sequences) if generation % n == 0 else [None] * len(sequences)
    
    return list(zip(scores1, scores2))

def mutate_sequence_with_fill_mask(sequence, mutation_count=1, returned_seqs=1, top_k_choice=3):
    """
    Mutate the given sequence using a fill-mask approach.
    Applies exactly `mutation_count` mutations at random positions.
    """
    individual = list(sequence)  # Convert string to a list of characters
    sequence_length = len(individual)
    high_mutation_regions = [(26, 36), (49, 58), (98, 114)]  # 1-indexed regions

    # return 1 or several mutated sequences
    seqs_to_return = []
    for seq_to_return in range(returned_seqs):  #TODO: ensure mutated seqs unique
        # Randomly select `mutation_count` mutation positions, biased towards high mutation regions
        mutated = False
        while not mutated:
            mutation_positions = []
            while len(mutation_positions) < mutation_count:
                if random.random() < 0.7:  # 70% chance to first mutate in high mutation regions to get novel CDRs
                    region = random.choice(high_mutation_regions)
                    mutation_position = random.randint(region[0] - 1, region[1] - 1)  # Convert to 0-indexed
                else:
                    mutation_position = random.randint(0, sequence_length - 1)
        
                # Ensure no duplicates
                if mutation_position not in mutation_positions:
                    mutation_positions.append(mutation_position)
        
            # Apply mutations at the selected positions
            for mutation_position in mutation_positions:
                # Mask the mutation position in the sequence (replace with a special token)
                masked_sequence = individual[:]
                masked_sequence[mutation_position] = fill_mask.tokenizer.mask_token
        
                # Convert the list back to a string for fill-mask pipeline
                masked_sequence_str = ''.join(masked_sequence)
        
                # Run the fill-mask pipeline on the masked sequence
                predictions = fill_mask(masked_sequence_str)
                #print(predictions)
        
                # Find the top-N predictions that are different from the wild-type residue
                wild_type_residue = individual[mutation_position]
                #print("WT")
                #print(wild_type_residue)
    
                wild_type_score = next((pred['score'] for pred in predictions if pred['token_str'] == wild_type_residue), float('-inf'))
                #print(wild_type_score)
                # Filter to get only mutations that are different and better than the wild type
                top_mutations = [
                    pred['token_str'] for pred in predictions 
                    if pred['token_str'] != wild_type_residue and pred['score'] > wild_type_score
                ][:top_k_choice]
                #print(top_mutations)
        
                # If there are valid mutation predictions, apply one of them
                if top_mutations:
                    selected_mutation = random.choice(top_mutations)
                    print(f"Mutation of {individual[mutation_position]} to {selected_mutation} at {mutation_position + 1}")
                    individual[mutation_position] = selected_mutation
                    mutated=True
                    
        seqs_to_return.append(''.join(individual))

    return seqs_to_return

async def genetic_algorithm_deap(initial_sequence, num_generations, num_runs, population_size, slow_score_every_n_generations=1, top_k=50, mutation_count=1, child_size=2, crossover_prob=0.3, mutation_prob=0.8):
    all_runs_results = []
    
    # Open a CSV file to continuously log the evaluated sequences and their scores
    with open("evaluated_sequences_ga.csv", "w", newline='') as eval_file:
        writer = csv.writer(eval_file)
        writer.writerow(["sequence", "score_nb", "score_esm3", "generation", "run"])  # Add headers

        for run in range(num_runs):
            print(f"Starting run {run + 1}/{num_runs}")
            
            # Create the initial population with mutated versions of the initial sequence
            population = [creator.Individual(list(mutate_sequence_with_fill_mask(initial_sequence, mutation_count)[0])) for _ in range(population_size)]

            for gen in range(num_generations):
                print(f"-- Generation {gen + 1}/{num_generations}")
                
                # Evaluate the population
                scores = await evaluate_population(population, gen, slow_score_every_n_generations)

                # Assign fitness to the individuals and save each evaluation to the CSV
                for seq, (score1, score2) in zip(population, scores):
                    seq.fitness.values = (score1, score2)
                    # Write the sequence and scores to the CSV
                    writer.writerow([''.join(seq), score1, score2, gen + 1, run + 1])

                
                selected_individuals = toolbox.select(population, population_size)  # keep population size fixed
                offspring = list(map(toolbox.clone, selected_individuals))


                new_population = []
                # Apply crossover on the offspring
                for child1, child2 in zip(offspring[::2], offspring[1::2]):
                    # Apply crossover with some probability
                    if random.random() < crossover_prob:
                        toolbox.mate(child1, child2)
                        print("Applying Crossover")
                    # Apply mutation with some probability
                    if random.random() < mutation_prob:
                        # Get child_size mutated sequences
                        new_population.extend([creator.Individual(list(i)) for i in mutate_sequence_with_fill_mask(''.join(child1),mutation_count=mutation_count, returned_seqs=child_size)])  # Add both to the new population
                        print("Applying Mutation")
                    else:
                        new_population.extend([child1, child2])  # Add unchanged offspring if not mutated


            # Gather results of the final population for this run
            final_scores = await evaluate_population(population, num_generations, slow_score_every_n_generations)
            final_results = [(seq, score1, score2) for seq, (score1, score2) in zip(population, final_scores)]
            all_runs_results.append(final_results)

    # Save the top sequences to a separate CSV after all runs
    best_sequences = sorted([ind for run_results in all_runs_results for ind in run_results],
                            key=lambda x: (x[1], x[2] if x[2] is not None else 0),
                            reverse=True)[:top_k]
    
    with open(f"best_sequences_ga.csv", "w", newline='') as best_file:
        writer = csv.writer(best_file)
        writer.writerow(["sequence", "score_nb", "score_esm3"])
        for seq_tuple in best_sequences:
            seq = ''.join(seq_tuple[0])  # Convert the Individual (list of characters) back to a string
            score1 = seq_tuple[1]
            score2 = seq_tuple[2]
            writer.writerow([seq, score1, score2])

    



In [11]:
res = await genetic_algorithm_deap(
        initial_sequence=initial_sequence,
        num_generations=3,
        population_size=50,
        num_runs=1,
        slow_score_every_n_generations=1,
        top_k=10,
        mutation_count=1,
        child_size=2,
        crossover_prob=0.3,
        mutation_prob=0.8
    )

Starting run 1/1


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Mutation of T to G at 107
Mutation of A to R at 99
Mutation of D to K at 56
Mutation of D to K at 56
Mutation of A to C at 103
Mutation of A to G at 100
Mutation of S to N at 31
Mutation of S to Y at 102
Mutation of G to L at 106
Mutation of D to K at 56
Mutation of S to Y at 102
Mutation of S to G at 49
Mutation of S to G at 49
Mutation of T to F at 107
Mutation of G to L at 106
Mutation of S to I at 29
Mutation of A to G at 100
Mutation of S to T at 52
Mutation of A to G at 100
Mutation of S to T at 52
Mutation of T to S at 21
Mutation of T to A at 107
Mutation of S to F at 29
Mutation of S to N at 31
Mutation of S to I at 29
Mutation of T to A at 107
Mutation of D to G at 56
Mutation of R to S at 30
Mutation of R to S at 30
Mutation of G to A at 33
Mutation of D to G at 56
Mutation of G to L at 106
Mutation of R to G at 54
Mutation of A to R at 99
Mutation of G to V at 33
Mutation of S to Y at 102
Mutation of S to N at 31
Mutation of S to N at 52
Mutation of S to Y at 102
Mutation o

Output()

Output()

Output()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVGGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.694496154785156}{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGYAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -53.20532822608948}

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSCWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.471933126449585}{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSSSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -49.864715337753296}

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTFRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -49.4352970123291}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGGSTGYADSV

TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling p

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTDYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -53.46187901496887}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYLTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -55.188185930252075}


TOKENIZERS_PARALLELISM=(true | false)
TOKENIZERS_PARALLELISM=(true | false)


Mutation of R to S at 54
Mutation of T to N at 52
Applying Mutation
Mutation of S to N at 31
Mutation of G to V at 33
Applying Mutation
Applying Crossover
Mutation of D to K at 56
Mutation of G to A at 33
Applying Mutation
Applying Crossover
Mutation of G to K at 56
Mutation of S to N at 31
Applying Mutation
Mutation of S to N at 31
Mutation of S to F at 29
Applying Mutation
Applying Crossover
Mutation of S to L at 29
Mutation of D to G at 56
Applying Mutation
Mutation of S to N at 52
Mutation of R to D at 54
Applying Mutation
Applying Crossover
Mutation of S to T at 52
Mutation of S to G at 49
Applying Mutation
Mutation of A to Y at 100
Mutation of S to N at 31
Applying Mutation
Mutation of S to Y at 102
Mutation of S to N at 52
Applying Mutation
Mutation of R to S at 30
Mutation of G to A at 33
Applying Mutation
Applying Crossover
Mutation of D to K at 56
Mutation of I to F at 29
Applying Mutation
Applying Crossover
Mutation of L to R at 108
Mutation of S to N at 52
Applying Mutation

Output()

Output()

Output()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAARAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.77056694030762}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVGGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.87539577484131}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYLTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -55.188185930252075}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGGSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.01465821266174}{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGGLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.303130865097046}

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSV

TOKENIZERS_PARALLELISM=(true | false)
TOKENIZERS_PARALLELISM=(true | false)
=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTDYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -53.46187901496887}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYLTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -55.188185930252075}


TOKENIZERS_PARALLELISM=(true | false)


Mutation of E to V at 5
Mutation of G to L at 106
Applying Mutation
Applying Crossover
Mutation of S to G at 49
Mutation of R to G at 30
Applying Mutation
Mutation of S to V at 11
Mutation of R to G at 54
Applying Mutation
Mutation of A to R at 99
Mutation of R to G at 54
Applying Mutation
Applying Crossover
Mutation of R to G at 54
Mutation of S to N at 52
Applying Mutation
Mutation of G to F at 100
Mutation of G to V at 33
Applying Mutation
Mutation of R to S at 30
Mutation of D to S at 56
Applying Mutation
Mutation of A to S at 103
Mutation of S to L at 11
Applying Mutation
Mutation of D to A at 56
Mutation of R to G at 54
Applying Mutation
Applying Crossover
Mutation of R to S at 30
Mutation of G to V at 33
Applying Mutation
Mutation of S to N at 31
Mutation of S to G at 49
Applying Mutation
Mutation of A to S at 103
Mutation of T to G at 107
Applying Mutation
Mutation of G to A at 33
Mutation of L to P at 108
Applying Mutation
Applying Crossover
Applying Crossover
Mutation of G to

Output()

Output()

Output()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSSSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -49.864715337753296}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAGGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.666001319885254}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAGGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.666001319885254}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGGLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.303130865097046}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSCWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.471933126449585}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTIRSYGMGWFRQAPGKEREFVSGISWRGDSTGYA

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYLTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -55.188185930252075}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTDYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -53.46187901496887}


TOKENIZERS_PARALLELISM=(true | false)


Applying Crossover
Mutation of S to Y at 102
Mutation of S to G at 49
Applying Mutation
Applying Crossover
Mutation of S to L at 29
Mutation of A to R at 99
Applying Mutation
Mutation of R to G at 54
Mutation of A to S at 103
Applying Mutation
Applying Crossover
Mutation of R to G at 30
Mutation of T to F at 107
Applying Mutation
Applying Crossover
Mutation of R to G at 54
Mutation of S to L at 29
Applying Mutation
Mutation of D to K at 56
Mutation of A to C at 103
Applying Mutation
Mutation of S to G at 49
Mutation of R to S at 54
Applying Mutation
Mutation of R to G at 54
Mutation of S to A at 49
Applying Mutation
Applying Crossover
Mutation of L to Q at 108
Mutation of S to N at 31
Applying Mutation
Mutation of A to G at 103
Mutation of A to V at 33
Applying Mutation
Applying Crossover
Mutation of S to A at 49
Mutation of G to L at 106
Applying Mutation
Applying Crossover
Mutation of T to F at 107
Mutation of S to T at 52
Applying Mutation
Mutation of R to S at 30
Mutation of G to A

Output()

Output()

Output()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVGGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.87539577484131}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGGSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.01465821266174}{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYAMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.391528367996216}

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVGGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -51.87539577484131}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAARAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -50.74416732788086}{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKG

TOKENIZERS_PARALLELISM=(true | false)
TOKENIZERS_PARALLELISM=(true | false)
TOKENIZERS_PARALLELISM=(true | false)
TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)

{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTDYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYGTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -53.46187901496887}
{'sequence': 'QVKLEESGGGSVQTGGSLRLTCAASGRTSRSYGMGWFRQAPGKEREFVSGISWRGDSTGYADSVKGRFTISRDNAKNTVDLQMNSLKPEDTAIYYCAAAAGSAWYLTLYEYDYWGQGTQVTVSS', 'esm3-medium-2024-08': -55.188185930252075}


TOKENIZERS_PARALLELISM=(true | false)
TOKENIZERS_PARALLELISM=(true | false)
