<a href="https://colab.research.google.com/github/angelnavfer/Prediccion-de-mutantes-por-ESM-2/blob/main/Predicci%C3%B3n_de_mutantes_por_ESM_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install fair-esm

import torch
import esm
import pandas as pd
from tqdm import tqdm

# Load the pretrained ESM-2 checkpoint together with its alphabet, then put
# the model in evaluation mode on the GPU when one is available (CPU otherwise).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
model = model.to(device)
model.eval()
# Converts (label, sequence) pairs into the token tensor the model consumes.
batch_converter = alphabet.get_batch_converter()

# Wild-type LuxR protein sequence (one-letter amino-acid codes). It is scored
# below to obtain the baseline against which mutant scores are compared.
WT_SEQ = (
    "MKNINADDTYRIINKIKACRSNNDINQCLSDMTKMVHCEYYLLAIIYPHSMVKSDISILDNYPKKWRQYYDDANLIKYDPIVDYSNSNHSPINWNIFENNAVNKKSPNVIKEAKTSGLITGFSFPIHTANNGFGMLSFAHSEKDNYIDSLFLHACMNIPLIVPSLVDNYRKINIANNKSNNDLTKREKECLAWACEGKSSWDISKILGCSERTVTFHLTNAQMKLNTTNRCQSISKAILTGAIDCPYFKN"
)

# Scoring function
def score_sequence(seq):
    """Return the summed per-residue log-probability of ``seq`` under ESM-2.

    The sequence is tokenized with the module-level ``batch_converter`` and
    passed once, unmasked, through the module-level ``model``. For every
    residue, the log-softmax value the model assigns to the residue actually
    present at that position is collected, and the values are summed.

    Args:
        seq: Amino-acid sequence as a string of one-letter codes.

    Returns:
        The sum of the per-residue log-probabilities as a Python float.

    Note:
        Scores are only comparable with scores produced by this same
        alignment. Any previously stored scores (for example a CSV of mutant
        scores) must be recomputed with this function before differences are
        taken.
    """
    data = [("sequence", seq)]
    _, _, batch_tokens = batch_converter(data)
    batch_tokens = batch_tokens.to(device)
    with torch.no_grad():
        logits = model(batch_tokens)["logits"]
    log_probs = torch.log_softmax(logits, dim=-1)
    # ESM-2 is a masked language model, not a causal one: the logits at
    # position i score the token at position i, so no one-step shift is
    # applied. The first and last positions are skipped because the ESM-2
    # batch converter wraps the sequence in BOS/EOS special tokens.
    residue_tokens = batch_tokens[:, 1:-1]
    residue_log_probs = (
        log_probs[:, 1:-1, :]
        .gather(2, residue_tokens.unsqueeze(-1))
        .squeeze(-1)
    )
    return residue_log_probs.sum(dim=1).item()

# Score the wild-type LuxR sequence; this is the baseline for every mutant.
wt_score = score_sequence(WT_SEQ)
print(f"✅ WT Score: {wt_score:.2f}")

# Load the mutant scores from the CSV file.
df = pd.read_csv("single_mutants_scored.csv")

# Express each mutant's score as its difference from the wild-type score.
df["DeltaScore"] = df["Score"].sub(wt_score)

# Rank mutants from highest to lowest delta and write the updated table.
df = df.sort_values(by="DeltaScore", ascending=False)
df.to_csv("single_mutants_scored_delta.csv", index=False)
print("📁 Updated CSV saved as 'single_mutants_scored_delta.csv'")

# Save the ranked mutants in FASTA format, one record per row of ``df``.
# The encoding is given explicitly because the header contains the non-ASCII
# character "Δ"; relying on the platform default encoding can raise
# UnicodeEncodeError where that default cannot represent it.
with open("single_mutants_scored_delta.fasta", "w", encoding="utf-8") as f:
    # Iterate the three needed columns in lockstep instead of building a
    # Series per row with iterrows().
    for mutant, delta, sequence in zip(df["Mutant"], df["DeltaScore"], df["Sequence"]):
        header = f">{mutant}|ΔScore:{delta:.2f}"
        f.write(f"{header}\n{sequence}\n")
print("📄 FASTA file with delta scores saved as 'single_mutants_scored_delta.fasta'")

✅ WT Score: -1507.66
📁 Updated CSV saved as 'single_mutants_scored_delta.csv'
📄 FASTA file with delta scores saved as 'single_mutants_scored_delta.fasta'
