In [18]:
import torch
from transformers import AutoTokenizer, EsmForProteinFolding
import matplotlib.pyplot as plt
from Bio.PDB import PDBParser
import numpy as np
import tempfile
import os

# Load the ESMFold tokenizer and folding model from the same checkpoint.
# `.eval()` returns the module itself, so the model is bound already in inference mode.
tokenizer = AutoTokenizer.from_pretrained("facebook/esmfold_v1")
fold_model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1").eval()

# Define neuron steering hook
def steer_neuron(model, layer_idx, neuron_idx, boost_factor):
    """Attach a forward hook that rescales one channel of an encoder layer's output.

    The hook is registered on ``model.esm.encoder.layer[layer_idx].output.dense``.
    On every forward pass it multiplies, in place, the slice
    ``output[:, :, neuron_idx]`` of that module's output by ``boost_factor``.

    Args:
        model: Object exposing ``esm.encoder.layer`` as an indexable collection.
        layer_idx: Index of the encoder layer whose dense output is hooked.
        neuron_idx: Index along the third axis of the output to rescale.
        boost_factor: Multiplier applied to the selected slice.

    Returns:
        The handle returned by ``register_forward_hook``; call ``.remove()``
        on it to detach the hook.
    """
    target_dense = model.esm.encoder.layer[layer_idx].output.dense

    def _scale_channel(_module, _inputs, activations):
        # Modify in place and return the same tensor so downstream modules
        # receive the rescaled activations.
        activations[:, :, neuron_idx] *= boost_factor
        return activations

    return target_dense.register_forward_hook(_scale_channel)

# Radius of Gyration
def compute_radius_of_gyration(pdb_path):
    """Return the unweighted radius of gyration of the non-hydrogen atoms in a PDB file.

    The value is the root-mean-square distance of the selected atoms from
    their centroid (every atom counts equally; no mass weighting), expressed
    in the same units as the coordinates stored in the file.

    Args:
        pdb_path: Path to the PDB file to parse.

    Returns:
        The radius of gyration as a NumPy floating-point scalar.
    """
    structure = PDBParser(QUIET=True).get_structure("protein", pdb_path)

    # Keep heavy atoms only; hydrogens are excluded via their element symbol.
    heavy_coords = np.array(
        [atom.coord for atom in structure.get_atoms() if atom.element != "H"]
    )

    offsets = heavy_coords - np.mean(heavy_coords, axis=0)
    mean_sq_distance = np.mean(np.sum(offsets**2, axis=1))
    return np.sqrt(mean_sq_distance)

# Save structure
def save_pdb_to_tempfile(pdb_str):
    """Write a PDB string to a new temporary ``.pdb`` file and return its path.

    The file is created with ``delete=False`` so it outlives this call; the
    caller is responsible for removing it when done.

    Args:
        pdb_str: Text content to write to the file.

    Returns:
        The filesystem path of the newly created file.
    """
    # Write through the handle NamedTemporaryFile already opened and let the
    # context manager close it. Reopening the file by name while leaving the
    # original handle open would leak one file handle per call.
    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pdb") as tmp:
        tmp.write(pdb_str)
        return tmp.name

# Run structure prediction and compute Rg
def predict_and_measure(sequence, boost_factors, layer_idx, neuron_idx):
    """Fold ``sequence`` once per boost factor and return the radii of gyration.

    For each boost factor a steering hook is attached to ``fold_model`` via
    ``steer_neuron``, the structure is predicted, written to a temporary PDB
    file, measured with ``compute_radius_of_gyration``, and the file removed.

    Args:
        sequence: Amino-acid sequence to fold.
        boost_factors: Iterable of multipliers to apply to the steered neuron.
        layer_idx: Encoder layer index passed to ``steer_neuron``.
        neuron_idx: Neuron index passed to ``steer_neuron``.

    Returns:
        A list with one radius of gyration per boost factor, in input order.
    """
    results = []

    # The tokenized input does not depend on the boost factor, so build it once.
    # The documented ESMFold usage tokenizes without special tokens.
    inputs = tokenizer(sequence, return_tensors="pt", add_special_tokens=False)

    for boost in boost_factors:
        print(f"Boost factor: {boost}")
        hook = steer_neuron(fold_model, layer_idx, neuron_idx, boost)
        try:
            with torch.no_grad():
                output = fold_model(**inputs)
        finally:
            # Always detach the hook, even if folding fails or is interrupted;
            # a stale hook would keep steering every later forward pass.
            hook.remove()

        # The forward output has no ready-made PDB string; output_to_pdb
        # returns one PDB string per batch item.
        pdb_str = fold_model.output_to_pdb(output)[0]

        # Save & measure, removing the temporary file even if parsing fails.
        pdb_path = save_pdb_to_tempfile(pdb_str)
        try:
            results.append(compute_radius_of_gyration(pdb_path))
        finally:
            os.remove(pdb_path)

    return results

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   1%|          | 83.9M/8.53G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
# Baseline (unsteered) prediction.
# First ensure that the sequence we fold does NOT already have a high value for
# the feature we are interested in, so that any change observed under steering
# can be attributed to the boost factor.
sequence = "MKTAYIAKQRQISFVKSHFSRQDILDLI"

with torch.no_grad():
    # The documented ESMFold usage tokenizes without special tokens.
    inputs = tokenizer(sequence, return_tensors="pt", add_special_tokens=False)
    output = fold_model(**inputs)
    # The forward output has no `predicted_pdb` field; output_to_pdb returns
    # one PDB string per batch item.
    pdb_str = fold_model.output_to_pdb(output)[0]

# Save & measure; the temporary file is removed even if parsing fails.
pdb_path = save_pdb_to_tempfile(pdb_str)
try:
    rg = compute_radius_of_gyration(pdb_path)
finally:
    os.remove(pdb_path)

print(f"Radius of Gyration: {rg} Å")

In [None]:
# Parameters
sequence = "MKTAYIAKQRQISFVKSHFSRQDILDLI"
LAYER_INDEX = 3
NEURON_INDEX = 256

# Boost factors from 1.0 to 5.0 inclusive, in steps of 0.5.
# TODO: we might want to try using negative range for boost factors too
boost_factors = np.arange(1.0, 5.1, 0.5).round(1).tolist()

# Run experiment: one radius of gyration per boost factor.
rgs = predict_and_measure(sequence, boost_factors, LAYER_INDEX, NEURON_INDEX)

# Plot radius of gyration against boost factor.
fig, ax = plt.subplots()
ax.plot(boost_factors, rgs, marker='o')
ax.set_xlabel("Boost Factor")
ax.set_ylabel("Radius of Gyration (Å)")
ax.set_title("Neuron Steering Effect on Protein Compactness")
ax.grid(True)
plt.show()