In [6]:
# Cell [1]: Import PyRosetta and load ubiquitin structure

import pyrosetta
import os
from urllib import request

# Initialize PyRosetta
pyrosetta.init()

# Download the ubiquitin PDB file
pdb_id = "1ubq"
pdb_filename = f"{pdb_id}.pdb"
if not os.path.exists(pdb_filename):
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    request.urlretrieve(url, pdb_filename)
    print(f"Downloaded {pdb_filename}")
else:
    print(f"{pdb_filename} already exists")

# Load the ubiquitin structure
pose = pyrosetta.pose_from_pdb(pdb_filename)

print(f"Ubiquitin structure loaded. Total residues: {pose.total_residue()}")
print(f"Sequence: {pose.sequence()}")


INFO:pyrosetta.rosetta:Found rosetta database at: /opt/anaconda3/envs/rosetta/lib/python3.9/site-packages/pyrosetta-2024.24+release.ca096dac4f-py3.9-macosx-11.0-arm64.egg/pyrosetta/database; using it....
INFO:pyrosetta.rosetta:┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.Release.python39.m1 2024.24+release.ca096dac4f43ee5ee195f87f3703a520fcf60cf9 2024-06-14T

┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.Release.python39.m1 2024.24+release.ca096dac4f43ee5ee195f87f3703a520fcf60cf9 2024-06-14T16:57:57] retrieved from: http://www.pyrosetta.org
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.Release.python39.m1 r384 2024.24+release.ca096dac4f ca096dac4f43ee5ee195

In [7]:
# Cell [2]: Initial analysis of K48 in ubiquitin

def log_info(message):
    print(f"INFO: {message}")

def log_error(message):
    print(f"ERROR: {message}")

# Create a score function
scorefxn = pyrosetta.get_fa_scorefxn()
log_info("Score function created")

# Calculate total energy of the pose
total_energy = scorefxn(pose)
log_info(f"Total energy of the pose: {total_energy}")

# Analyze K48
k48_residue = pose.residue(48)
k48_chi_angles = [pose.chi(i, 48) for i in range(1, k48_residue.nchi() + 1)]
log_info(f"K48 chi angles: {k48_chi_angles}")

# Get energy of K48
k48_energy = pose.energies().residue_total_energy(48)
log_info(f"K48 total energy: {k48_energy}")

log_info(f"K48 residue type: {k48_residue.name()}")

# Identify neighboring residues
neighbors = []
for i in range(1, pose.total_residue() + 1):
    if i != 48 and pose.residue(i).nbr_atom_xyz().distance(pose.residue(48).nbr_atom_xyz()) <= 10.0:
        neighbors.append(i)

log_info(f"Neighboring residues within 10Å: {neighbors}")

core.scoring.ScoreFunctionFactory: SCOREFUNCTION: ref2015
INFO: Score function created
INFO: Total energy of the pose: 32.677753719260835
INFO: K48 chi angles: [-61.53068189241311, 173.65209206867237, -111.86509811555939, -58.779603712135824]
INFO: K48 total energy: 3.2047030973425494
INFO: K48 residue type: LYS
INFO: Neighboring residues within 10Å: [43, 44, 45, 46, 47, 49, 50, 51, 54, 59]


In [8]:
# Cell [3]: Summary of K48 in Ubiquitin

log_info(f"Summary of K48 (Lysine 48) in Ubiquitin:")
log_info(f"Residue type: {k48_residue.name()}")
log_info(f"Total energy: {k48_energy}")
log_info(f"χ angles: {k48_chi_angles}")
log_info(f"Neighboring residues within 10Å: {neighbors}")

print("Summary complete.")

INFO: Summary of K48 (Lysine 48) in Ubiquitin:
INFO: Residue type: LYS
INFO: Total energy: 3.2047030973425494
INFO: χ angles: [-61.53068189241311, 173.65209206867237, -111.86509811555939, -58.779603712135824]
INFO: Neighboring residues within 10Å: [43, 44, 45, 46, 47, 49, 50, 51, 54, 59]
Summary complete.


In [10]:
# Cell [4]: Detailed energy breakdown for K48 in ubiquitin

def log_info(message):
    print(f"INFO: {message}")

# Ensure we have the pose and scorefxn from previous cells
if 'pose' not in globals() or 'scorefxn' not in globals():
    raise NameError("Pose or scorefxn not found. Please run the previous cells first.")

# Get energy breakdown for K48
log_info("Detailed energy breakdown for K48:")
energy_map = pose.energies().residue_total_energies(48)

# Get non-zero weighted score types
nonzero_score_types = scorefxn.get_nonzero_weighted_scoretypes()

# Iterate through non-zero weighted score types and display their values
for score_type in nonzero_score_types:
    energy_value = energy_map[score_type]
    if energy_value != 0:
        log_info(f"{score_type}: {energy_value}")

# Calculate and display total energy for K48
total_k48_energy = energy_map.sum()
log_info(f"Total energy for K48: {total_k48_energy}")

# Compare with neighboring residues
log_info("\nEnergy comparison with neighboring residues:")
neighbors = [i for i in range(1, pose.total_residue() + 1) 
             if i != 48 and pose.residue(i).nbr_atom_xyz().distance(pose.residue(48).nbr_atom_xyz()) <= 10.0]

for neighbor in neighbors:
    neighbor_energy = pose.energies().residue_total_energy(neighbor)
    log_info(f"Residue {neighbor} ({pose.residue(neighbor).name3()}): {neighbor_energy}")

print("Detailed energy analysis complete.")

INFO: Detailed energy breakdown for K48:
INFO: ScoreType.fa_atr: -5.072177976571727
INFO: ScoreType.fa_rep: 0.5191073961399223
INFO: ScoreType.fa_sol: 4.541407674445452
INFO: ScoreType.fa_intra_rep: 4.035531842943009
INFO: ScoreType.fa_intra_sol_xover4: 0.5109835353283161
INFO: ScoreType.lk_ball_wtd: -0.0768065874854091
INFO: ScoreType.fa_elec: -2.3722608407084538
INFO: ScoreType.omega: 0.1321583802033019
INFO: ScoreType.fa_dun: 8.581201223325941
INFO: ScoreType.p_aa_pp: 0.2265634355117949
INFO: ScoreType.ref: -0.71458
INFO: ScoreType.rama_prepro: -0.2515371210530125
INFO: Total energy for K48: 21.482719322253025
INFO: 
Energy comparison with neighboring residues:
INFO: Residue 43 (LEU): -4.926903344707124
INFO: Residue 44 (ILE): -1.2024737740003961
INFO: Residue 45 (PHE): -2.3016427320487893
INFO: Residue 46 (ALA): 0.7985191403748607
INFO: Residue 47 (GLY): 0.01671206940793099
INFO: Residue 49 (GLN): 2.8767992428197195
INFO: Residue 50 (LEU): -4.506339038369492
INFO: Residue 51 (GLU):

In [13]:
# Cell [5]: Prepare for Monte Carlo side-chain packing

import pyrosetta
from pyrosetta.rosetta.protocols.minimization_packing import PackRotamersMover

def log_info(message):
    print(f"INFO: {message}")

def repack_with_increased_sampling(pose, residue_number, num_attempts=10):
    best_pose = pose.clone()
    best_energy = scorefxn(best_pose)
    
    for _ in range(num_attempts):
        test_pose = pose.clone()
        task = pyrosetta.standard_packer_task(test_pose)
        task.temporarily_fix_everything()
        task.temporarily_set_pack_residue(residue_number, True)
        
        packer = PackRotamersMover(scorefxn, task)
        packer.apply(test_pose)
        
        test_energy = scorefxn(test_pose)
        if test_energy < best_energy:
            best_pose = test_pose.clone()
            best_energy = test_energy
    
    return best_pose

log_info("Prepared for Monte Carlo side-chain packing.")
print("Ready for side-chain packing of K48.")


INFO: Prepared for Monte Carlo side-chain packing.
Ready for side-chain packing of K48.


In [14]:
# Cell [6]: Perform Monte Carlo side-chain packing on K48

original_pose = pose.clone()
repacked_pose = repack_with_increased_sampling(pose, 48)

log_info("Monte Carlo side-chain packing completed.")
log_info(f"Original K48 energy: {pose.energies().residue_total_energy(48)}")
log_info(f"Repacked K48 energy: {repacked_pose.energies().residue_total_energy(48)}")

original_chi = [original_pose.chi(i, 48) for i in range(1, original_pose.residue(48).nchi() + 1)]
repacked_chi = [repacked_pose.chi(i, 48) for i in range(1, repacked_pose.residue(48).nchi() + 1)]

log_info(f"Original K48 χ angles: {original_chi}")
log_info(f"Repacked K48 χ angles: {repacked_chi}")

# Compare energy breakdown before and after repacking
log_info("\nDetailed energy breakdown for K48 before repacking:")
original_energy_map = original_pose.energies().residue_total_energies(48)
for score_type in scorefxn.get_nonzero_weighted_scoretypes():
    energy_value = original_energy_map[score_type]
    if energy_value != 0:
        log_info(f"{score_type}: {energy_value}")

log_info("\nDetailed energy breakdown for K48 after repacking:")
repacked_energy_map = repacked_pose.energies().residue_total_energies(48)
for score_type in scorefxn.get_nonzero_weighted_scoretypes():
    energy_value = repacked_energy_map[score_type]
    if energy_value != 0:
        log_info(f"{score_type}: {energy_value}")

print("Monte Carlo side-chain packing analysis complete.")

core.pack.task: Packer task: initialize from command line()
core.pack.pack_rotamers: built 206 rotamers at 1 positions.
core.pack.interaction_graph.interaction_graph_factory: Instantiating PDInteractionGraph
core.pack.task: Packer task: initialize from command line()
core.pack.pack_rotamers: built 206 rotamers at 1 positions.
core.pack.interaction_graph.interaction_graph_factory: Instantiating PDInteractionGraph
core.pack.task: Packer task: initialize from command line()
core.pack.pack_rotamers: built 206 rotamers at 1 positions.
core.pack.interaction_graph.interaction_graph_factory: Instantiating PDInteractionGraph
core.pack.task: Packer task: initialize from command line()
core.pack.pack_rotamers: built 206 rotamers at 1 positions.
core.pack.interaction_graph.interaction_graph_factory: Instantiating PDInteractionGraph
core.pack.task: Packer task: initialize from command line()
core.pack.pack_rotamers: built 206 rotamers at 1 positions.
core.pack.interaction_graph.interaction_graph_fa

In [18]:
# Cell [7]: Analyze K48 conformation changes

import pyrosetta

def log_info(message):
    print(f"INFO: {message}")

def analyze_residue(pose, residue_number):
    residue = pose.residue(residue_number)
    chi_angles = [pose.chi(i, residue_number) for i in range(1, residue.nchi() + 1)]
    energy = pose.energies().residue_total_energy(residue_number)
    
    log_info(f"Residue {residue_number} ({residue.name3()}):")
    log_info(f"  χ angles: {chi_angles}")
    log_info(f"  Energy: {energy}")

log_info("Original K48 conformation:")
analyze_residue(original_pose, 48)

log_info("\nRepacked K48 conformation:")
analyze_residue(repacked_pose, 48)


INFO: Original K48 conformation:
INFO: Residue 48 (LYS):
INFO:   χ angles: [-61.53068189241311, 173.65209206867237, -111.86509811555939, -58.779603712135824]
INFO:   Energy: 3.2047030973425494
INFO: 
Repacked K48 conformation:
INFO: Residue 48 (LYS):
INFO:   χ angles: [-64.78238295200563, -178.0826304392288, -179.56764443438138, 179.82845509198222]
INFO:   Energy: -1.3957756642969774


In [26]:
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def estimate_interaction_energy(pose, res1, res2, scorefxn):
    logger.debug(f"Estimating interaction energy between residues {res1} and {res2}")
    
    # Calculate energy of the whole pose
    total_energy = scorefxn(pose)
    
    # Calculate energy of individual residues
    energy_res1 = pose.energies().residue_total_energy(res1)
    energy_res2 = pose.energies().residue_total_energy(res2)
    
    # Estimate interaction energy as the difference
    interaction_energy = total_energy - (energy_res1 + energy_res2)
    
    logger.debug(f"Estimated interaction energy: {interaction_energy}")
    return interaction_energy

def analyze_interactions(pose, residue_number, scorefxn):
    logger.info(f"Analyzing interactions for residue {residue_number}")
    target_residue = pose.residue(residue_number)
    
    # Find neighboring residues within 10 Å
    neighbors = [i for i in range(1, pose.total_residue() + 1) 
                 if i != residue_number and pose.residue(i).nbr_atom_xyz().distance(target_residue.nbr_atom_xyz()) <= 10.0]
    logger.debug(f"Found {len(neighbors)} neighboring residues")
    
    for neighbor in neighbors:
        interaction_energy = estimate_interaction_energy(pose, residue_number, neighbor, scorefxn)
        logger.info(f"Interaction energy between K48 and residue {neighbor} ({pose.residue(neighbor).name3()}): {interaction_energy:.4f}")

# Create scorefxn
scorefxn = pyrosetta.get_fa_scorefxn()
logger.info("Created score function")

# Evaluate poses to update energies
scorefxn(original_pose)
scorefxn(repacked_pose)
logger.info("Evaluated poses with score function")

logger.info("\nAnalyzing interactions for original conformation:")
analyze_interactions(original_pose, 48, scorefxn)

logger.info("\nAnalyzing interactions for repacked conformation:")
analyze_interactions(repacked_pose, 48, scorefxn)

# Compare chi angles
original_chis = [original_pose.chi(i, 48) for i in range(1, original_pose.residue(48).nchi() + 1)]
repacked_chis = [repacked_pose.chi(i, 48) for i in range(1, repacked_pose.residue(48).nchi() + 1)]

logger.info("\nChi angles comparison for K48:")
logger.info(f"Original: {[f'{chi:.2f}' for chi in original_chis]}")
logger.info(f"Repacked: {[f'{chi:.2f}' for chi in repacked_chis]}")

# Calculate RMSD of K48 side chain
def calculate_side_chain_rmsd(pose1, pose2, residue_number):
    res1 = pose1.residue(residue_number)
    res2 = pose2.residue(residue_number)
    
    # Get side chain heavy atoms (exclude backbone atoms)
    sc_atoms1 = [res1.xyz(i) for i in range(1, res1.natoms() + 1) if res1.atom_name(i).strip() not in ['N', 'CA', 'C', 'O']]
    sc_atoms2 = [res2.xyz(i) for i in range(1, res2.natoms() + 1) if res2.atom_name(i).strip() not in ['N', 'CA', 'C', 'O']]
    
    if len(sc_atoms1) != len(sc_atoms2):
        logger.error("Side chain atom counts do not match")
        return None
    
    # Calculate RMSD
    squared_diff_sum = sum((a1.x - a2.x)**2 + (a1.y - a2.y)**2 + (a1.z - a2.z)**2 for a1, a2 in zip(sc_atoms1, sc_atoms2))
    rmsd = (squared_diff_sum / len(sc_atoms1)) ** 0.5
    
    return rmsd

side_chain_rmsd = calculate_side_chain_rmsd(original_pose, repacked_pose, 48)
if side_chain_rmsd is not None:
    logger.info(f"\nRMSD of K48 side chain between original and repacked conformations: {side_chain_rmsd:.4f} Å")
else:
    logger.error("Failed to calculate RMSD")

core.scoring.ScoreFunctionFactory: SCOREFUNCTION: ref2015


INFO:__main__:Created score function
INFO:__main__:Evaluated poses with score function
INFO:__main__:
Analyzing interactions for original conformation:
INFO:__main__:Analyzing interactions for residue 48
INFO:__main__:Interaction energy between K48 and residue 43 (LEU): 34.4000
INFO:__main__:Interaction energy between K48 and residue 44 (ILE): 30.6755
INFO:__main__:Interaction energy between K48 and residue 45 (PHE): 31.7747
INFO:__main__:Interaction energy between K48 and residue 46 (ALA): 28.6745
INFO:__main__:Interaction energy between K48 and residue 47 (GLY): 29.4563
INFO:__main__:Interaction energy between K48 and residue 49 (GLN): 26.5963
INFO:__main__:Interaction energy between K48 and residue 50 (LEU): 33.9794
INFO:__main__:Interaction energy between K48 and residue 51 (GLU): 25.6379
INFO:__main__:Interaction energy between K48 and residue 54 (ARG): 23.6730
INFO:__main__:Interaction energy between K48 and residue 59 (TYR): 32.9299
INFO:__main__:
Analyzing interactions for repa

# K48 Repacking Analysis Summary

## Interpretation

1. **Side Chain Reorientation**: 
   The repacking has resulted in a significant reorientation of the K48 side chain, particularly in its terminal portion. This is evident from the large changes in Chi3 and Chi4 angles and the substantial RMSD.

2. **Interaction Energy Changes**: 
   Despite the large conformational change, the interaction energies with neighboring residues have only changed slightly. This suggests that while the side chain has reoriented, it hasn't dramatically altered its interactions with the local environment.

3. **Potential Energy Increase**: 
   The slight increase in interaction energies for all neighboring residues in the repacked conformation might indicate that the repacking has resulted in a less energetically favorable local environment for K48. However, this local increase might be offset by more favorable interactions elsewhere in the structure.

4. **Conservation of Strong Interactions**: 
   The residues with which K48 interacts most strongly (GLU51 and ARG54) remain the same in both conformations, suggesting that these interactions might be important for the local structure or function.

5. **Flexibility of Lysine Side Chain**: 
   The significant change in conformation with relatively small changes in interaction energies demonstrates the flexibility of the lysine side chain and its ability to adapt to different local environments.

## Recommendations for Further Analysis

1. Examine the overall energy of the entire protein before and after repacking to see if the local energy increase around K48 is compensated elsewhere.

2. Analyze the solvent accessibility of K48 in both conformations to see if the repacking has changed its exposure to the solvent.

3. If K48 is near any functional sites of the protein, examine how the repacking might have affected these sites.

4. Consider running a longer simulation or using more advanced sampling techniques to see if the repacked conformation is stable or if it would revert to something closer to the original conformation.

5. If experimental data is available (e.g., NMR data), compare both conformations to the experimental results to see which one aligns better with the observed data.

## Conclusion

This analysis provides insights into the local environment changes of K48 after repacking, highlighting the flexibility of lysine side chains and the complex balance of interactions in protein structures.