In [1]:
import pyrosetta
from pyrosetta import Pose
from Bio import PDB
import os
import pandas as pd
from tqdm import tqdm

# Initialize PyRosetta
pyrosetta.init(extra_options="-mute all")

def download_pdb(pdb_id):
    pdb_file = f"{pdb_id}.pdb"
    if not os.path.exists(pdb_file):
        print(f"Downloading PDB {pdb_id}...")
        pdb_list = PDB.PDBList()
        pdb_list.retrieve_pdb_file(pdb_id, pdir='.', file_format='pdb')
        ent_file = f"pdb{pdb_id}.ent"
        if os.path.exists(ent_file):
            os.rename(ent_file, pdb_file)
    return pdb_file

def load_protein(pdb_id):
    pdb_file = download_pdb(pdb_id)
    pose = Pose()
    pyrosetta.rosetta.core.import_pose.pose_from_file(pose, pdb_file)
    return pose

def score_protein(pose, scorefxn):
    total_energy = scorefxn(pose)
    energy_terms = {
        'Total': total_energy,
        'fa_atr': pose.energies().total_energies()[pyrosetta.rosetta.core.scoring.fa_atr],
        'fa_rep': pose.energies().total_energies()[pyrosetta.rosetta.core.scoring.fa_rep],
        'fa_sol': pose.energies().total_energies()[pyrosetta.rosetta.core.scoring.fa_sol],
        'hbond_sc': pose.energies().total_energies()[pyrosetta.rosetta.core.scoring.hbond_sc],
        'rama_prepro': pose.energies().total_energies()[pyrosetta.rosetta.core.scoring.rama_prepro]
    }
    return energy_terms

def analyze_proteins(proteins):
    scorefxn = pyrosetta.create_score_function('ref2015')
    results = []

    for protein_name, pdb_id in tqdm(proteins.items(), desc="Analyzing proteins"):
        try:
            pose = load_protein(pdb_id)
            energy_terms = score_protein(pose, scorefxn)
            results.append({
                'Protein': protein_name,
                'PDB ID': pdb_id,
                **energy_terms
            })
        except Exception as e:
            print(f"Error processing {protein_name} (PDB ID: {pdb_id}): {str(e)}")

    return pd.DataFrame(results)

# Define Alzheimer's-related proteins
proteins = {
    "Amyloid-beta": "1IYT",
    "Tau protein": "5N5A",
    "Presenilin-1": "5FN2",  # γ-secretase component
    "APOE4": "6NCO",  # Apolipoprotein E4
    "TREM2": "5ELI",  # Triggering receptor expressed on myeloid cells 2
    "α-Synuclein": "1XQ8",  # Often co-occurs with Alzheimer's
    "BACE1": "5MCQ"  # β-secretase 1, involved in Aβ production
}

# Analyze proteins
results_df = analyze_proteins(proteins)

# Display results
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
print(results_df)

# Save results to CSV
results_df.to_csv("alzheimers_protein_analysis.csv", index=False)
print("\nResults saved to alzheimers_protein_analysis.csv")

┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2024 [Rosetta PyRosetta4.Release.python39.m1 2024.24+release.ca096dac4f43ee5ee195f87f3703a520fcf60cf9 2024-06-14T16:57:57] retrieved from: http://www.pyrosetta.org


Analyzing proteins:   0%|                                 | 0/7 [00:00<?, ?it/s]

Downloading PDB 1IYT...
Downloading PDB structure '1iyt'...


Analyzing proteins:  14%|███▌                     | 1/7 [00:00<00:05,  1.13it/s]

Downloading PDB 5N5A...
Downloading PDB structure '5n5a'...


Analyzing proteins:  29%|███████▏                 | 2/7 [00:02<00:05,  1.05s/it]

Downloading PDB 5FN2...
Downloading PDB structure '5fn2'...


Analyzing proteins:  43%|██████████▋              | 3/7 [00:03<00:04,  1.10s/it]

Downloading PDB 6NCO...
Downloading PDB structure '6nco'...


Analyzing proteins:  57%|██████████████▎          | 4/7 [00:03<00:02,  1.19it/s]

Downloading PDB 5ELI...
Downloading PDB structure '5eli'...


Analyzing proteins:  71%|█████████████████▊       | 5/7 [00:04<00:01,  1.33it/s]

Downloading PDB 1XQ8...
Downloading PDB structure '1xq8'...


Analyzing proteins:  86%|█████████████████████▍   | 6/7 [00:04<00:00,  1.54it/s]

Downloading PDB 5MCQ...
Downloading PDB structure '5mcq'...


Analyzing proteins: 100%|█████████████████████████| 7/7 [00:05<00:00,  1.28it/s]

        Protein PDB ID         Total         fa_atr        fa_rep  \
0  Amyloid-beta   1IYT  5.654692e+06  -37931.231086  1.029220e+07   
1   Tau protein   5N5A  1.741409e+07 -120627.319316  3.168264e+07   
2  Presenilin-1   5FN2  7.462686e+03   -8077.098081  7.486665e+03   
3         APOE4   6NCO -1.630512e+02    -867.336869  1.713499e+02   
4         TREM2   5ELI  1.732353e+03   -1204.115570  3.334459e+03   
5   α-Synuclein   1XQ8  4.905712e+02    -475.918555  1.964660e+02   
6         BACE1   5MCQ  1.887272e+03   -2485.734529  4.744738e+03   

          fa_sol   hbond_sc  rama_prepro  
0   29404.494178  -8.138404   660.562420  
1  100095.126336 -61.256368  1969.346433  
2    4825.256798 -15.038719  2097.609414  
3     629.071967 -21.438001   -50.682066  
4     777.320714 -13.062664    62.845492  
5     441.268341   0.000000   275.438193  
6    1439.692713 -28.999614    37.006013  

Results saved to alzheimers_protein_analysis.csv



