In [None]:
# Install Biopython
!pip install biopython

# Upload your PDB file
from google.colab import files
uploaded = files.upload()

# Extract sequence
from Bio.PDB import PDBParser, PPBuilder
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio import SeqIO

# Load and parse uploaded file
import io

# One parser and one polypeptide builder are enough for every uploaded file.
parser = PDBParser(QUIET=True)
ppb = PPBuilder()

for filename in uploaded:
    # Strip only the final extension so dotted names ("run.1.pdb", "run.2.pdb")
    # keep distinct stems and do not overwrite each other's FASTA output.
    pdb_id = filename.rsplit('.', 1)[0]
    structure = parser.get_structure(pdb_id, filename)

    # Multi-model files repeat each chain once per model; the model id is added
    # to the record id in that case so FASTA identifiers stay unique.
    # Single-model files keep the original "<pdb_id>_chain_<chain>" ids.
    multi_model = len(structure) > 1
    records = []

    for model in structure:
        for chain in model:
            # A chain may be split into several peptides; join them into one
            # sequence per chain.
            seq = ''.join(str(pp.get_sequence()) for pp in ppb.build_peptides(chain))
            if not seq:  # Avoid empty sequences
                continue
            if multi_model:
                record_id = f"{pdb_id}_model_{model.id}_chain_{chain.id}"
            else:
                record_id = f"{pdb_id}_chain_{chain.id}"
            records.append(SeqRecord(Seq(seq), id=record_id, description=""))

    # Save as FASTA
    fasta_filename = pdb_id + "_output.fasta"
    with open(fasta_filename, "w") as f:
        SeqIO.write(records, f, "fasta")

    print(f"‚úÖ FASTA sequence saved as: {fasta_filename}")

    # Optional: Show sequence inline
    for rec in records:
        print(f">{rec.id}\n{rec.seq}")


Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.3/3.3 MB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


Saving system.pdbqt to system.pdbqt
‚úÖ FASTA sequence saved as: system_output.fasta
>system_chain_A
SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENL
>system_chain_B
SVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENL
>system_chain_C
SSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENL


In [None]:
!pip install biopython
!apt install -y dssp  # Only for Colab/Linux

from Bio.PDB import PDBParser, PPBuilder
from Bio.PDB.DSSP import DSSP
from Bio.Blast import NCBIWWW, NCBIXML
import io
import os
import tempfile
import ipywidgets as widgets
from IPython.display import display

# === File uploader ===
# Single-file upload widget restricted to .pdb / .pdbqt; handle_file_upload
# reads the uploaded content from its `value`.
uploader = widgets.FileUpload(accept='.pdb,.pdbqt', multiple=False)
# Show a heading, then the widget itself, in the cell output.
display(widgets.HTML(value="<h4>üìÅ Upload a PDB or PDBQT File</h4>"))
display(uploader)

# === Handle File Upload and Run Analysis ===
def handle_file_upload(change):
    """Observer callback: analyze every file currently held by `uploader`.

    Each upload is written to a temporary ``.pdb`` file (PDBQT input is first
    reduced to its ATOM/HETATM records), passed to ``analyze_pdb`` by path, and
    the temporary file is removed afterwards.

    Args:
        change: Trait-change notification supplied by ``uploader.observe``.
            It is not inspected; the widget's current ``value`` is read instead.
    """
    uploaded = uploader.value
    if len(uploaded) == 0:
        print("No file uploaded.")
        return

    # ipywidgets 7 exposes {filename: {"content": bytes, ...}}, whereas
    # ipywidgets 8 exposes a tuple of {"name": ..., "content": memoryview, ...}.
    if isinstance(uploaded, dict):
        entries = [(name, info['content']) for name, info in uploaded.items()]
    else:
        entries = [(info['name'], info['content']) for info in uploaded]

    for filename, content in entries:
        suffix = os.path.splitext(filename)[1].lower()
        content = bytes(content)  # normalizes a memoryview to bytes

        # Clean PDBQT to valid PDB if needed
        if suffix == ".pdbqt":
            content = convert_pdbqt_to_pdb(content)

        # delete=False because the file is re-opened by path (parser and the
        # external DSSP program) after this handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as temp_file:
            temp_file.write(content)

        try:
            analyze_pdb(temp_file.name)
        finally:
            # Do not leave one stray temp file behind per upload.
            os.remove(temp_file.name)

# === Convert PDBQT to PDB ===
def convert_pdbqt_to_pdb(pdbqt_bytes):
    """Reduce PDBQT file content to its coordinate records.

    Args:
        pdbqt_bytes: Raw file content as bytes (decoded with the default codec).

    Returns:
        bytes: The lines starting with "ATOM" or "HETATM", in their original
        order, joined by "\\n" with no trailing newline. Every other line is
        dropped.
    """
    coordinate_tags = ("ATOM", "HETATM")
    kept_records = []
    for record in pdbqt_bytes.decode().splitlines():
        if record.startswith(coordinate_tags):
            kept_records.append(record)
    return "\n".join(kept_records).encode()

# === Analyze PDB structure ===
def analyze_pdb(pdb_filename):
    """Run the DSSP-based check and a BLAST search for one PDB file.

    Steps:
      1. Parse the file with ``PDBParser``.
      2. Run DSSP on model 0 and report the result of ``check_misfolding``.
         Any failure here is printed and does not stop the analysis.
      3. Build the peptide sequences of the first model and pass the
         concatenated sequence to ``run_blast``.

    Args:
        pdb_filename: Path to a PDB-format file on disk.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("prot", pdb_filename)

    print("\nüîç Analyzing structure:", os.path.basename(pdb_filename))

    # --- DSSP Analysis ---
    # Best effort: a missing DSSP binary or an unparsable file must not prevent
    # the sequence/BLAST step below, so the error is reported and swallowed.
    try:
        model = structure[0]
        dssp = DSSP(model, pdb_filename)
        misfolded, ratio = check_misfolding(dssp)
        if misfolded:
            print(f"‚ö†Ô∏è  Potential misfolding detected. Outlier ratio: {ratio:.2f}")
        else:
            print("‚úÖ No significant misfolding detected.")
    except Exception as e:
        print("‚ö†Ô∏è Could not perform DSSP analysis:", e)

    # --- Sequence Extraction ---
    # Only the first model is used (matching the DSSP step). Iterating every
    # model of a multi-model file would repeat the same chains once per model
    # and send an artificially repeated sequence to BLAST.
    # NOTE(review): chains are still concatenated into one query; for
    # multi-chain structures a per-chain search may be more meaningful.
    seqs = []
    first_model = next(iter(structure), None)
    if first_model is not None:
        ppb = PPBuilder()
        for chain in first_model:
            seqs.extend(str(pp.get_sequence()) for pp in ppb.build_peptides(chain))
    full_seq = ''.join(seqs)

    # --- Run BLAST Search ---
    if full_seq:
        run_blast(full_seq)
    else:
        print("‚ö†Ô∏è No sequence found in structure.")

# === Misfolding check using DSSP angles ===
def check_misfolding(dssp):
    """Flag a structure whose backbone dihedrals fall outside [-180, 180].

    Args:
        dssp: Mapping-like object providing ``keys()`` and item access, where
            element 4 of each value is phi and element 5 is psi (the layout
            produced by ``Bio.PDB.DSSP``).

    Returns:
        tuple: ``(misfolded, ratio)``. ``ratio`` is the fraction of residues
        with defined angles that have phi or psi outside [-180, 180];
        ``misfolded`` is True when that fraction exceeds 0.1. When no residue
        has defined angles the result is ``(False, 0)``.
    """
    # DSSP marks an undefined angle (chain termini, chain breaks) with the
    # sentinel 360.0; 'NA' is accepted as well. Such residues are skipped
    # rather than counted as outliers, otherwise every chain end or gap
    # inflates the ratio.
    undefined = ('NA', 360.0)

    outliers = 0
    total = 0
    for key in dssp.keys():
        phi, psi = dssp[key][4], dssp[key][5]
        if phi in undefined or psi in undefined:
            continue
        total += 1
        if not (-180 <= phi <= 180 and -180 <= psi <= 180):
            outliers += 1
    if total == 0:
        return False, 0
    misfold_ratio = outliers / total
    return misfold_ratio > 0.1, misfold_ratio

# === BLAST Search ===
def run_blast(sequence):
    """Submit a protein sequence to NCBI BLAST and print the top matches.

    Runs ``blastp`` against ``nr`` through ``NCBIWWW.qblast`` and prints the
    title, length and per-HSP score / E-value / query snippet of at most five
    alignments.

    Args:
        sequence: Protein sequence to use as the BLAST query.

    Returns:
        bool: True when the search and parsing completed (even with no hits),
        False when any exception was raised; the error is printed, not raised.
    """
    print("\nüöÄ Running BLAST search (this may take 10‚Äì30 sec)...")
    try:
        result_handle = NCBIWWW.qblast("blastp", "nr", sequence)
        try:
            blast_record = NCBIXML.read(result_handle)
        finally:
            # Release the result handle whether or not parsing succeeded.
            result_handle.close()

        print("\nüß¨ Top 5 BLAST Matches:")
        top_hits = blast_record.alignments[:5]
        if not top_hits:
            # Make an empty result explicit instead of a bare header.
            print("   No matches found.")
        for alignment in top_hits:
            print(f"\nüîó {alignment.title}")
            print(f"   üîç Length: {alignment.length}")
            for hsp in alignment.hsps:
                print(f"   üîπ Score: {hsp.score}, E-value: {hsp.expect}")
                print(f"   Match snippet: {hsp.query[:75]}...\n")
        return True
    except Exception as e:
        # Deliberate best effort: network or service failures are reported to
        # the notebook user rather than raised.
        print("‚ùå BLAST failed:", e)
        return False

# === Connect upload to analysis ===
# Call handle_file_upload whenever the widget's `value` trait changes.
uploader.observe(handle_file_upload, names='value')



Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libcifpp-data libcifpp2
The following NEW packages will be installed:
  dssp libcifpp-data libcifpp2
0 upgraded, 3 newly installed, 0 to remove and 34 not upgraded.
Need to get 1,967 kB of archives.
After this operation, 15.0 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libcifpp-data all 2.0.5-1build1 [437 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libcifpp2 amd64 2.0.5-1build1 [1,019 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 dssp amd64 4.0.4-1 [511 kB]
Fetched 1,967 kB in 1s (2,457 kB/s)
Preconfiguring packages ...
Selecting previously unselected package libcifpp-data.
(Reading database ... 126333 files and directories currently installed.)
Preparing to unpack .../libcifpp-data_2.0.5-1build1_all.deb ...
Unpacking libcifpp-data (2.0.5-1bu

HTML(value='<h4>üìÅ Upload a PDB or PDBQT File</h4>')

FileUpload(value={}, accept='.pdb,.pdbqt', description='Upload')


üîç Analyzing structure: tmpfyb7s_rg.pdb
‚úÖ No significant misfolding detected.

üöÄ Running BLAST search (this may take 10‚Äì30 sec)...

üß¨ Top 5 BLAST Matches:

üîó pdb|6RZ3|A Chain A, Cellular tumor antigen p53 [Homo sapiens]
   üîç Length: 232
   üîπ Score: 1058.0, E-value: 2.77966e-138
   Match snippet: SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQH...

   üîπ Score: 1055.0, E-value: 9.50343e-138
   Match snippet: SSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHM...

   üîπ Score: 1051.0, E-value: 3.24887e-137
   Match snippet: SVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMT...


üîó pdb|1YCS|A Chain A, P53 [Homo sapiens] >pdb|4XR8|C Chain C, Cellular tumor antigen p53 [Homo sapiens] >pdb|4XR8|D Chain D, Cellular tumor antigen p53 [Homo sapiens]
   üîç Length: 199
   üîπ Score: 1054.0, E-value: 3.98149e-138
   Match snippet: SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPG