# 02 — Structural Features for 1EY0A

This section:
- Parses the PDB structure for 1EY0 (chain A)
- Extracts the wild-type sequence
- Saves it as a FASTA file for documentation and for later use by the GA

In [1]:
from pathlib import Path
from Bio.PDB import PDBParser, PPBuilder

# Resolve the project root: when the kernel was started inside the
# "notebooks" folder, the root is one level up; otherwise the current
# working directory is taken to be the root itself.
cwd = Path.cwd()
if cwd.name == "notebooks":
    PROJECT_ROOT = cwd.parent
else:
    PROJECT_ROOT = cwd

# Input structure file and output FASTA file, both located under the root.
PDB_PATH = PROJECT_ROOT.joinpath("data", "raw", "pdb", "1EY0.pdb")
SEQ_OUT = PROJECT_ROOT.joinpath("data", "processed", "sequence_1EY0A.fasta")

# Echo the resolved locations so a wrong working directory is obvious.
for label, location in (
    ("Project root:", PROJECT_ROOT),
    ("PDB path:", PDB_PATH),
    ("FASTA out:", SEQ_OUT),
):
    print(label, location)

Project root: /Users/saturnine/protein-multimutant
PDB path: /Users/saturnine/protein-multimutant/data/raw/pdb/1EY0.pdb
FASTA out: /Users/saturnine/protein-multimutant/data/processed/sequence_1EY0A.fasta


## Extract chain A sequence and save FASTA

In [2]:
assert PDB_PATH.exists(), f"PDB file not found at {PDB_PATH}"

CHAIN_ID = "A"
FASTA_LINE_WIDTH = 60  # residues per FASTA line, for readability

parser = PDBParser(QUIET=True)
structure = parser.get_structure("1EY0", PDB_PATH)

# Use first model, chain A
model = structure[0]
chain = model[CHAIN_ID]

# PPBuilder yields one polypeptide per stretch of connected residues, so a
# chain with breaks comes back as several segments.
ppb = PPBuilder()
peptides = ppb.build_peptides(chain)

# Fail loudly instead of writing a header-only FASTA file.
assert peptides, f"No polypeptide segments found in chain {CHAIN_ID} of {PDB_PATH}"

if len(peptides) > 1:
    # Concatenating across breaks drops the missing residues, so positions in
    # the string no longer map one-to-one onto PDB residue numbers.
    print(
        f"WARNING: chain {CHAIN_ID} has {len(peptides)} segments (chain breaks); "
        "the concatenated sequence omits the residues in between."
    )
    for pp in peptides:
        print(f"  residues {pp[0].get_id()[1]}-{pp[-1].get_id()[1]} ({len(pp)} aa)")

# Single continuous chain -> one segment; multiple segments are concatenated.
seq_str = "".join(str(pp.get_sequence()) for pp in peptides)
assert seq_str, f"Extracted sequence for chain {CHAIN_ID} is empty"

print(f"1EY0A sequence length: {len(seq_str)}")
print(seq_str[:80] + ("..." if len(seq_str) > 80 else ""))

# Save as FASTA
SEQ_OUT.parent.mkdir(parents=True, exist_ok=True)
with open(SEQ_OUT, "w", encoding="utf-8") as f:
    f.write(">1EY0A_wt\n")
    # wrap the sequence at FASTA_LINE_WIDTH characters per line
    for i in range(0, len(seq_str), FASTA_LINE_WIDTH):
        f.write(seq_str[i:i + FASTA_LINE_WIDTH] + "\n")

print("Saved FASTA to:", SEQ_OUT)

1EY0A sequence length: 136
KLHKEPATLIKAIDGDTVKLMYKGQPMTFRLLLVDTPETKHPKKGVEKYGPEASAFTKKMVENAKKIEVEFDKGQRTDKY...
Saved FASTA to: /Users/saturnine/protein-multimutant/data/processed/sequence_1EY0A.fasta
