In [6]:
# One-time setup for the nglview widget.
# The install line is left commented out; uncomment it if nglview is not installed yet.
# !pip install nglview
# Enable the nglview notebook extension so the 3D widget can render in the notebook.
!jupyter-nbextension enable nglview --py --sys-prefix

Enabling notebook extension nglview-js-widgets/extension...
      - Validating: [32mOK[0m


In [None]:
!pip install biopython



In [None]:
# 1. Install dependencies (run once in your environment)
# pip install biopython nglview

# 2. Import the necessary modules
from Bio.PDB.MMCIFParser import MMCIFParser
import nglview as nv

# 3. Parse the mmCIF file into a Biopython Structure (QUIET=True is passed to the parser)
parser    = MMCIFParser(QUIET=True)
structure = parser.get_structure("prot_dna_complex", "/playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_0.cif")  # replace with your file path

# 4. Create and display the 3D view
view = nv.show_biopython(structure)  # build the widget from the Biopython Structure object
view.add_cartoon(selection="protein", color="blue")        # draw the "protein" selection as a blue cartoon
view.add_ball_and_stick(selection="nucleic", color="orange")  # draw the "nucleic" selection as orange ball-and-stick
# Bare last expression so the notebook renders the widget as the cell output.
view


NGLWidget()

In [8]:
# Import the required modules
from Bio.PDB.MMCIFParser import MMCIFParser
import nglview as nv

# Parse the mmCIF file (this cell loads sample_3, not sample_0)
parser = MMCIFParser(QUIET=True)
structure = parser.get_structure("prot_dna_complex", "/playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_3.cif")

# Create the nglview view: "protein" selection as a blue cartoon,
# "nucleic" selection as orange ball-and-stick
view = nv.show_biopython(structure)
view.add_cartoon(selection="protein", color="blue")
view.add_ball_and_stick(selection="nucleic", color="orange")

# Save as an HTML file - note that this uses nv.write_html rather than view.write_html
# NOTE(review): the view is never displayed in this cell before it is exported —
# confirm the saved HTML actually contains the representations added above.
html_file_path = "protein_dna_complex_visualization.html"
nv.write_html(html_file_path, view)  # the correct usage is the module-level function, not an object method

print(f"Visualization saved as {html_file_path}")



Visualization saved as protein_dna_complex_visualization.html


In [10]:
#!/usr/bin/env python3
"""
Convert an mmCIF file to PDB format using BioPython.
"""

from Bio.PDB import MMCIFParser, PDBIO

def convert_cif_to_pdb(cif_path: str, pdb_path: str) -> None:
    """Read an mmCIF file and write the same structure out in PDB format.

    Args:
        cif_path: Path of the mmCIF file to read.
        pdb_path: Path of the PDB file to write.

    Returns:
        None. The PDB file is written to ``pdb_path`` and a confirmation
        line is printed.
    """
    # Local import keeps the function self-contained within this cell.
    import os

    # Use the file name without directory or extension as the structure ID.
    # os.path handles the platform's path separators and strips only the
    # final extension, so a name such as "model.v2.cif" yields "model.v2".
    structure_id = os.path.splitext(os.path.basename(cif_path))[0]

    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure(structure_id, cif_path)

    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    print(f"Converted {cif_path} → {pdb_path}")

# ─────────── Edit the paths below ───────────
# Source mmCIF file to convert.
input_cif  = "/playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_0.cif"
# Destination PDB file: same directory and base name as the input, with a .pdb extension.
output_pdb = "/playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_0.pdb"

# Perform conversion
convert_cif_to_pdb(input_cif, output_pdb)


Converted /playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_0.cif → /playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_0.pdb


In [None]:
#!/usr/bin/env python3
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rc
import seaborn as sns
from Bio.PDB import PDBParser

# ─────────── Edit this ───────────
PDB_FILE = "/playpen/hongxuan/DNA_Protein/Protenix/structures/Protein-DNA Complex/seed_10/predictions/Protein-DNA Complex_seed_10_sample_0.pdb"  # ← change to your PDB path, e.g. "myprotein.pdb"
# ─────────────────────────────────

# Plot styling: seaborn "ticks" style with a Helvetica sans-serif font.
sns.set_theme()
sns.set_style("ticks")
rc('font', **{'family':'sans-serif','sans-serif':['Helvetica']})

# Base name of the PDB file (no directory, no extension). The .npy inputs are
# looked up under this name in the current working directory, and the output
# SVG is named after it as well.
base = os.path.splitext(os.path.basename(PDB_FILE))[0]
cwd = os.getcwd()

# Parse the structure and keep only its first model.
parser = PDBParser(QUIET=True)
model = parser.get_structure(base, PDB_FILE)[0]

# Filter out hydrogens.
atoms = [a for a in model.get_atoms() if a.element != 'H']

# Load data arrays (must be named like <base>_v_prot.npy, etc.).
v_prot_all = np.load(os.path.join(cwd, f"{base}_v_prot.npy"))
# Only the first row of the edge index is used; its values index rows of v_prot_all.
# NOTE(review): presumably these are the protein-side atom indices of the
# interface edges — confirm against the code that wrote the file.
interface_atoms = np.load(os.path.join(cwd, f"{base}_edge_index.npy"))[0]
diffs = np.load(os.path.join(cwd, f"{base}_diffs.npy"))

# Scale by the maximum value. When the maximum is exactly 0 the array is left
# as-is, because dividing by it would only produce NaN/inf values.
max_diff = diffs.max()
if max_diff != 0:
    diffs = diffs / max_diff

# Map (x, y) → atom, with coordinates formatted to two decimals as the key.
# NOTE(review): only the first two coordinates are used, so atoms that share
# x and y to two decimals collide and the later one wins — confirm that
# ignoring z is intended.
atom_dict = {
    tuple(f"{c:.2f}" for c in atom.coord[:2]): atom
    for atom in atoms
}

# Collect per-residue diffs.
# NOTE(review): diffs[i] assumes diffs is aligned one-to-one with
# interface_atoms — confirm.
per_res = {}
for i, coord in enumerate(v_prot_all[interface_atoms]):
    key2 = tuple(f"{c:.2f}" for c in coord[:2])
    atom = atom_dict.get(key2)
    # Explicit None check: "no match" must not depend on how Bio.PDB atom
    # objects evaluate in a boolean context.
    if atom is None:
        continue
    res = atom.get_parent()
    chain = res.get_parent().get_id()
    # Residue label: residue name + residue number + chain ID.
    res_id = f"{res.get_resname()}{res.get_id()[1]}{chain}"
    per_res.setdefault(res_id, []).append(diffs[i])

# Compute average & max for each residue.
final = {
    res: [np.mean(vals), np.max(vals)]
    for res, vals in per_res.items()
}

# Build a long-form DataFrame: one row per (residue, metric) pair.
df = pd.DataFrame.from_dict(final, orient='index', columns=['Average','Max'])
df = df.reset_index().melt(id_vars='index', var_name='Metric', value_name='Value')

# Order residues by Max descending.
order = df[df.Metric=='Max'].sort_values('Value', ascending=False)['index']

# Plot the top 20 residues.
fig, ax = plt.subplots(figsize=(8,4))
palette = {"Average": "black", "Max": "firebrick"}
sns.barplot(
    x='index', y='Value', hue='Metric', data=df,
    order=order[:20], palette=palette, ax=ax
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', fontsize=12)
ax.set_xlabel("Interface residue")
ax.set_ylabel("Importance")
ax.legend(title="")
# Lay out this figure explicitly rather than whichever figure pyplot
# currently considers active.
fig.tight_layout()

# Save the figure as SVG (relative path, so it lands in the working directory).
out_svg = f"{base}_interface_importance.svg"
fig.savefig(out_svg)
print(f"Saved plot to {out_svg}")

In [12]:
!pip install seaborn

Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
