In [None]:
import os

import py3Dmol
from bio_datasets import load_dataset
from bio_datasets import Features, Value, StructureFeature
from bio_datasets.structure.utils import to_pdb_string

# Dataset cache location. Honour the standard Hugging Face HF_DATASETS_CACHE
# environment variable so the notebook is not tied to a single machine; the
# original path is kept as the fallback, so behaviour is unchanged when the
# variable is unset.
cache_dir = os.environ.get("HF_DATASETS_CACHE", '/Volumes/SSKSSD/.cache/huggingface/datasets')
ds = load_dataset('biodatasets/pdb', cache_dir=cache_dir, split="train")

# Find the row whose id is "1a34.bcif" and pull out that single example.
index = ds["id"].index("1a34.bcif")
ex = ds[index]

In many cases a biological molecule such as a protein achieves its function not as a single copy, but through association with other copies of itself and with other molecules. Where a molecular assembly consists of multiple copies of a single molecular subunit, it is typical to deposit only the coordinates of the "asymmetric" subunit in the PDB, together with instructions describing the sequence of symmetry operations that must be applied to reconstruct the full assembly from transformed copies of the asymmetric unit.

If using `StructureFeature` / `ProteinStructureFeature` with mmCIF files, set `load_assembly=True` to automatically generate the full biological assembly from the coordinates that are explicitly stored in the mmCIF file.

For more background on biological assemblies, see [the PDB docs](https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies).

In [None]:
# Render the structure held in `ex` as a 'spectrum'-coloured cartoon
# inside a 400x400 py3Dmol viewer.
view = py3Dmol.view(width=400, height=400)

pdb_text = to_pdb_string(ex["structure"])
cartoon_style = {'cartoon': {'color': 'spectrum'}}

view.addModel(pdb_text, 'pdb')
view.setStyle(cartoon_style)
view.zoomTo()
view.show()


In [None]:
# Build a replacement schema whose "structure" column is a StructureFeature
# constructed with load_assembly=True.
# TODO: add a clone method?
assembly_structure = StructureFeature(load_assembly=True)
feat = Features(id=Value("string"), structure=assembly_structure)

# The encoding doesn't change, so casting is unnecessary: the new features
# are simply assigned onto the dataset's info (this modifies `ds` in place).
# TODO: copy ds?
ds.info.features = feat

In [None]:
# Fetch the same row (same `index`) from `ds` again, keeping it under a separate
# name from `ex`. NOTE(review): presumably the structure now decodes as the full
# assembly because `ds.info.features` was replaced — confirm.
assembly_ex = ds[index]

In [None]:
import numpy as np
# Render the structure held in `assembly_ex` as a 'spectrum'-coloured cartoon
# inside a 400x400 py3Dmol viewer.
view = py3Dmol.view(width=400, height=400)

assembly_pdb_text = to_pdb_string(assembly_ex["structure"])
assembly_cartoon_style = {'cartoon': {'color': 'spectrum'}}

view.addModel(assembly_pdb_text, 'pdb')
view.setStyle(assembly_cartoon_style)
view.zoomTo()
view.show()