In [None]:
from tempfile import gettempdir
import biotite
import biotite.structure.io.pdb as pdb
import biotite.database.rcsb as rcsb
import biotite.structure as struc
import numpy as np
import glob
import os
import mdtraj as md
import matplotlib.pyplot as plt

In [None]:
carpeta_structures = """C:\\Users\\guill\\OneDrive - Universitat de Barcelona\\ASSIGNATURES\\TFM\\Experimental\\RNA_predicted\\HBonds\\Proves"""
carpeta_resultados = "..\\Experimental\\RNA_predicted\\HBonds\\Proves"

os.chdir(carpeta_structures)
archivo = "1eka.pdb"

In [39]:
pdb_file = pdb.PDBFile.read(archivo)
atom_array = pdb.get_structure(pdb_file)[0]
nucleotides = atom_array[struc.filter_nucleotides(atom_array)]
        
# Get the residue names and residue ids of the nucleotides
residue_ids = []
residue_names = []
for residue in struc.residue_iter(nucleotides):
    mapped_nucleotide, exact_match = struc.map_nucleotide(residue)
    if mapped_nucleotide is None:
        continue
    residue_ids.append(residue[0].res_id)
    if exact_match:
        residue_names.append(mapped_nucleotide)
    else:
        residue_names.append(mapped_nucleotide.lower())
        
        
# Compute the basepairs
base_pairs = struc.base_pairs(nucleotides)
hbonds = struc.hbond(nucleotides)
glycosidic_bonds = struc.base_pairs_glycosidic_bond(nucleotides, base_pairs)
edges = struc.base_pairs_edge(nucleotides, base_pairs)
base_pairs = struc.get_residue_positions(
    nucleotides, base_pairs.flatten()
).reshape(base_pairs.shape)


annotations = []
for bases, edge_types, orientation in zip(base_pairs, edges, glycosidic_bonds):
    for base, edge in zip(bases, edge_types):
        if orientation == 1:
            annotation = "c"
        else:
            annotation = "t"
        if edge == 1:
            annotation += "W"
        elif edge == 2:
            annotation += "H"
        elif edge == 3:
            annotation += "S"
        annotations.append(annotation)

              
for i in range(base_pairs.shape[0]):
    edge1 = annotations[i*2]
    edge2 = annotations[i*2+1]

    print(
        f"{base_pairs[i, 0]} {residue_names[base_pairs[i, 0]]}"
        f" {glycosidic_bonds[i]} {edge1} - "
        f"{base_pairs[i, 1]} {residue_names[base_pairs[i, 1]]}"
        f" {glycosidic_bonds[i]} {edge2}\n"
    )

for hbond in hbonds:
    donor_atom = nucleotides[hbond[0]]
    hydrogen_atom = nucleotides[hbond[1]]
    acceptor_atom = nucleotides[hbond[2]]
    print(
        f"{donor_atom.res_id} {donor_atom.res_name} "
        f"{donor_atom.atom_name} - "
        f"{acceptor_atom.res_id} {acceptor_atom.res_name} "
        f"{acceptor_atom.atom_name}"
    )

0 G 1 cW - 15 C 1 cW

1 A 1 cW - 14 U 1 cW

2 G 1 cW - 13 C 1 cW

3 U 1 cW - 12 G 1 cW

4 G 1 cW - 11 U 1 cW

5 C 1 cW - 10 G 1 cW

6 U 1 cW - 9 A 1 cW

7 C 1 cW - 8 G 1 cW

8 C N4 - 1 G O6
1 G O5' - 2 A OP2
7 U N3 - 2 A N1
6 C N4 - 3 G O6
5 G N1 - 4 U O2
5 G N2 - 4 U O2
4 U O2' - 5 G O5'
4 U O2' - 5 G O4'
4 U N3 - 5 G O6
3 G N2 - 6 C O2
3 G N1 - 6 C N3
2 A N6 - 7 U O4
1 G N2 - 8 C O2
1 G N1 - 8 C N3
8 C N4 - 1 G O6
1 G O5' - 2 A OP2
7 U N3 - 2 A N1
6 C N4 - 3 G O6
5 G N1 - 4 U O2
4 U O2' - 5 G O4'
4 U N3 - 5 G O6
3 G N2 - 6 C O2
3 G N1 - 6 C N3
2 A N6 - 7 U O4
1 G N2 - 8 C O2
1 G N1 - 8 C N3
