In [198]:
import pandas as pd
import tempfile
import nglview as nv
import os

In [199]:
target_id = '8KAI'

In [200]:
data = pd.read_csv(os.path.join('/media/datasets/rna-folding/train_labels.csv'))

In [201]:
data.head()

Unnamed: 0,ID,resname,resid,x_1,y_1,z_1
0,1SCL_A_1,G,1,13.76,-25.974001,0.102
1,1SCL_A_2,G,2,9.31,-29.638,2.669
2,1SCL_A_3,G,3,5.529,-27.813,5.878
3,1SCL_A_4,U,4,2.678,-24.900999,9.793
4,1SCL_A_5,G,5,1.827,-20.136,11.793


In [202]:
list(set( [id.split('_')[0] for id in data['ID']] ))[:10]

['3PHP',
 '1FJE',
 '1KKA',
 '1A9N',
 '4D5N',
 '1F85',
 '17RA',
 '2K66',
 '8KAI',
 '8T3E']

In [203]:
cell = data[data['ID'].str.startswith(target_id)]
cell = cell.drop('resid', axis=1)
cell = cell.drop('ID', axis=1)

In [204]:
cell.head()

Unnamed: 0,resname,x_1,y_1,z_1
130443,G,,,
130444,G,37.714001,-73.418999,73.100998
130445,U,37.092999,-78.536003,70.831001
130446,A,33.991001,-81.973999,69.350998
130447,G,28.669001,-83.289001,67.594002


In [205]:
def build_pdb(df):
    pdb_lines = []
    for i, row in df.iterrows():
        pdb_line = (
            f"HETATM{i+1:5d}  P   {row['resname']} A{i+1:4d}    "
            f"{row['x_1']:8.3f}{row['y_1']:8.3f}{row['z_1']:8.3f}  1.00  0.00           P"
        )
        pdb_lines.append(pdb_line)
    pdb_lines.append("END")
    return "\n".join(pdb_lines)

pdb_str = build_pdb(cell)

In [206]:
print(pdb_str)

HETATM130444  P   G A130444         nan     nan     nan  1.00  0.00           P
HETATM130445  P   G A130445      37.714 -73.419  73.101  1.00  0.00           P
HETATM130446  P   U A130446      37.093 -78.536  70.831  1.00  0.00           P
HETATM130447  P   A A130447      33.991 -81.974  69.351  1.00  0.00           P
HETATM130448  P   G A130448      28.669 -83.289  67.594  1.00  0.00           P
HETATM130449  P   C A130449      23.881 -81.578  64.783  1.00  0.00           P
HETATM130450  P   A A130450      20.068 -77.991  63.494  1.00  0.00           P
HETATM130451  P   A A130451      20.209 -73.713  60.278  1.00  0.00           P
HETATM130452  P   G A130452      20.674 -71.080  55.292  1.00  0.00           P
HETATM130453  P   U A130453      16.913 -68.545  49.959  1.00  0.00           P
HETATM130454  P   U A130454      14.748 -62.315  53.163  1.00  0.00           P
HETATM130455  P   A A130455      14.848 -57.697  54.614  1.00  0.00           P
HETATM130456  P   A A130456      16.091 

In [207]:
with tempfile.NamedTemporaryFile("w+", suffix=".pdb", delete=False) as tmp:
    tmp.write(pdb_str)
    tmp_path = tmp.name

In [217]:
view = nv.show_file(tmp_path)
view.clear_representations()
view.add_ball_and_stick()
view.center()
view

NGLWidget()

In [218]:
view.render_image()

Image(value=b'', width='99%')

In [215]:
# Load pdb from the protein data bank
view = nglview.show_pdbid(target_id)
view

NGLWidget()

In [216]:
view.render_image()

Image(value=b'', width='99%')