<a href="https://colab.research.google.com/github/UranMai/Bio/blob/master/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FIND CROSS-LINKED ATOMS 
**Cross-linked atoms are atoms not connected in the primary structure**


*   https://www.wwpdb.org/documentation/file-format-content/format33/sect10.html
*   https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/framepdbintro.html
*   http://ofranservices.biu.ac.il/site/services/epitope/pdf/PDB-format.pdf
*   http://rasbt.github.io/biopandas/tutorials/Working_with_PDB_Structures_in_DataFrames/
*   https://academic.oup.com/bioinformatics/article/27/15/2163/404176



**HETATM** - atoms in nonstandard residues. Nonstandard residues include inhibitors, cofactors, ions, and solvent. The only functional difference from **ATOM** records is that *HETATM* residues are, by default, not connected to other residues. Note that water residues should be in *HETATM* records.

In [23]:
%%bash
python drive/My\ Drive/Colab\ Notebooks/crossPDB2.py --pdb 5t1a.pdb

    element_symbol  atom_number atom_name  ... x_coord  y_coord  z_coord
2                C         1120        CB  ...  18.805   29.332  148.600
3                S         1121        SG  ...  17.945   30.434  147.510
4                C         1122        CD  ...  19.045   30.731  146.160
5                C         1123        CE  ...  20.078   31.810  146.405
6                O         1124       OZ1  ...  21.301   31.570  146.277
..             ...          ...       ...  ...     ...      ...      ...
127              O         3577         O  ...  10.355   22.239  154.323
128              O         3578         O  ...   0.508    4.148  161.185
129              O         3579         O  ...  15.034   14.598  215.822
130              O         3580         O  ...  17.079   12.508  215.054
131              O         3581         O  ...   1.182    4.687  163.526

[126 rows x 7 columns]
Done! 0.27 sec


In [25]:
!pip install biopandas
from biopandas.pdb import PandasPdb
# Load the PDB file; the cells below read its records through pdb.df,
# a mapping keyed by record group ('ATOM', 'HETATM', 'OTHERS').
pdb = PandasPdb()
pdb = pdb.read_pdb('5t1a.pdb')



In [4]:
# Preview the first rows of the ATOM records table.
pdb.df['ATOM'].head()

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,blank_3,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
0,ATOM,1,,N,,VAL,,A,37,,,-1.473,24.606,146.051,1.0,91.86,,,N,,629
1,ATOM,2,,CA,,VAL,,A,37,,,-1.496,24.944,147.482,1.0,88.55,,,C,,631
2,ATOM,3,,C,,VAL,,A,37,,,-0.941,26.368,147.707,1.0,83.45,,,C,,633
3,ATOM,4,,O,,VAL,,A,37,,,-1.452,27.09,148.545,1.0,85.03,,,O,,635
4,ATOM,5,,CB,,VAL,,A,37,,,-0.713,23.869,148.309,1.0,86.56,,,C,,637


In [24]:
# Preview the first rows of the HETATM records table.
pdb.df['HETATM'].head()

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,blank_3,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
0,HETATM,1118,,N,,YCM,,A,181,,,19.334,26.988,149.251,1.0,51.33,,,N,,2863
1,HETATM,1119,,CA,,YCM,,A,181,,,18.529,27.84,148.412,1.0,53.52,,,C,,2865
2,HETATM,1120,,CB,,YCM,,A,181,,,18.805,29.332,148.6,1.0,53.26,,,C,,2867
3,HETATM,1121,,SG,,YCM,,A,181,,,17.945,30.434,147.51,1.0,56.08,,,S,,2869
4,HETATM,1122,,CD,,YCM,,A,181,,,19.045,30.731,146.16,1.0,64.13,,,C,,2871


In [0]:
# Show the rows of the OTHERS table whose record name is TER.
pdb.df['OTHERS'][pdb.df['OTHERS']['record_name'] == 'TER']

In [9]:
# Keep only the CONECT rows of the OTHERS table; their "entry" column holds
# the whitespace-separated atom serial numbers of each record.
conect_df = pdb.df['OTHERS'][pdb.df['OTHERS']['record_name'] == 'CONECT']
conect_df.head()

Unnamed: 0,record_name,entry,line_idx
630,CONECT,591 1192,7668
631,CONECT,849 3564,7669
632,CONECT,1111 1118,7670
633,CONECT,1118 1111 1119,7671
634,CONECT,1119 1118 1120 1126,7672


In [10]:
# conect_df = pdb.df['OTHERS'][pdb.df['OTHERS']['record_name'] == 'CONECT']
# cons = conect_df['entry'].str.split()
# cons

630                      [591, 1192]
631                      [849, 3564]
632                     [1111, 1118]
633               [1118, 1111, 1119]
634         [1119, 1118, 1120, 1126]
                   ...              
748                     [3561, 3545]
749                     [3562, 3559]
750               [3563, 3557, 3558]
751    [3564, 849, 1561, 2808, 3571]
752                     [3571, 3564]
Name: entry, Length: 123, dtype: object

In [0]:
def atom_connections():
    """
    Split the CONECT records of the loaded PDB by the kind of atoms they link.

    Reads the module-level ``pdb`` object: the CONECT rows of
    ``pdb.df['OTHERS']`` (their "entry" column holds whitespace-separated
    atom serial numbers) and the serial numbers listed in ``pdb.df['HETATM']``.

    Returns:
        tuple: ``(atom_connect, atom_noconnect)`` where
            atom_connect   - CONECT records that mention at least one atom
                             which is not a HETATM atom,
            atom_noconnect - CONECT records made up of HETATM atoms only.
        Each record is the list of serial-number strings obtained by
        splitting its "entry" value.
    """
    others = pdb.df['OTHERS']
    conect_data = others[others['record_name'] == 'CONECT']
    connections = conect_data['entry'].str.split()
    # A set makes every membership test O(1); the serial numbers are looked
    # up once per atom of every CONECT record.
    hetatm_ids = set(pdb.df['HETATM']['atom_number'].to_list())

    atom_connect = []    # records touching at least one non-HETATM atom
    atom_noconnect = []  # records between HETATM atoms only

    for atoms in connections:
        if all(int(atom_id) in hetatm_ids for atom_id in atoms):
            atom_noconnect.append(atoms)
        else:
            atom_connect.append(atoms)
    return (atom_connect, atom_noconnect)

def hetatoms_ids():
    """
    Find the ids of HETATM atoms that connect to atoms of the primary structure.

    Takes the CONECT records returned first by ``atom_connections()`` (those
    that mention at least one non-HETATM atom) and keeps every serial number
    in them that is listed in ``pdb.df['HETATM']``.

    Returns:
        list: unique HETATM serial numbers (ints), in no particular order.
    """
    atom_connect, _ = atom_connections()
    # Set lookup instead of scanning the full HETATM list for every atom.
    hetatm_ids = set(pdb.df['HETATM']['atom_number'].to_list())

    drop_atom_ids = set()
    for record in atom_connect:
        drop_atom_ids.update(
            idx for idx in map(int, record) if idx in hetatm_ids
        )
    return list(drop_atom_ids)
    
def cross_linked_atoms():
    """
    Return the HETATM rows whose atom number is not reported by hetatoms_ids().

    The result is restricted to the element symbol, atom number, atom name,
    residue name and the x/y/z coordinate columns.
    """
    linked_ids = hetatoms_ids()
    het_table = pdb.df['HETATM']
    is_linked = het_table['atom_number'].isin(linked_ids)
    wanted = [
        'element_symbol', 'atom_number', 'atom_name', 'residue_name',
        'x_coord', 'y_coord', 'z_coord',
    ]
    return het_table.loc[~is_linked, wanted]

**IDs of HETATM atoms that are connected to atoms of the primary structure**


In [16]:
hetatoms_ids()

[1126, 1127, 3564, 3571, 1118, 1119]

**Output of cross-linked atoms**

In [21]:
cross_linked_atoms().head()

Unnamed: 0,element_symbol,atom_number,atom_name,residue_name,x_coord,y_coord,z_coord
2,C,1120,CB,YCM,18.805,29.332,148.6
3,S,1121,SG,YCM,17.945,30.434,147.51
4,C,1122,CD,YCM,19.045,30.731,146.16
5,C,1123,CE,YCM,20.078,31.81,146.405
6,O,1124,OZ1,YCM,21.301,31.57,146.277


# TEST

In [0]:
def find_conn_atoms_in_conect_table(len_value):
    """
    Collect the CONECT records that involve an atom numbered below len_value.

    Reads the CONECT rows of the module-level ``pdb.df['OTHERS']``; their
    "entry" column holds the whitespace-separated serial numbers of the
    atoms that connect to each other.

    Args:
        len_value: serial-number threshold. A record is kept when at least
            one of its atoms has a serial number strictly below it.
            NOTE(review): previously this argument was ignored and 3056 was
            hard-coded; presumably it is the size of the ATOM section -
            confirm against callers.

    Returns:
        list: the kept records, each a list of serial-number strings.
    """
    others = pdb.df['OTHERS']
    conect_data = others[others['record_name'] == 'CONECT']
    connections = conect_data['entry'].str.split()
    return [
        atoms for atoms in connections
        if any(int(atom_id) < len_value for atom_id in atoms)
    ]

def find_atom_idx(conn_atoms, len_value=3056):
    """
    Collect the atom ids above a threshold from a list of CONECT records.

    Args:
        conn_atoms: iterable of records, each an iterable of atom serial
            numbers (strings or ints convertible with ``int``).
        len_value: serial-number threshold; only ids strictly greater than
            it are returned. Defaults to 3056, the value that used to be
            hard-coded here.

    Returns:
        set: the unique ids (ints) greater than ``len_value``.
    """
    return {
        idx
        for record in conn_atoms
        for idx in map(int, record)
        if idx > len_value
    }

In [0]:
def find_conn_atoms_in_conect_table():
    """
    Find ids of hetatoms that connect to the primary structure
    in the CONECT section of the PDB file.

    A CONECT record counts as touching the primary structure when at least
    one of its atoms is not listed in ``pdb.df['HETATM']``; the HETATM ids
    of such records are collected.

    Return: list(hetatom ids), sorted in ascending order
    """
    # CONECT table; its "entry" column contains the atoms which connect
    # to each other.
    others = pdb.df['OTHERS']
    conect_df = others[others['record_name'] == 'CONECT']
    connections = conect_df['entry'].str.split()

    # Classify atoms by membership in the HETATM table rather than by
    # comparing serial numbers with the number of ATOM rows: HETATM serials
    # can lie inside the ATOM numbering range (e.g. a modified residue in the
    # middle of a chain), which a simple threshold misclassifies.
    hetatm_ids = set(pdb.df['HETATM']['atom_number'].to_list())

    atom_ids = set()
    for atoms in connections:
        ids = [int(atom_id) for atom_id in atoms]
        if any(idx not in hetatm_ids for idx in ids):
            atom_ids.update(idx for idx in ids if idx in hetatm_ids)
    return sorted(atom_ids)
    

In [0]:
# atom_id = find_conn_atoms_in_conect_table()
def cross_link_atoms(atom_ids):
    """
    Return the HETATM rows whose atom number is not in atom_ids.

    Only the element symbol, atom name, residue name and the x/y/z
    coordinate columns are kept in the result.
    """
    het_table = pdb.df['HETATM']
    excluded = het_table['atom_number'].isin(atom_ids)
    wanted = [
        'element_symbol', 'atom_name', 'residue_name',
        'x_coord', 'y_coord', 'z_coord',
    ]
    return het_table.loc[~excluded, wanted]