# Process docking results
These results come from docking analyses run with Vina and Vinardo, using 166 ERK2 crystal structures and the 86 molecules selected by Fourches.

In [2]:
import pandas as pd
import numpy as np
from glob import glob

In [8]:
# Root folder that holds the per-crystal docking score tables
DOCK_RESULTS_DIR = '../../ARCHIVOS/CRISTALES/LIGS_ERK2/FOURCHES_LIGS/3d_minimized/docking_results_CRYS_ENS/'

# Vina result files, ordered by the first "_"-separated token of the
# file name (the pdbid)
vina_results_csv = sorted(
    glob(DOCK_RESULTS_DIR + 'VINA/*csv'),
    key=lambda path: path.split('/')[-1].split('_')[0],
)

# Vinardo result files, ordered the same way
vinardo_results_csv = sorted(
    glob(DOCK_RESULTS_DIR + 'VINARDO/*csv'),
    key=lambda path: path.split('/')[-1].split('_')[0],
)

In [20]:
def get_docking_results(file_list):
    '''Gather per-protein docking scores into one ligand x protein table.

    The same reader is used for the Vina and the Vinardo result files.
    Each CSV must have a "Ligando" column whose values end in "_<integer>"
    and exactly one score column whose header carries the pdbid as its
    second "_"-separated token.

    Parameters
    ----------
    file_list : list of str
        Paths of the CSV files, one per protein conformation.

    Returns
    -------
    pandas.DataFrame
        One row per ligand (ordered by the trailing integer of its name, as
        found in the first file) and one column per pdbid, in the order the
        files were given. Ligands missing from a later file show up as NaN.
        An empty frame is returned when `file_list` is empty.
    '''
    # Collect the columns first and build the frame once at the end: inserting
    # 166 columns one by one fragments the frame and makes pandas emit a
    # PerformanceWarning.
    scores_by_pdbid = {}
    ligand_index = None
    for file in file_list:
        scores = pd.read_csv(file).set_index("Ligando")
        # Order ligands numerically by the integer suffix of their name
        # (a plain string sort would put "_10" before "_2")
        order = scores.index.to_series().str.rsplit('_').str[-1].astype(int).sort_values()
        scores = scores.reindex(index=order.index)
        # Rename the score column to match the pdbid name
        colname = scores.columns[0].rsplit("_")[1]
        scores.columns = [colname]
        # The first file defines the row labels every other file is aligned to
        if ligand_index is None:
            ligand_index = scores.index
        scores_by_pdbid[colname] = scores[colname].reindex(ligand_index)
    if ligand_index is None:
        return pd.DataFrame()
    return pd.DataFrame(scores_by_pdbid, index=ligand_index)

### Create the dataframes and save the results

In [26]:
# Build the table of Vina scores and persist it as CSV
df_vina = get_docking_results(file_list=vina_results_csv)
df_vina.to_csv(path_or_buf='./docking_results/vina_erk2_86mols_vs_166prots.csv')

In [25]:
# Build the table of Vinardo scores and persist it as CSV
df_vinardo = get_docking_results(file_list=vinardo_results_csv)
df_vinardo.to_csv(path_or_buf='./docking_results/vinardo_erk2_86mols_vs_166prots.csv')

## Docking poses

In [48]:
# Folder with the Vinardo docking poses; handed to the %%bash cells as $1
MAIN_DIR = '../../ARCHIVOS/CRISTALES/LIGS_ERK2/FOURCHES_LIGS/3d_minimized/docking_results_CRYS_ENS/VINARDO_POSES'

#### Extract pdbqt results from *tar files

In [45]:
%%bash -s '$MAIN_DIR'
# Unpack every docked pose (*_dkVin.pdbqt) stored in the TARS/ archives into PDBQTS/.
# $1 is the directory passed in from the notebook (MAIN_DIR).

# Stop right away if the directory is missing, otherwise the archives would be
# extracted into whatever directory the notebook happens to run from
cd "$1" || exit 1
# -p keeps the cell re-runnable when PDBQTS already exists
mkdir -p PDBQTS
cd PDBQTS || exit 1
for archive in ../TARS/*tar
do
    # When nothing matches, the glob is left as a literal string: skip it
    [ -e "$archive" ] || continue
    # --no-anchored lets the pattern match member names at any directory depth
    tar -xf "$archive" --wildcards --no-anchored '*_dkVin.pdbqt'
done

#### Convert the docked poses from pdbqt to sdf

In [113]:
# Create the script that will be run by chimera (written to chim_addh.py in
# the current working directory)
# Directory the generated script changes into before searching for *.pdb files
dir_inputs = '.'

# The F-string only interpolates {dir_inputs}; the rest is written verbatim.
# For every *.pdb file found, the generated script opens it, runs the "addh"
# command, writes the result as <name>.mol2 and closes everything.
with open('chim_addh.py', 'w') as f:
    f.write(F'''
from chimera import runCommand as rc
from glob import glob

from os import chdir, listdir
chdir("{dir_inputs}") 

for m in glob('*.pdb'):
    rc('open ' + m)
    rc("addh")
    rc('write format mol2 0 ' + m[:-4] + '.mol2')
    rc('close all')    
    ''')

#### The following procedure preserves the positions of the polar hydrogens and the protonation states at pH 7

In [116]:
%%bash -s "$MAIN_DIR"
# Build one SDF file per docked pose: pdbqt -> pdb (obabel) -> mol2 (chimera addh) -> sdf (obabel).
# $1 is the directory passed in from the notebook (MAIN_DIR).

# Chimera executable; set the CHIMERA environment variable to use another install
chimera="${CHIMERA:-/home/joel/.local/UCSF-Chimera64-1.14rc/bin/chimera}"
# copy the chimera script that converts from pdb to mol2
cp chim_addh.py "$1"
# Stop if the directory is missing: the rm commands below must never run elsewhere
cd "$1" || exit 1

# -p keeps the cell re-runnable when SDF already exists
mkdir -p SDF
for i in ./PDBQTS/*
do
    # Only the per-ligand folders are processed
    [ -d "$i" ] || continue
    ligand=${i##*/}
    echo "$i"
    mkdir -p "./SDF/$ligand"

    # copy the docked molecules and the chimera script to the ligand's SDF dir
    cp "$i"/*pdbqt "./SDF/$ligand/"
    cp chim_addh.py "./SDF/$ligand/"

    # Work in a subshell so a failed cd skips this ligand instead of letting
    # the rm commands run in the parent directory
    (
        cd "./SDF/$ligand/" || exit 1
        # convert to pdb, keeping only the first pose (the best one in vina/vinardo)
        # NOTE(review): no -d flag is given, so obabel presumably keeps the
        # hydrogens present in the pdbqt -- confirm
        obabel -ipdbqt *pdbqt -opdb -l 1 -m
        rm -f *.pdbqt

        # add hydrogens with chimera (addh) and write mol2 files
        "$chimera" --nogui chim_addh.py

        # convert mol2 to sdf; no hydrogens are added here, they come from chimera
        obabel -imol2 *mol2 -osdf  -m

        # delete the intermediate files and the copied script
        rm -f *.pdb *.mol2 chim_addh.py *pyc
    )
done

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

166 molecules converted
166 files output. The first is CSAR_erk2_1_1erk_dkVin.pdb
166 molecules converted
166 files output. The first is CSAR_erk2_1_1erk_dkVin.sdf
166 molecules converted
166 files output. The first is CSAR_erk2_10_1erk_dkVin.pdb
166 molecules converted
166 files output. The first is CSAR_erk2_10_1erk_dkVin.sdf
166 molecules converted
166 files output. The first is CSAR_erk2_11_1erk_dkVin.pdb
166 molecules converted
166 files output. The first is CSAR_erk2_11_1erk_dkVin.sdf
166 molecules converted
166 files output. The first is CSAR_erk2_12_1erk_dkVin.pdb
166 molecules converted
166 files output. The first is CSAR_erk2_12_1erk_dkVin.sdf
166 molecu