# Virtual screening for high affinity guests for synthetic supramolecular receptors

## Replication of the analysis in our manuscript using Python

In [1]:
import pandas as pd
import numpy as np
from simons_data_science_library import db_utils as rdc
from simons_data_science_library import dat_tran as dtran

### We load and clean data from the supporting information

#### Molecular descriptors

In [2]:
# Load the descriptor table stored in 'gold_weightings.txt' through the project's
# CSV loader. The preview emitted by this cell shows one row per Guest with the
# columns Ligand_clash, Ligand_torsion, Part_buried, Non-polar and Ligand_flexibility.
# NOTE(review): the second argument (' ') is presumably the field delimiter —
# confirm against csv_load's signature.
tab_gold_wt = rdc.RDSDatabaseConnector.csv_load('gold_weightings.txt', ' ')

our df
   Guest  Ligand_clash  Ligand_torsion  Part_buried  Non-polar  \
0      1           0.0             0.0      -2.1090   -43.3137   
1      2           0.0             0.0      -3.0465   -27.4388   
2      3           0.0             0.0      -2.4099   -41.2634   
3      4           0.0             0.0      -2.2600   -32.0514   
4      5           0.0             0.0      -2.7351   -40.6438   

   Ligand_flexibility  
0                   0  
1                   0  
2                   0  
3                   0  
4                   0  


#### ChemPLP scoring function

In [3]:
# Load the ChemPLP scores from 'chemplp_score.txt'. As the preview shows, the file
# is laid out "wide": three side-by-side groups of (Guest, logKexpt, ChemPLP_Score),
# with the repeated column names suffixed .1 and .2.
# NOTE(review): ' ' is presumably the field delimiter — confirm against csv_load.
chemplp_score = rdc.RDSDatabaseConnector.csv_load('chemplp_score.txt', ' ')

our df
   Guest  logKexpt  ChemPLP_Score  Guest.1  logKexpt.1  ChemPLP_Score.1  \
0      1      3.49          49.81       19        4.28            45.54   
1      2      1.15          39.49       20        3.94            50.66   
2      3      3.83          48.47       21        1.15            47.52   
3      4      1.87          38.88       22        1.73            38.51   
4      5      3.96          47.67       23        2.11            39.26   

   Guest.2  logKexpt.2  ChemPLP_Score.2  
0       37        4.90            53.86  
1       38        4.30            57.79  
2       39        4.00            49.12  
3       40        1.95            45.94  
4       41        2.08            53.28  


In [4]:
# Collapse the side-by-side column groups into a single table with the columns
# Guest, logKexpt and ChemPLP_Score (see the tail below, which ends at Guest 54).
# NOTE(review): the meaning of the second argument (3) is not visible here —
# presumably the number of columns per group; confirm against df_reshape_one.
# NOTE: this rebinds chemplp_score to the reshaped frame, so re-running this cell
# without re-running the load cell passes the already-reshaped frame back in.
chemplp_score = dtran.DataTransform.df_reshape_one(chemplp_score,3)
# Show the last rows as a quick check of the reshaped result.
chemplp_score.tail()

3


Unnamed: 0,Guest,logKexpt,ChemPLP_Score
49,50,2.3,44.66
50,51,0.48,40.7
51,52,0.7,42.4
52,53,0.9,39.14
53,54,0.7,46.49


#### LogK calculated using equations 2 and 3

In [5]:
# Load the equation 2 training-set table from 'eq2_train_lit_logk.txt'. The preview
# shows the same wide layout as the ChemPLP file: three side-by-side groups of
# (Guest, logKexpt, logKcalc).
# NOTE(review): ' ' is presumably the field delimiter — confirm against csv_load.
eq2_train_logk = rdc.RDSDatabaseConnector.csv_load('eq2_train_lit_logk.txt', ' ')

our df
   Guest  logKexpt  logKcalc  Guest.1  logKexpt.1  logKcalc.1  Guest.2  \
0      1      3.49      3.28       19        4.28        2.92       37   
1      2      1.15      1.87       20        3.94        2.43       38   
2      3      3.83      3.08       21        1.15        1.99       39   
3      4      1.87      2.34       22        1.73        2.35       40   
4      5      3.96      2.99       23        2.11        2.37       41   

   logKexpt.2  logKcalc.2  
0        4.90        3.27  
1        4.30        3.83  
2        4.00        3.23  
3        1.95        3.65  
4        2.08        0.38  


In [6]:
# Collapse the side-by-side column groups into a single table with the columns
# Guest, logKexpt and logKcalc.
# NOTE(review): the second argument (3) is presumably the number of columns per
# group — confirm against df_reshape_one.
# NOTE: this rebinds eq2_train_logk, so re-running this cell without re-running
# the load cell passes the already-reshaped frame back in.
eq2_train_logk = dtran.DataTransform.df_reshape_one(eq2_train_logk,3)
# Show the last rows as a quick check of the reshaped result.
eq2_train_logk.tail()

3


Unnamed: 0,Guest,logKexpt,logKcalc
49,50,2.3,2.98
50,51,0.48,1.96
51,52,0.7,2.56
52,53,0.9,2.12
53,54,0.7,2.67


In [7]:
# Load the equation 3 training-set table from 'eq3_train_lit_logk.txt'. The preview
# shows three side-by-side groups of (Guest, logKexpt, logKcalc).
# NOTE(review): ' ' is presumably the field delimiter — confirm against csv_load.
eq3_train_logk= rdc.RDSDatabaseConnector.csv_load('eq3_train_lit_logk.txt', ' ')

our df
   Guest  logKexpt  logKcalc  Guest.1  logKexpt.1  logKcalc.1  Guest.2  \
0      1      3.49      4.02       19        4.28        3.58       37   
1      2      1.15      2.20       20        3.94        3.52       38   
2      3      3.83      3.75       21        1.15        2.40       39   
3      4      1.87      2.83       22        1.73        2.85       40   
4      5      3.96      3.62       23        2.11        2.87       41   

   logKexpt.2  logKcalc.2  
0        4.90        3.02  
1        4.30        3.82  
2        4.00        3.95  
3        1.95        3.58  
4        2.08        1.75  


In [8]:
# Collapse the side-by-side column groups into a single table with the columns
# Guest, logKexpt and logKcalc.
# NOTE(review): the second argument (3) is presumably the number of columns per
# group — confirm against df_reshape_one.
# NOTE: this rebinds eq3_train_logk, so re-running this cell without re-running
# the load cell passes the already-reshaped frame back in.
eq3_train_logk = dtran.DataTransform.df_reshape_one(eq3_train_logk,3)
# Show the last rows as a quick check of the reshaped result.
eq3_train_logk.tail()

3


Unnamed: 0,Guest,logKexpt,logKcalc
49,50,2.3,3.66
50,51,0.48,2.29
51,52,0.7,2.19
52,53,0.9,1.58
53,54,0.7,1.41


In [9]:
# Load the equation 3 test-set table from 'eq3_test_lit_logk.txt'. Unlike the
# training files, the preview shows only two side-by-side groups of
# (Guest, logKexpt, logKcalc), and Guest.1 is already read as a float (63.0).
# NOTE(review): ' ' is presumably the field delimiter — confirm against csv_load.
eq3_test_logk= rdc.RDSDatabaseConnector.csv_load('eq3_test_lit_logk.txt', ' ')

our df
   Guest  logKexpt  logKcalc  Guest.1  logKexpt.1  logKcalc.1
0     55      6.80      7.12     63.0        4.45        4.10
1     56      8.00      6.35     64.0        4.18        4.06
2     57      7.26      6.29     65.0        4.20        4.02
3     58      6.06      6.00     66.0        4.11        4.08
4     59      6.09      5.74     67.0        3.60        3.24


In [10]:
# Collapse the side-by-side column groups into a single table with the columns
# Guest, logKexpt and logKcalc.
# NOTE(review): 3 is passed here although the file has only two column groups and
# the call prints 2 — presumably 3 is the number of columns per group and the
# printed value is the number of groups; confirm against df_reshape_one.
# NOTE(review): Guest comes out as floats (e.g. 65.0) in the result, whereas the
# training tables show integer guest ids — check before joining on Guest.
# NOTE: this rebinds eq3_test_logk, so re-running this cell without re-running
# the load cell passes the already-reshaped frame back in.
eq3_test_logk = dtran.DataTransform.df_reshape_one(eq3_test_logk,3)

# Show the last rows as a quick check of the reshaped result.
eq3_test_logk.tail()

2


Unnamed: 0,Guest,logKexpt,logKcalc
10,65.0,4.2,4.02
11,66.0,4.11,4.08
12,67.0,3.6,3.24
13,68.0,1.11,0.88
14,69.0,3.4,2.51


In [11]:
# Show the complete reshaped test set; it is small (guests 55-69), so displaying
# every row is fine. A bare trailing expression is used instead of print() so the
# notebook renders the frame with its rich DataFrame display, as the other cells do.
eq3_test_logk

    Guest  logKexpt  logKcalc
0    55.0      6.80      7.12
1    56.0      8.00      6.35
2    57.0      7.26      6.29
3    58.0      6.06      6.00
4    59.0      6.09      5.74
5    60.0      5.73      5.68
6    61.0      5.50      6.73
7    62.0      2.88      3.27
8    63.0      4.45      4.10
9    64.0      4.18      4.06
10   65.0      4.20      4.02
11   66.0      4.11      4.08
12   67.0      3.60      3.24
13   68.0      1.11      0.88
14   69.0      3.40      2.51
