# Copy simulation results into the analysis repository
This notebook copies data from the simulation directory into the repository. If you have not run the simulations yourself, you can skip this notebook and use the data already stored in the repository instead.

In [1]:
import os
import shutil

from PLBenchmarks import targets, ligands, edges
from tqdm.notebook import tqdm

import benchmarkpl
# First entry of the benchmarkpl package search path; the cells below use it
# as the root of the copy destinations inside the repository.
path = benchmarkpl.__path__[0]



_ColormakerRegistry()

# Set path of simulation directory
Customize the path in the cell below if your simulation results are stored in a different location.

In [2]:
# Relative path of the simulation directory; adjust it to your own setup.
simulation_dir = '../../../02_benchmark_calculations/'
targets.setDataDir(simulation_dir)

# Number of targets, ligands and edges in the data set

In [3]:
# Print one row per target with its number of ligands and edges, followed by
# the totals over the whole data set.
nligs, nedgs = 0, 0
print(f'{"Target":10s} {"Num Ligs":>10s} {"Num Edges":>10s}')
print(33 * '-')
for target_entry in tqdm(targets.target_list):
    # Each entry of target_list is indexed by "name" to obtain the target name.
    target = target_entry["name"]
    # Build each set only once; the counts feed both the row and the totals.
    num_ligands = len(ligands.ligandSet(target))
    num_edges = len(edges.edgeSet(target))
    print(f'{target:10s} {num_ligands:10d} {num_edges:10d}')
    nligs += num_ligands
    nedgs += num_edges
print(33 * '-')
print(f'{"total":10s} {nligs:10d} {nedgs:10d}')

Target       Num Ligs  Num Edges
---------------------------------


HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))

jnk1               21         31
pde2               21         34
thrombin           11         16
p38                34         56
ptp1b              23         49
galectin            8          7
cdk2               16         25
cmet               18         35
mcl1               42         71
bace               36         58
bace_hunt          32         60
bace_p2            12         26
tyk2               16         24
ros1               28         27
eg5                28         63
cdk8               33         54
hif2a              42         92
pfkfb3             40         66
pde10              35         36
shp2               26         56
syk                44         99
tnks2              27         60

---------------------------------
total             593       1045


# Copy data about targets, ligands, edges to repository
### Attention: The existing `00_data` directory of each target in the repository is deleted before the new version is copied. Only execute the cell if you know what you are doing.

In [14]:
# Copy the targets definition file from the simulation directory into the
# repository root. The destination no longer depends on a `target` variable
# left over from a previously executed cell.
# NOTE(review): assumes targets.yml lives at the top level of the simulation
# data directory (targets.dataDir) - confirm.
target_yaml = os.path.join(targets.dataDir, 'targets.yml')
if os.path.exists(target_yaml):
    shutil.copy(target_yaml, os.path.join(path, 'targets.yml'))
else:
    print(f'File {target_yaml} does not exist.')

# Mirror the `00_data` directory of every target into the repository.
for target_entry in tqdm(targets.target_list):
    target = target_entry["name"]
    target_dir = targets.getTargetDir(target)
    os.makedirs(os.path.join(path, target_dir), exist_ok=True)
    data_path = os.path.join(targets.dataDir, target_dir, '00_data')
    if os.path.exists(data_path):
        dest_path = os.path.join(path, target_dir, '00_data')
        # copytree refuses to write into an existing directory, and removing
        # the old copy first also keeps stale files from surviving.
        if os.path.exists(dest_path):
            shutil.rmtree(dest_path)
        shutil.copytree(data_path, dest_path)
    else:
        print(f'Data path does not exist for target {target}')

HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))




# pmx calculations with openFF parameters (Hahn et al.)
Copies the raw result files (`.dat` and `.csv`) of the pmx calculations from the simulation directory into the `00_data/input` directory of the repository.

In [6]:
# function to retrieve data from PLBenchmarks calculations
def copyRawResults(target, forcefield='openff-1.0.0.offxml'):
    """Copy the raw pmx result files of one target into the repository.

    The files ``<target>_<forcefield>.dat`` (raw results) and
    ``<target>_<forcefield>.csv`` (more detailed information) are looked up in
    ``<targets.dataDir>/<target dir>/06_pmx/<forcefield>/results`` and copied
    to ``<path>/../00_data/input``. A missing source file is reported with a
    printed message and skipped.

    Parameters
    ----------
    target : str
        Name of the target; passed to ``targets.getTargetDir``.
    forcefield : str, optional
        Force field identifier used in the directory and file names.

    Returns
    -------
    None
    """
    results_dir = os.path.join(targets.dataDir,
                               targets.getTargetDir(target),
                               '06_pmx',
                               f'{forcefield}',
                               'results'
                              )
    dest_dir = os.path.join(path, '..', '00_data', 'input')
    for extension in ('dat', 'csv'):
        file_path = os.path.join(results_dir, f'{target}_{forcefield}.{extension}')
        if not os.path.exists(file_path):
            print(f'File {file_path} does not exist.')
            continue
        # Make sure the destination is a directory: otherwise shutil.copy
        # fails, or writes a plain file called "input" that the next copy
        # overwrites.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copy(file_path, dest_dir)

In [11]:
# choose forcefields here: each entry is passed to copyRawResults for every target
forcefields = ['openff-1.0.0.offxml']

In [12]:
# Copy the raw result files of every target for each selected force field.
for target_entry in tqdm(targets.target_list):
    target = target_entry["name"]
    for forcefield in forcefields:
        copyRawResults(target, forcefield)

HBox(children=(FloatProgress(value=0.0, max=22.0), HTML(value='')))


