# Create HADDOCK 3 Experiments

In [2]:
import shutil, os, configparser
import pandas as pd

In [5]:
experiments = pd.read_excel('../../Experiments.xlsx', sheet_name='Experiments').head(2)
antibodies = pd.read_excel('../../Experiments.xlsx', sheet_name='Antibodies')
antigens = pd.read_excel('../../Experiments.xlsx', sheet_name='Antigens')

experiments.head()

Unnamed: 0,experiment_id,antibody_id,antigen_id,submitted,haddock_best_cluster,haddock_best_pdb_path,haddock_Nstruc,haddock_Evdw_plus_0.1Eelec,haddock_Evdw_plus_0.1Eelec_sd,haddock_Evdw,...,haddock_AIRviol,haddock_AIRviol_sd,haddock_dihedviol,haddock_dihedviol_sd,haddock_BSA,haddock_BSA_sd,haddock_score,haddock_score_sd,haddock_prodigy_deltaG_kcalpermol,haddock_prodigy_dissociation_constant_M
0,H5.3__EPI2800361,H5.3,EPI2800361,False,,,,,,,...,,,,,,,,,,
1,AVFluIgG01__EPI454493,AVFluIgG01,EPI454493,False,,,,,,,...,,,,,,,,,,


## AIR Restraints File Logic

In [6]:
## Define functions for creating ambiguous AIR files
def write_ambig_air_file(active1, passive1, active2, passive2, segid1='A', segid2='B', output_file="ambig.tbl"):
    with open(output_file, "w") as output_file:
        ## Convert residues to integers
        active1 = [int(x) for x in active1]
        passive1 = [int(x) for x in passive1]
        active2 = [int(x) for x in active2]
        passive2 = [int(x) for x in passive2]
        all1 = active1 + passive1
        all2 = active2 + passive2

        ## Write lines from the active1 list
        for resi1 in active1:
            output_file.write('assign (resi {:d} and segid {:s})'.format(resi1, segid1) + '\n')
            output_file.write('(\n')
            c = 0
            for resi2 in all2:
                output_file.write('       (resi {:d} and segid {:s})'.format(resi2, segid2) + '\n')
                c += 1
                if c != len(all2):
                    output_file.write('        or\n')
            output_file.write(') 2.0 2.0 0.0\n\n')

        ## Write lines from the active2 list
        for resi2 in active2:
            output_file.write('assign (resi {:d} and segid {:s})'.format(resi2, segid2) + '\n')
            output_file.write('(\n')
            c = 0
            for resi1 in all1:
                output_file.write('       (resi {:d} and segid {:s})'.format(resi1, segid1) + '\n')
                c += 1
                if c != len(all1):
                    output_file.write('        or\n')
            output_file.write(') 2.0 2.0 0.0\n\n')

    ## File will be closed automatically when exiting the 'with' block

## Make Config Files

In [7]:
def create_config(
      antibody_pdb = 'data/TEST_ANTIBODY.pdb',
      antigen_pdb = 'data/TEST_ANTIGEN.pdb',
      # reference_pdb = 'data/TEST_MATCHED.pdb',
      ambig_fname = "data/ambig.tbl",
      # unambig_fname = "data/unambig.tbl",
      output_file = 'config.cfg'
                  ):
    
    config = configparser.ConfigParser()

    ## Read the configuration file
    config.read('antibody_antigen_template_custom.cfg')

    ## Update the configuration
    config['main'] = {'run_dir': '"./output"',
                      'mode': '"local"',
                      'concat':  5,
                      'queue_limit': 100,
                      'molecules': [
                            antibody_pdb,
                            antigen_pdb
                            ]}
    
    config['rigidbody'] = {
       'tolerance': 5,
       'ambig_fname': f'"{ambig_fname}"'#,
      #  'unambig_fname': f'"{unambig_fname}"'
    }

    # config['caprieval_1'] = {'reference_fname': f'"{reference_pdb}"'}

    config['flexref'] = {
       'tolerance': 5,
       'ambig_fname': f'"{ambig_fname}"'#,
      #  'unambig_fname': f'"{unambig_fname}"'
    }

    # config['caprieval_3'] = {'reference_fname': f'"{reference_pdb}"'}

    config['emref'] = {
       'tolerance': 5,
       'ambig_fname': f'"{ambig_fname}"'#,
      #  'unambig_fname': f'"{unambig_fname}"'
    }
 
    ## Write the configuration to a file
    with open(output_file, 'w') as configfile:
        config.write(configfile)

    ## Replace specific lines in confi file (HACKY FIX)
    with open(output_file, 'r') as configfile:
      cfgdata = configfile.read()
    cfgdata = cfgdata.replace('[main]', '## Antibody-Antigen Docking with HADDOCK3')
                                                                                                         
    ## Write the file out again
    with open(output_file, 'w') as configfile:
      configfile.write(cfgdata)

## Loop Through Each Experiment and Make its Directory and Files

In [8]:
for index, experiment in experiments.iterrows():
    experiment_id = experiment['experiment_id']
    print(f"Preparing experiment: {experiment_id}")

    ## Find corresponding rows in the antibody and antigen dataframes
    antibody = antibodies[antibodies['antibody_id'] == experiment['antibody_id']].iloc[0]
    antigen = antigens[antigens['antigen_id'] == experiment['antigen_id']].iloc[0]

    ## Make experiment folders
    print(f"\tMaking experiment folders...")
    experiment_path = f"../../data/experiments/{experiment_id}/"
    os.makedirs(experiment_path, exist_ok=True)

    ## Copy antibody and antigen PDB files to experiment folder
    print(f"\tCopying antibody and antigen PDB files to experiment folder...")
    antibody_pdb_path = f"../../{antibody['antibody_pdb_path_renumbered']}"
    antibody_pdb_filename = os.path.basename(antibody_pdb_path)
    antigen_pdb_path = f"../../{antigen['antigen_pdb_path']}"
    antigen_pdb_filename = os.path.basename(antigen_pdb_path)

    shutil.copyfile(antibody_pdb_path, f"{experiment_path}{antibody_pdb_filename}")
    shutil.copyfile(antigen_pdb_path, f"{experiment_path}{antigen_pdb_filename}")

    ## Copy reference PDB file to experiment folder
    reference_pdb_filename = '5a3i_chainsACD.pdb'
    shutil.copyfile(reference_pdb_filename, f"{experiment_path}{reference_pdb_filename}")

    ## Generate AIR file for each experiment using active and passive residues
    active1 = antibody['antibody_active_residues'].replace('`','').split(',')
    passive1 = []
    active2 = antigen['antigen_active_residues'].replace('`','').split(',')
    passive2 = []
    candidate_path = f"../../data/experiments/{experiment_id}/"
    air_file_name = f"ambig.tbl"
    print(f"\tGenerating AIR file...")
    write_ambig_air_file(active1, passive1,
                         active2, passive2,
                         segid1='A', segid2='B',
                         output_file = f"{experiment_path}{air_file_name}")

    ## Genearate config file for each experiment
    print(f"\tGenerating config file...")
    create_config(
      antibody_pdb = antibody_pdb_filename,
      antigen_pdb = antigen_pdb_filename,
      reference_pdb = reference_pdb_filename,
      ambig_fname = air_file_name,
      # unambig_fname = "data/unambig_TEST.tbl",
      output_file = f"{experiment_path}config.cfg")
    
    print(f'\tDone preparing the experiment files for {experiment_id}!')

Preparing experiment: H5.3__EPI2800361
	Making experiment folders...
	Copying antibody and antigen PDB files to experiment folder...


FileNotFoundError: [Errno 2] No such file or directory: '../../nan'