# Generate AlphaFold3 Experiments

In [18]:
import pandas as pd
import json, os

In [22]:
# Both tables live in the same workbook; name the path once and read each sheet.
experiments_workbook = '../../Experiments.xlsx'
antigens = pd.read_excel(experiments_workbook, sheet_name='Antigens')
antibodies = pd.read_excel(experiments_workbook, sheet_name='Antibodies')

In [23]:
# Preview the antigens DataFrame to check which columns were loaded.
antigens

Unnamed: 0,antigen_id,antigen_host_name,antigen_host_class,antigen_host_order,antigen_host_family,antigen_host_genus,antigen_host_species,antigen_collection_location,antigen_collection_location_continent,antigen_collection_location_country,...,antigen_pdb_type,antigen_active_residues,antigen_active_residue_selection,aa_4jug_pos_158,aa_4jug_pos_160,aa_4jug_pos_190,aa_4jug_pos_224,aa_4jug_pos_225,aa_4jug_pos_226,aa_4jug_pos_228
0,YP_308669.1,A/goose/Guangdong/1/1996(H5N1),,,,,,,Asia,China,...,,,,,,,,,,
1,WYN03019.1,A/Texas/37/2024(H5N1),,,,,,,North America,United States,...,,,,,,,,,,
2,AAT73273.1,A/Vietnam/1194/2004(H5N1),,,,,,,Asia,Vietnam,...,,,,,,,,,,


In [24]:
## Read in JSON template
# Parse the AlphaFold3 job template directly from the open file handle.
with open('alphafold_input_template.json', 'r') as template_file:
    job_template = json.load(template_file)

job_template

{'name': 'AF3',
 'sequences': [{'protein': {'id': ['H'], 'sequence': ''}},
  {'protein': {'id': ['L'], 'sequence': ''}},
  {'protein': {'id': ['A'], 'sequence': ''}}],
 'modelSeeds': [1337],
 'dialect': 'alphafold3',
 'version': 1}

In [26]:
## Loop through antigens and antibodies
# For every (antigen, antibody) pair, write an AlphaFold3 input JSON into its own
# directory named "<antigen_id>__<antibody_id>" under the experiments folder.
experiments_dir = "../../data/experiments/alphafold3"

for index_1, antigen in antigens.iterrows():
    for index_2, antibody in antibodies.iterrows():
        # A shallow copy is sufficient here: only the top-level 'name' and
        # 'sequences' keys are rebound below, so the template is never mutated.
        job = job_template.copy()
        job['name'] = f"{antigen['antigen_id']}__{antibody['antibody_id']}"

        # Chain ids follow the template: heavy chain, light chain, antigen.
        job['sequences'] = [
            {'protein': {'id': ['H'], 'sequence': antibody['antibody_h_chain']}},
            {'protein': {'id': ['L'], 'sequence': antibody['antibody_l_chain']}},
            {'protein': {'id': ['A'], 'sequence': antigen['antigen_sequence']}}
        ]

        output_dir = f"{experiments_dir}/{job['name']}"
        # makedirs(exist_ok=True) creates missing parent directories and keeps the
        # cell re-runnable; os.mkdir raised FileExistsError on any second run.
        # Re-running therefore overwrites the existing input file for the pair.
        os.makedirs(output_dir, exist_ok=True)

        # Derive the file path from output_dir so the two cannot drift apart.
        with open(f"{output_dir}/alphafold_input.json", 'w') as file:
            json.dump(job, file, indent=4)
        print(f"Created job for {job['name']}")

Created job for YP_308669.1__FLD194
Created job for YP_308669.1__H5.3
Created job for YP_308669.1__65C6
Created job for YP_308669.1__100F4
Created job for YP_308669.1__3C11
Created job for YP_308669.1__AVFluIgG01
Created job for YP_308669.1__FLD21.140
Created job for YP_308669.1__H5M9
Created job for YP_308669.1__13D4
Created job for YP_308669.1__AVFluIgG03
Created job for YP_308669.1__12H5
Created job for WYN03019.1__FLD194
Created job for WYN03019.1__H5.3
Created job for WYN03019.1__65C6
Created job for WYN03019.1__100F4
Created job for WYN03019.1__3C11
Created job for WYN03019.1__AVFluIgG01
Created job for WYN03019.1__FLD21.140
Created job for WYN03019.1__H5M9
Created job for WYN03019.1__13D4
Created job for WYN03019.1__AVFluIgG03
Created job for WYN03019.1__12H5
Created job for AAT73273.1__FLD194
Created job for AAT73273.1__H5.3
Created job for AAT73273.1__65C6
Created job for AAT73273.1__100F4
Created job for AAT73273.1__3C11
Created job for AAT73273.1__AVFluIgG01
Created job for 

In [None]:
## Create CSV of job names
# One "<antigen_id>__<antibody_id>" entry per pair, iterating antigens in the
# outer position and antibodies in the inner position.
job_names = [
    f"{antigen['antigen_id']}__{antibody['antibody_id']}"
    for _, antigen in antigens.iterrows()
    for _, antibody in antibodies.iterrows()
]

# Single-column table of experiment ids, written without the row index.
job_names_df = pd.DataFrame({'experiment_id': job_names})
job_names_df.to_csv('../02_experiment_submission/folding_jobs.csv', index=False)