# Generate AlphaFold3 Experiments

In [4]:
import pandas as pd
import json, os

In [5]:
# Both input tables live in the same workbook, one sheet each; read them in a
# single call (a list of sheet names returns a dict keyed by sheet name).
experiments_workbook = '../../Experiments.xlsx'
workbook_sheets = pd.read_excel(experiments_workbook, sheet_name=['Viral Proteins', 'Glycans'])
proteins = workbook_sheets['Viral Proteins']
glycans = workbook_sheets['Glycans']

In [6]:
# Preview the viral protein table read from the 'Viral Proteins' sheet.
proteins

Unnamed: 0,accession,location,year,aa_sequence,source_note
0,EPI3171488,Texas,2024,MENIVLLLAIVSLVKSDQICIGYHANNSTEQVDTIMEKNVTVTHAQ...,Human in Bovine Clade
1,EPI1846961,Astrakhan,2020,MENIVLLLAIVSLVKSDQICIGYHANNSTEQVDTIMEKNVTVTHAQ...,Candidate Vaccine Virus
2,PQ809550,Louisiana,2024,MENIVLLLAIISLVKSDQICIGYHANNSTEQVDTIMEKNVTVTHAQ...,Human in Avian Clade
3,PQ591824,California,2024,MENIVLLLAIVSLVKSDQICIGYHANNSTEQVDTIMEKNVTVTHAQ...,Human in Bovine Clade


In [9]:
# Preview the glycan table read from the 'Glycans' sheet.
glycans

Unnamed: 0,glycan_id,pdb_id,chain,molecule_name,smiles,source
0,4KDO_I,4KDO,I,N-acetyl-alpha-neuraminic acid-(2-6)-beta-D-ga...,CC(=O)NC1C(O)OC(CO)C(OC2OC(COC3(CC(O)C(NC(C)=O...,Human
1,4K63_I,4K63,I,N-acetyl-alpha-neuraminic acid-(2-3)-beta-D-ga...,CC(=O)NC1C(O)OC(CO)C(OC2OC(CO)C(O)C(OC3(CC(O)C...,Avian


In [7]:
## Load the JSON template that each generated AlphaFold3 job is copied from
with open('alphafold_input_template.json', 'r') as template_handle:
    # Parse straight from the open file handle.
    job_template = json.load(template_handle)

# Show the parsed template as the cell output.
job_template

{'name': 'jobname',
 'modelSeeds': [1337],
 'sequences': [{'protein': {'id': 'A', 'sequence': ''}},
  {'ligand': {'id': 'B', 'smiles': ['']}}],
 'dialect': 'alphafold3',
 'version': 1}

In [13]:
# Spot check: the glycan_id stored in the row labelled 0.
glycans.loc[0, 'glycan_id']

'4KDO_I'

In [14]:
## Loop through proteins and molecules
# Writes one AlphaFold3 input file per (protein, glycan) pair to
#   ../../data/experiments/af3/<accession>__<glycan_id>/alphafold_input.json
# The cell is safe to re-run: existing job directories are reused and their
# input file is overwritten.
af3_root = "../../data/experiments/af3"

for index_1, protein in proteins.iterrows():
    for index_2, glycan in glycans.iterrows():
        # A shallow copy is sufficient here: only top-level keys are reassigned
        # below, so nested values shared with job_template are never mutated.
        job = job_template.copy()
        job['name'] = f"{protein['accession']}__{glycan['glycan_id']}"

        # NOTE(review): the template's placeholder for 'smiles' is a one-element
        # list, while this writes the column value directly (presumably a plain
        # string) -- confirm which form the AlphaFold3 input format expects.
        job['sequences'] = [
            {'protein': {'id': 'A', 'sequence': protein['aa_sequence']}},
            {'ligand':  {'id': 'B', 'smiles': glycan['smiles']}}
        ]

        output_dir = os.path.join(af3_root, job['name'])
        # os.mkdir raised FileExistsError on any re-run and FileNotFoundError
        # when the af3 parent folder was missing; makedirs handles both cases.
        os.makedirs(output_dir, exist_ok=True)

        with open(os.path.join(output_dir, "alphafold_input.json"), 'w') as file:
            json.dump(job, file, indent=4)
        print(f"Created job for {job['name']}")

Created job for EPI3171488__4KDO_I
Created job for EPI3171488__4K63_I
Created job for EPI1846961__4KDO_I
Created job for EPI1846961__4K63_I
Created job for PQ809550__4KDO_I
Created job for PQ809550__4K63_I
Created job for PQ591824__4KDO_I
Created job for PQ591824__4K63_I


In [15]:
## Create CSV of job names
# One experiment id per protein/glycan pair ("<accession>__<glycan_id>"),
# proteins in the outer loop and glycans in the inner loop.
job_names = []
for protein_idx, protein in proteins.iterrows():
    for glycan_idx, glycan in glycans.iterrows():
        job_names.append(f"{protein['accession']}__{glycan['glycan_id']}")

# Single-column table, written without the index column.
job_names_df = pd.DataFrame({'experiment_id': job_names})
job_names_df.to_csv('folding_jobs.csv', index=False)