# Goals

Load, process, and save the data here so that the figure-plotting code can simply load the processed results.

# Imports

In [None]:
from pathlib import Path
import yaml
from tqdm import tqdm
import avoidome.uniprot as uniprot
from importlib import reload

In [None]:
# Top-level input/output locations, given relative to the working directory.
data_dir = Path('../data')
fig_dir = Path('../figures')

# Sub-directories of data_dir used by the cells below.
uniprot_dir = data_dir.joinpath('uniprot_downloads')
af_dir = data_dir.joinpath('alphafold_downloads')
schema_cache = data_dir.joinpath('schema_cache')

In [None]:
# Parse the curated ADMET name list; the fetch loop below reads a 'uniprot'
# key from each item of the parsed result.
admet_names_path = data_dir / 'admet_names_curated.yml'
with admet_names_path.open() as f:
    adme_names = yaml.safe_load(f)

# Fetch Uniprot Entries

In [None]:
# Fetch one UniprotEntry per curated protein.
#   success: uniprot_id -> fetched UniprotEntry
#   failed:  uniprot_id -> exception raised while fetching that entry
# NOTE(review): exp_structure_dict is not used in the visible cells; it is
# kept in case a later cell relies on it -- confirm and remove if unused.
exp_structure_dict = {}
failed = {}
success = {}
for protein in tqdm(adme_names):
    uniprot_id = protein['uniprot']
    # The fetch is the call that can fail, so it has to live inside the try;
    # otherwise one bad ID aborts the whole loop and `failed` stays empty.
    try:
        ue = uniprot.UniprotEntry.from_uniprot_id(uniprot_id)
    except Exception as err:
        # Keep the error so failures can be inspected after the loop.
        # `Exception` (not a bare except) lets Ctrl-C still interrupt the run.
        failed[uniprot_id] = err
    else:
        success[uniprot_id] = ue

## Save to local

In [None]:
# Write every successfully fetched entry to <uniprot_dir>/<entry id>.yml.
# Create the target directory first so a fresh checkout does not fail on open().
uniprot_dir.mkdir(parents=True, exist_ok=True)
for ue in success.values():
    # The context manager closes (and therefore flushes) each file right away
    # instead of leaving the handle open until garbage collection.
    with open(f"{uniprot_dir / ue.data['id']}.yml", 'w') as f:
        yaml.safe_dump(ue.dict(), f)

# Reload

In [None]:
# Rebuild the UniprotEntry objects from the YAML files saved above.
# glob() yields files in arbitrary order; sorting makes the resulting list
# (and everything derived from it) reproducible between runs.
entries = sorted(uniprot_dir.glob('*.yml'))
# Re-import avoidome.uniprot so the current on-disk version of the module is used.
reload(uniprot)
uniprot_entries = []
for file in tqdm(entries):
    # Open each file in a context manager so its handle is closed promptly
    # rather than leaking one open file per entry.
    with open(file, 'r') as f:
        uniprot_entries.append(uniprot.UniprotEntry.from_dict(yaml.safe_load(f)))

# Download AF Structures

In [None]:
# get_alphafold_structures() returns a list; per the author's note it only
# ever holds a single structure, so the first element is taken for each entry.
af_structures = [
    entry.get_alphafold_structures()[0]
    for entry in tqdm(uniprot_entries)
]

In [None]:
# Show the first AlphaFold structure record as the cell output (quick sanity check).
af_structures[0]

In [None]:
from asapdiscovery.data.utils import download_file

In [None]:
# Download each AlphaFold model into af_dir as <first component name>.cif,
# skipping any file that is already present on disk.
for af_structure in af_structures:
    cif_name = f"{af_structure.components[0].name}.cif"
    cif_path = af_dir / cif_name
    if cif_path.exists():
        continue
    download_file(af_structure.model_url, cif_path)

# Get TargetStructureData from Uniprot Entries

In [None]:
import avoidome.target as target

# Re-import avoidome.target so the current on-disk version of the module is used.
reload(target)

# Build one TargetStructureData per reloaded UniProt entry.
tsds = [
    target.TargetStructureData.from_uniprot_entry(entry)
    for entry in tqdm(uniprot_entries)
]

## Save to schema_cache

In [None]:
# Write each TargetStructureData to <schema_cache>/<target_name>.yml.
# Create the cache directory first so a fresh checkout does not fail on open().
schema_cache.mkdir(parents=True, exist_ok=True)
for tsd in tsds:
    # The context manager closes (and therefore flushes) each file right away
    # instead of leaving the handle open until garbage collection.
    with open(f"{schema_cache / tsd.target_name}.yml", 'w') as f:
        yaml.safe_dump(tsd.dict(), f)