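# ABOUT
This file calculates fractional and absolute counts of surface-accessible residues. The cells below compute per-residue SASA for each AlphaFold2 structure and write the values to tab-separated files.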

In [1]:
import numpy as np
import os
import sys

from tqdm import tqdm


from finches.utils import folded_domain_utils

# Make the sibling ``code`` directory (one level above the current working
# directory) importable, so local modules can be imported further down.
local_code_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'code'))

# register the directory only once so re-running this cell adds no duplicates
if sys.path.count(local_code_path) == 0:
    sys.path.append(local_code_path)


from domain_def import Domain


In [2]:
mode_name = 'af2'

# Define where the AlphaFold proteome used for input is.
# rootdir should be a directory whose only files are PDB files from AlphaFold2.
# Importantly, the filenames should follow `AF-<UniProtID>-F1-model_v4.pdb`,
# because the code below parses the filename to map UniProt ID to filename.
rootdir = '../data/UP000002311_559292_YEAST_v4'

# build mapping of UniProt IDs to filenames
uid2fn = {}

# os.listdir() returns entries in arbitrary order; sorting keeps the mapping
# (and every file written from it later) in a reproducible order
for entry in sorted(os.listdir(rootdir)):
    fields = entry.split('-')

    # an entry without a '-' (e.g. a hidden file such as .DS_Store) has no
    # UniProt ID field; report and skip it instead of dying with an IndexError
    if len(fields) < 2:
        print(f'Skipping {entry}: name does not match AF-<UniProtID>-F1-model_v4.pdb')
        continue

    # the UniProt ID is the second '-'-separated field of the filename
    uid2fn[fields[1]] = entry


In [4]:
# The code here cycles through each PDB file in the proteome and records its
# amino acid sequence and per-residue SASA values, keyed by UniProt ID.
uid2folded_domain = {}

# Iterating the dict view directly (instead of wrapping it in enumerate())
# lets tqdm read its length, so the bar shows a total and an ETA.
for uid, filename in tqdm(uid2fn.items()):

    infile = f'{rootdir}/{filename}'

    # build a finches folded domain
    x = folded_domain_utils.FoldedDomain(infile, SASA_ONLY=True)

    # amino acid sequence of PDB file
    seq = x.sequence

    # per-residue SASA value
    sasa = x.sasa

    # store [sequence, per-residue SASA] for this UniProt ID
    uid2folded_domain[uid] = [seq, sasa]



6039it [18:04,  5.57it/s]


In [26]:
# Write one tab-separated row per residue:
#   <uid>  <1-based position>  sasa_site  <residue>  <SASA rounded to an integer>
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open(f'../data/shprd_files/shprd_sites_{mode_name}_per_res_SASA.tsv', 'w') as fh:

    for uid, entry in uid2folded_domain.items():

        # entry is [sequence, per-residue SASA]
        seq = entry[0]

        # round each SASA value to the nearest integer and stringify it
        x = [str(int(round(i))) for i in entry[1]]

        for i, sasa in enumerate(x):
            # positions are written 1-based, hence i+1
            fh.write(f"{uid}\t{i+1}\tsasa_site\t{seq[i]}\t{sasa}\n")


In [27]:
# Write one tab-separated row per protein:
#   <uid>  sasa_site  <SASA_1>  <SASA_2>  ...  <SASA_n>
# with each SASA value rounded to an integer. The context manager guarantees
# the file is flushed and closed even if a write fails part-way through.
with open(f'../data/shprd_files/shprd_track_{mode_name}_per_res_SASA.tsv', 'w') as fh:

    for uid, entry in uid2folded_domain.items():

        # round each SASA value (entry[1]) to the nearest integer and stringify it
        x = [str(int(round(i))) for i in entry[1]]

        # every value is followed by a tab (including the last one), which
        # keeps the output byte-identical to the previous concatenation loop
        data_string = ''.join(value + '\t' for value in x)

        fh.write(f"{uid}\tsasa_site\t{data_string}\n")
