In [None]:
import Get_Densities
import Get_GDT_TS
import sys
import os
import re
import numpy as np
import zipfile


In [None]:
def make_4Darray(filename):
    '''Stack the density maps of one structure into a single np.array.

    Get_Densities.main(filename) is expected to return a dictionary whose
    values are equally shaped density maps (11 maps according to the
    original comment in this file - TODO confirm against Get_Densities).

    Parameters
    ----------
    filename : str
        Path handed unchanged to Get_Densities.main.

    Returns
    -------
    np.ndarray
        Array of shape (number_of_maps, *map_shape). The maps are ordered
        along axis 0 by their sorted dictionary key.

    Raises
    ------
    ValueError
        Raised by np.stack if the dictionary is empty or the maps do not
        all have the same shape.
    '''
    density_dict = Get_Densities.main(filename)

    # Stack every map in one call. This does not depend on the smallest key
    # being exactly 1 (the previous loop only created the array for key 1)
    # and avoids re-copying the growing array on every iteration.
    ordered_maps = [density_dict[key] for key in sorted(density_dict)]
    return np.stack(ordered_maps, axis=0)

In [None]:
def get_target(filename):
    '''Return the target identifier contained in the resolved path of a file.

    The identifier is the first occurrence of a capital 'T' followed by
    four digits (e.g. 'T0859') anywhere in the real path of `filename`.

    Parameters
    ----------
    filename : str
        Path to an existing file.

    Returns
    -------
    str or None
        The matched identifier, or None when the path contains no match.

    Raises
    ------
    OSError
        If `filename` cannot be opened (e.g. it does not exist).
    '''
    # The file is opened only to verify that it exists and is readable;
    # the context manager guarantees the handle is closed again.
    with open(filename) as handle:
        f_path = os.path.realpath(handle.name)

    # Looking at the path to see which target is being processed.
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    target_match = re.search(r'(T\d\d\d\d)', f_path)

    if target_match is None:
        return None
    return target_match.group(1)
    
    

In [None]:
def main(filenames=None):
    '''Build the density archive and the score file for one target.

    For every input file the densities are computed with Get_Densities.main
    and the GDT_TS score is read with Get_GDT_TS.main from the companion
    file '<input>.fixed.TM'. Inputs for which either step raises, or for
    which Get_GDT_TS.main returns a string (its "no score" message), are
    reported and skipped.

    The target name is taken from the path of the FIRST input file only.
    Outputs are written to the current working directory:
      * '<target>.npz'         zip archive with one entry 'arr_<i>.npy' per
                               accepted input (any existing archive with
                               that name is replaced)
      * '<target>_scores.npz'  array 'all_scores'; all_scores[i] belongs to
                               the archive entry 'arr_<i>.npy'

    Parameters
    ----------
    filenames : sequence of str, optional
        Files to process. Defaults to sys.argv[1:].

    Raises
    ------
    ValueError
        If no target identifier can be found in the first file's path.
    '''
    if filenames is None:
        filenames = sys.argv[1:]
    filenames = list(filenames)

    if not filenames:
        print ('No input files given')
        return

    print ('Computing atom densities')
    n_files = len(filenames)
    no_passed = 0
    all_scores = []

    target_name = get_target(filenames[0])
    if target_name is None:
        raise ValueError(
            'Could not find a target name (T followed by four digits) '
            'in the path of {}'.format(filenames[0]))
    zip_name = target_name + '.npz'

    # Start from a clean archive, but do not fail on the very first run
    # when there is nothing to delete yet.
    if os.path.exists(zip_name):
        os.remove(zip_name)

    with zipfile.ZipFile(zip_name, mode='a', compression=zipfile.ZIP_DEFLATED) as zf:
        for position, filename in enumerate(filenames, start=1):

            if position == 1 or position % 5 == 0:
                print (position, 'of', n_files)

            tm_filename = filename + '.fixed.TM'

            # Try to create the densities and collect the GDT score.
            # If anything fails, report it and skip this input.
            try:
                # NOTE(review): the result of Get_Densities.main is stored
                # as returned; make_4Darray is not applied here.
                dens_array = Get_Densities.main(filename)

                # Returns a message string when the TM file has no score.
                GDT = Get_GDT_TS.main(tm_filename)
                if isinstance(GDT, str):
                    print (GDT)
                    print (tm_filename)
                    no_passed += 1
                    continue

                # Index of the next accepted input, so that the archive
                # entry lines up with its position in all_scores.
                tmpfilename = 'arr_{}.npy'.format(len(all_scores))
                try:
                    np.save(tmpfilename, dens_array)
                    zf.write(tmpfilename)
                finally:
                    # Never leave the temporary file behind, even when
                    # saving or zipping failed.
                    if os.path.exists(tmpfilename):
                        os.remove(tmpfilename)

                all_scores.append(GDT)

            except Exception as e:
                print (e)
                print (filename)
                no_passed += 1

    # Report the final count unless the loop already printed it
    # (it prints for the first file and for every fifth file).
    if n_files != 1 and n_files % 5 != 0:
        print (n_files, 'of', n_files)

    # Store the scores; all_scores[i] corresponds to 'arr_<i>.npy'.
    np.savez_compressed(target_name + '_scores', all_scores=all_scores)
    print (no_passed, 'files ignored')

        
# Run the pipeline as soon as this cell/script is executed; the input
# files are taken from sys.argv[1:] inside main().
main()