In [1]:
import glob
import os
import sys

In [2]:
# Utils.py

# Tab-separated table read by getUniprotCode: column 0 is used as the Uniprot
# code and column 2 as the PDB code.
PDB_TO_UNIPROT_TABLE_PATH = "/scratch/PI/rondror/akma327/DynamicNetworks/data/crystal-analysis/simulation-analysis/gpcrdb-freq-config/GPCR_PDB_List.txt"
# Tab-separated table read by genGpcrdbDict: columns 0, 1, 2 and 4 are used as
# Uniprot code, residue number, residue name and GPCRDB number respectively.
GPCRDB_TABLE_PATH="/scratch/PI/rondror/akma327/DynamicNetworks/data/crystal-analysis/simulation-analysis/gpcrdb-freq-config/All_species_gpcrdb_numbers_strOnly.txt"
# NOTE(review): not referenced by any function visible in this notebook;
# presumably a per-residue conservation table -- confirm before relying on it.
GPCRDB_RESIDUE_FREQ_TABLE="/scratch/PI/rondror/akma327/DynamicNetworks/data/crystal-analysis/simulation-analysis/gpcrdb-freq-config/gpcrdb_residue_conservation.txt"


# Rename amino acids to common name
def fixAminoAcidNames(key):
	"""Return `key` with variant residue names replaced by their common name.

	Every occurrence of each variant three-letter code anywhere in the string
	is substituted (plain substring replacement), so a residue label such as
	"HSD112" becomes "HIS112".
	"""
	# (variant, common name) pairs, applied in this exact order.
	substitutions = (
		("HSD", "HIS"),
		("HSE", "HIS"),
		("HSP", "HIS"),
		("HIE", "HIS"),
		("HIP", "HIS"),
		("HID", "HIS"),
		("GLH", "GLU"),
		("ASH", "ASP"),
		("CYP", "CYS"),
		("CYX", "CYS"),
	)
	renamed = key
	for variant, common in substitutions:
		renamed = renamed.replace(variant, common)
	return renamed

# Retrieve the gpcrdb number for the specified residue from the dictionary. Return "-" if not found
def getGPCRDB(res, GPCRDB_DICT):
	"""Look up the GPCRDB number for residue label `res`.

	The label is first normalised with fixAminoAcidNames. If the normalised
	label is a key of GPCRDB_DICT its value is returned; otherwise a
	"<label> not found." message is printed and "-" is returned.
	"""
	normalized = fixAminoAcidNames(res)
	if normalized in GPCRDB_DICT:
		return GPCRDB_DICT[normalized]
	print(normalized + " not found.")
	return "-"

# Create the parent directory of OUTPUT_FILE if it does not already exist
def createDirectory(OUTPUT_FILE):
	"""Make sure the directory that will contain OUTPUT_FILE exists.

	Parameters:
		OUTPUT_FILE: path of a file that is about to be written.

	Missing intermediate directories are created. Nothing is done when the
	directory already exists or when OUTPUT_FILE has no directory component.
	"""
	directory = os.path.dirname(OUTPUT_FILE)
	# dirname is "" for a bare file name (e.g. "out.txt"); os.makedirs("")
	# raises, and the current working directory already exists anyway.
	if directory and not os.path.exists(directory):
		os.makedirs(directory)

# Generate write file descriptor 
def genWriteDescriptor(OUTPUT_FILE):
	"""Open OUTPUT_FILE for writing and return the file object.

	createDirectory is called first so the parent directory exists. The
	caller is responsible for closing the returned file object.
	"""
	createDirectory(OUTPUT_FILE)
	handle = open(OUTPUT_FILE, 'w')
	return handle


# Retrieve Uniprot Code for the PDB_CODE from pdb_to_uniprot_table_path
def getUniprotCode(PDB_CODE):
	"""Return the upper-cased Uniprot code listed for PDB_CODE.

	Scans the tab-separated table at PDB_TO_UNIPROT_TABLE_PATH, taking
	column 0 as the Uniprot code and column 2 as the PDB code, and returns
	the Uniprot code of the first row whose PDB code matches
	(case-insensitively).

	If no row matches, a message is printed and exit(1) is called.
	"""
	wanted = PDB_CODE.upper()
	# The with-block closes the table on both the early return and the
	# not-found path.
	with open(PDB_TO_UNIPROT_TABLE_PATH, 'r') as f:
		for line in f:
			l_info = line.split("\t")
			# Blank, whitespace-only or truncated rows have no PDB column.
			if len(l_info) < 3:
				continue
			uniprot_code, pdb = l_info[0].strip(), l_info[2].strip()
			if pdb.upper() == wanted:
				return uniprot_code.upper()
	print("PDB_CODE Not Found in PDB To Uniprot Table")
	exit(1)


# Given uniprot code reads through GPCRDB_TABLE_PATH to generate the amino acid
# to gpcrdb number table. 
# Output {"ASP112": "1x50", "ARG116":"2x45"}
def genGpcrdbDict(UNIPROT_CODE):
	"""Build the residue -> GPCRDB number mapping for one Uniprot entry.

	Reads the tab-separated table at GPCRDB_TABLE_PATH, taking columns
	0, 1, 2 and 4 as Uniprot code, residue number, residue name and GPCRDB
	number. Rows whose Uniprot code matches UNIPROT_CODE (case-insensitively)
	contribute one entry keyed by upper-cased residue name + residue number.

	Returns:
		dict such as {"ASP112": "1x50", "ARG116": "2x45"}; empty when no
		row matches.
	"""
	wanted = UNIPROT_CODE.upper()
	GPCRDB_DICT = {}
	with open(GPCRDB_TABLE_PATH, 'r') as f:
		for line in f:
			l_info = line.split("\t")
			# Blank or truncated rows lack the GPCRDB column (index 4).
			if len(l_info) < 5:
				continue
			uniprot, resnum, resname, gpcrdb = l_info[0].strip(), l_info[1].strip(), l_info[2].strip(), l_info[4].strip()
			if uniprot.upper() == wanted:
				GPCRDB_DICT[resname.upper() + resnum] = gpcrdb
	return GPCRDB_DICT


# Generates the residue to gpcrdb table for given pdb
def genResidueToGpcrdbTable(PDB_CODE):
	"""Return the residue -> GPCRDB number dictionary for a PDB code.

	Resolves PDB_CODE to its Uniprot code with getUniprotCode, then builds
	the dictionary for that Uniprot code with genGpcrdbDict.
	"""
	return genGpcrdbDict(getUniprotCode(PDB_CODE))


In [13]:
def orderpair(atom1, atom2):
    """Return the two atom labels as a tuple, ordered by a "LIG" check.

    When atom1 contains the substring "LIG" the pair is returned unchanged
    as (atom1, atom2). In every other case, including when neither label
    contains "LIG", the pair is swapped to (atom2, atom1).
    """
    ligand_first = "LIG" in atom1
    return (atom1, atom2) if ligand_first else (atom2, atom1)

def write_table(pdb_to_lw, OUTPUT_TABLE):
    """Write a presence/absence table of ligand interactions per pdb.

    Parameters:
        pdb_to_lw: dict mapping a pdb identifier to a list of
            (gpcrdb, interaction_type) tuples.
        OUTPUT_TABLE: path of the tab-separated file to write (overwritten).

    Output format:
        Header line: "GPCRDB" followed by one column per pdb.
        One line per distinct (gpcrdb, interaction_type) pair, labelled
        "LIGxLIG:<gpcrdb>:<interaction_type>", with "1" in a pdb's column
        when that pdb has the pair and "0" otherwise.

    Rows are ordered by interaction type and then by gpcrdb, so the file is
    identical from run to run.
    """
    pdbs = list(pdb_to_lw.keys())
    # Convert each per-pdb list to a set once so the membership test made for
    # every row/column cell is a hash lookup instead of a list scan.
    found_by_pdb = dict((pdb, set(pdb_to_lw[pdb])) for pdb in pdbs)

    all_gpcrdb = set()
    for found in found_by_pdb.values():
        all_gpcrdb |= found

    # Primary key is the interaction type; the gpcrdb tie-breaker removes the
    # dependence on set iteration order, which differs between runs.
    ordered = sorted(all_gpcrdb, key=lambda entry: (entry[1], entry[0]))

    # The with-block guarantees the table is flushed and closed.
    with open(OUTPUT_TABLE, 'w') as f:
        f.write("GPCRDB\t" + "\t".join(pdbs) + "\n")
        for entry in ordered:
            label = ":".join(["LIGxLIG", entry[0], entry[1]])
            flags = ["1" if entry in found_by_pdb[pdb] else "0" for pdb in pdbs]
            f.write(label + "\t" + "\t".join(flags) + "\n")
        
def extract_lw_info(INPUT_DIR):
    """Collect ligand interactions, keyed by pdb, from the files in INPUT_DIR.

    Every file matching INPUT_DIR + "/*txt" whose name without the ".txt"
    extension is exactly 4 characters long is treated as the interaction
    file of that pdb; all other matches are skipped.

    Each non-blank line is parsed as "<atom1> -- <atom2>@<interaction_type>".
    orderpair selects the non-ligand atom, the text before its first "-" is
    taken as the residue label, and the label is mapped to a GPCRDB number
    with getGPCRDB using the table from genResidueToGpcrdbTable.

    Returns:
        dict mapping pdb -> list of (gpcrdb, interaction_type) tuples, in
        file order (duplicates are kept).
    """
    pdb_to_lw = {}
    for pdb_file in glob.glob(INPUT_DIR + "/*txt"):
        # splitext removes only the extension. str.strip(".txt") strips any
        # run of '.', 't', 'x' characters, which truncates names that end in
        # 't' or 'x' and made such files fail the length check below.
        pdb = os.path.splitext(os.path.basename(pdb_file.strip()))[0]
        if len(pdb) != 4:
            continue

        ### Process each pdb file for its ligand residue interactions
        GPCRDB_DICT = genResidueToGpcrdbTable(pdb)
        interactions = []
        with open(pdb_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines carry no interaction and have no "@" field.
                    continue
                linfo = line.split("@")
                interaction_type = linfo[1].strip("-")
                atom1, atom2 = linfo[0].split(" -- ")
                residue_atom = orderpair(atom1, atom2)[1]
                residue = residue_atom.split("-")[0]
                gpcrdb = getGPCRDB(residue, GPCRDB_DICT)
                # NOTE(review): getGPCRDB returns "-" for unknown residues and
                # that value passes this filter, so "-" entries are recorded
                # -- confirm this is intended.
                if gpcrdb is not None and gpcrdb != "None":
                    interactions.append((gpcrdb, interaction_type))
        pdb_to_lw[pdb] = interactions

    return pdb_to_lw
        
def heatmap(INPUT_DIR, OUTPUT_TABLE):
    """Build the ligand-interaction table for INPUT_DIR and write it out.

    Interactions are gathered with extract_lw_info and the resulting
    pdb -> interactions mapping is written to OUTPUT_TABLE by write_table.
    """
    write_table(extract_lw_info(INPUT_DIR), OUTPUT_TABLE)


In [14]:
# Directory scanned by extract_lw_info for per-pdb "*txt" interaction files.
INPUT_DIR="/scratch/PI/rondror/akma327/DynamicNetworks/data/crystal-analysis/ligand-wetness/highres_conserved_ligand_interactions/021617"
# Destination of the table written by write_table. It sits inside INPUT_DIR and
# ends in ".txt", so a re-run will glob it too; extract_lw_info skips it only
# because its name is not 4 characters long.
OUTPUT_TABLE="/scratch/PI/rondror/akma327/DynamicNetworks/data/crystal-analysis/ligand-wetness/highres_conserved_ligand_interactions/021617/highres_ligand_conservation_heatmap.txt"

[]
