In [None]:
##Take a list of bioactive compounds and ask what they interact with in Homo sapiens
#Retrieve the data via CTD's Batch Query tool by sending an HTTP GET request to http://ctdbase.org/tools/batchQuery.go
import requests, sys, json, time, math, os
import pandas as pd
#Input and output files, plus run-wide settings
infile = 'bioactive.tsv'  #text file whose content is sent to CTD as the query terms
outfile1 = 'interactionsCTD.tsv'  #CTD response is saved here and read back by the later steps
outjson = 'faceted_intact_results'  #filename prefix of the IntAct JSON pages; page number + '.json' is appended
outfile3 = 'faceted_inact_node_network.tsv'  #final two-column edge table
organism=9606  #NOTE(review): not passed to any call visible in this file; the functions default to 9606 themselves
test = True  #NOTE(review): the omniscience call visible in this file passes test=False explicitly - confirm whether this flag is meant to be used
debug = False  #debug switch; the functions in this file also accept their own `debug` argument

#Define the Program to easily request data from chems and other types of data
def cgixns(infile, outfile1, inputType='chem', actionTypes='ANY', debug=False):
    """Send the terms in ``infile`` to CTD's Batch Query tool and save the reply.

    The request asks for the ``cgixns`` report in TSV format.

    Parameters
    ----------
    infile : str
        Path of a text file; its whole content is sent as ``inputTerms``.
    outfile1 : str
        Path the raw response body is written to (binary mode).
    inputType : str
        Value of the ``inputType`` query parameter (default ``'chem'``).
    actionTypes : str
        Value of the ``actionTypes`` query parameter (default ``'ANY'``).
    debug : bool
        When true, print the terms, the response type, the HTTP status and
        the raw bytes of ``infile``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Nothing is written to
        ``outfile1`` in that case, so an error page is never mistaken for data.
    """
    with open(infile, 'r') as lines:
        inTerms = lines.read()

    #CTD URL Batch Query; the parameters are handed to requests as a dict so
    #that characters such as '&', '#' or '+' inside a term are percent-encoded
    #instead of corrupting the query string.
    url = 'http://ctdbase.org/tools/batchQuery.go'
    params = {
        'report': 'cgixns',
        'format': 'tsv',
        'inputTerms': inTerms,
        'inputType': inputType,
        'actionTypes': actionTypes,
    }
    get = requests.get(url, params=params)

    #Set debug=True if making/editing code
    if debug:
        print(inTerms)
        print(type(get))
        print(f"{get.status_code}: {get.reason}")
        with open(infile, 'rb') as lines:
            print(lines.read())

    #Fail loudly on an HTTP error rather than saving the error body as TSV
    get.raise_for_status()

    #Save interaction data in outfile1
    with open(outfile1, 'wb') as b:
        b.write(get.content)

    print("Done! Have a great rest of your research! :)")

#Run: send the terms in `infile` to CTD with actionTypes='binding' and save the response to `outfile1`
cgixns(infile, outfile1 ,actionTypes='binding')

In [None]:
#Define program to grab all interaction data from IntAct on all genes in CTD Data from cgixns as omniscience
def omniscience(outfile1, outjson, jsonSize=10_000, organism=9606, test=False, debug=False):
    """Download IntAct interaction pages for every gene symbol found in ``outfile1``.

    The gene symbols of the rows whose ``OrganismID`` matches ``organism`` are
    joined into one query for IntAct's ``findInteractionWithFacet`` endpoint.
    A one-element probe request reads the total number of hits; the results
    are then fetched ``jsonSize`` elements at a time and each page is saved as
    ``outjson + <page number> + '.json'``.

    Parameters
    ----------
    outfile1 : str
        Tab-separated table with at least ``OrganismID`` and ``GeneSymbol`` columns.
    outjson : str
        Filename prefix for the saved JSON pages.
    jsonSize : int
        Number of elements requested per page/file.
    organism : int
        Organism ID; only 9606 is accepted, anything else prints a notice and returns.
    test : bool
        When true, only the first page is downloaded.
    debug : bool
        When true, print previews of the tables and of the gene list.

    Raises
    ------
    requests.HTTPError
        If any request is answered with a 4xx/5xx status.
    """
    if organism != 9606:
        print('Sorry, currently only humans supported! Come back soon.')
        return

    ##Turn outfile1 into a dataframe and keep only the rows of the requested organism
    of1df = pd.read_table(outfile1)
    haa = of1df[of1df["OrganismID"] == organism]
    if debug:
        print(of1df.head(3))
        print('\n\nNext Table\n\n')
        print(haa.head(3))

    ##Unique gene symbols as a list (gsl) and as one space-separated string (gss).
    #Missing symbols are dropped: they cannot be queried and would break the join.
    gsl = haa["GeneSymbol"].dropna().drop_duplicates(keep='first').to_list()
    gss = ' '.join(str(name) for name in gsl)
    if debug:
        print(gss[0:20])
        print(gsl[0:5])

    ##Query IntAct for all interactions involving the gene symbols
    url_facet = 'https://www.ebi.ac.uk/intact/ws/interaction/findInteractionWithFacet?'
    #A space separates the taxid filters from the first gene symbol; without it
    #the first symbol is glued onto the taxidB term.
    query = f"taxidA:{organism} taxidB:{organism} {gss}"
    pm = {"advancedSearch": True, "intraSpeciesFilter": True,
          "page": 1, "pageSize": 1, "query": query}

    #Probe with pageSize=1 just to learn how many elements exist in total
    probe = requests.post(url_facet, params=pm)
    probe.raise_for_status()
    totalele = probe.json()['data']['totalElements']
    #The page count is derived from jsonSize here; the probe's own totalPages
    #is counted at pageSize=1 and therefore equals the number of elements.
    filenum = math.ceil(totalele / jsonSize)

    i = 1
    #Omniscience feedback#
    print('The number of elements in total:', totalele)
    if not test:
        print("The number of files shall be:", filenum)
        print('Omniscience prepped. Beginning to write file: \n', i, "of", filenum)
    else:
        print("Since this is a test, there will only be 1 file; normally, the number of files would be:", filenum)
        print('Omniscience prepped. Begining to write the file.')

    pm['pageSize'] = jsonSize

    def save_page(page):
        """Fetch result page ``page`` and stream it to its JSON file; return the response's elapsed time."""
        pm['page'] = page
        outfile2 = outjson + str(page) + '.json'
        with requests.post(url_facet, params=pm, stream=True) as reply:
            reply.raise_for_status()
            with open(outfile2, 'wb') as f:
                for chunk in reply.iter_content(chunk_size=4096):
                    f.write(chunk)
            return reply.elapsed

    #The first file is always written, even in test mode
    print('Saving file 1...')
    elapsed = save_page(i)
    print('File', i, 'done. The server took', elapsed, 'to process.\n')

    #Option to only make 1 file to save time
    if not test:
        for i in range(2, filenum + 1):
            print('Assigning...')
            save_page(i)
            print('Assigned.')
            print('File', i, 'done.\n')
            time.sleep(1)  #pause between requests to go easy on the server

    #Exiting Messages
    print('Omniscience complete. \n', i, 'file(s) have been blessed upon you.')
    print('Please consider using reductionism (the program) on your data so it is inteligible.')
    print('The sciences shall voyage far from our island of ignorance into the midst of black seas of infinity.')
#Run with test=False so that the pages after the first one are downloaded as well.
#NOTE(review): the module-level `test` flag is not passed here - confirm this is intended.
omniscience(outfile1, outjson, test=False)

In [None]:
####Make an edge network of source nodes and target nodes (whether chemical or gene)
def reductionism(outfile1, outjson, outfile3, outputHeader=True, organism=9606, debug=False):
    """Reduce the saved interaction data to a de-duplicated two-column edge table.

    Edges come from two places:

    * every ``<outjson>*.json`` file, where each entry of ``data.content``
      contributes its ``moleculeA``/``moleculeB`` pair;
    * ``outfile1``, where each unique ``CasRN``/``GeneSymbol`` pair of the
      rows matching ``organism`` contributes one edge.

    Edges are treated as undirected: the two names of each edge are put in
    sorted order so that a-b and b-a collapse into one row. Edges with a
    missing endpoint are dropped, since a node without a name cannot be
    placed in the table.

    Parameters
    ----------
    outfile1 : str
        Tab-separated table with ``OrganismID``, ``CasRN`` and ``GeneSymbol`` columns.
    outjson : str
        Filename prefix of the JSON files; may include a directory part.
    outfile3 : str
        Path of the tab-separated edge table to write.
    outputHeader : bool
        Whether to write the ``SourceNode``/``TargetNode`` header row.
    organism : int
        Organism ID used to filter the rows of ``outfile1``.
    debug : bool
        When true, print the column names and the final table.
    """
    print('Oh yeah, its all about to be SourceNode or TargetNode up in here... \n Please wait...')
    columns = ['SourceNode', 'TargetNode']
    edges = []

    #Pull up every outjson file and use it for the edge table. The prefix is
    #split so that a prefix containing a directory is searched in that
    #directory; a bare prefix is searched in the current one.
    folder = os.path.dirname(outjson) or '.'
    prefix = os.path.basename(outjson)
    with os.scandir(folder) as directory:
        for item in directory:
            if item.name.startswith(prefix) and item.name.endswith('.json') and item.is_file():
                with open(item.path, 'rb') as handle:
                    content = json.load(handle)['data']['content']
                edges.extend((hit['moleculeA'], hit['moleculeB']) for hit in content)
                print(item.name, 'is done being reduced!')

    ##Add the chemical -> gene edges of the input chemicals
    of1df = pd.read_table(outfile1)
    haa = of1df[of1df["OrganismID"] == organism]
    pairs = haa[["CasRN", "GeneSymbol"]].drop_duplicates(keep='first')
    edges.extend(zip(pairs["CasRN"].to_list(), pairs["GeneSymbol"].to_list()))

    #Drop edges with a missing endpoint and compare everything as text, so the
    #per-edge sort below never mixes NaN/None with strings.
    nodedf = pd.DataFrame(edges, columns=columns).dropna().astype(str)

    ##Order the two names inside every edge, then sort and de-duplicate (a-b == b-a)
    ordered = [sorted(pair) for pair in nodedf.itertuples(index=False, name=None)]
    nodedf = pd.DataFrame(ordered, columns=columns)
    nodedf = nodedf.sort_values(by='SourceNode').drop_duplicates(keep='first')

    if debug:
        print(*columns, sep='\t')
        print(nodedf)

    #Comes down to a 2 column dataframe in outfile3, header controlled by outputHeader
    nodedf.to_csv(outfile3, index=False, sep='\t', header=outputHeader)
    print('Reduced to atoms... or at least: \n', os.stat(outfile3).st_size/1000, 'kB')
#Run: build the edge table `outfile3` from `outfile1` and the JSON files whose names start with `outjson`
reductionism(outfile1,outjson,outfile3)