## Packages for the pipeline

In [9]:
import sys
sys.path.append('/Users/lbarlet/Desktop/omegaLoMo/lib/')
import networkx as nx
import numpy as np
import core as ca
import createTopo as cT

import matplotlib.pyplot as plt
import copy
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Creating Topology

>*database* is the path to your database (Mitab format) 

>*filterIds* is the path to a file listing the ids that detected a homologue in your query list



<div class="alert alert-warning">
This may take a while for a big database
</div>

In [10]:
# Input paths: the interaction database (MITAB format) and the id filter file.
database = "/Volumes/arwen/home/lbarlet/WorkOnIntact/subset_intact.txt"
filterIds = "/Volumes/arwen/mobi/group/divisome/anaBlast/allInR6OnlyIn"

# Build the topology, then filter it; `newDic` is what the OmegaMatrix cell
# receives later as its `topo` argument.
# NOTE(review): filterIds is passed both to the constructor and to
# filter_With -- confirm that both are required.
topo = cT.Topology(database, filterIds)
newDic = topo.filter_With(filterIds)

>*newDic* is the filtered and reduced topology of your database

This will be used later to construct the **OmegaMatrix**

In [11]:
# Show the filtered topology. The parenthesised form prints the same text on
# Python 2 and is also valid on Python 3.
print(newDic)

{'Q9UUL2': ['O14129', 'O14129', 'O14129', 'O14129'], 'Q9LJR3': ['Q9LJR3', 'Q94BM7', 'Q94BM7', 'Q9LJR3'], 'Q13418': ['Q9BWU0', 'Q9BWU0', 'Q9BWU0', 'Q9BWU0', 'Q9BWU0', 'Q9BWU0', 'Q9NVD7', 'P49023', 'Q9BWU0'], 'Q9A8N4': ['P0CAV8', 'P0CAV8', 'Q9A8N4', 'Q9A8N4', 'Q9A8N4', 'P0CAV8', 'P0CAV8', 'Q9A8N4', 'Q9A8N4'], 'Q9SYX2': ['Q9SYX2', 'Q9LJR3', 'Q94BM7', 'Q9LJR3', 'Q94BM7', 'Q9SYX2', 'Q9SYX2'], 'P55072': ['Q9UNN5', 'Q92575', 'Q9BZE9'], 'P36592': ['P36592'], 'P36601': ['P41410', 'P36601', 'P36601', 'Q9UUL2', 'Q9UUL2', 'P36592'], 'Q94BM7': ['Q94BM7', 'Q94BM7'], 'Q84TE6': ['Q84TE6'], 'Q7L0Q8': ['Q14289', 'Q14289', 'Q14289', 'Q05397', 'Q14289', 'Q13153']}


## Processing data

Here, you have to use the example files provided in the git repository.

These files are small subsets and can be used in this notebook as a quick way to familiarise yourself with the pipeline

>*indexR6* is a list of UniprotId corresponding to the proteome of your organism

>*bean* is a serialized version of a small dataset, corresponding to a topology


In [13]:
# Path to the query id list, handed to HomegaSet as `queryIdList`.
indexR6 = '/Volumes/arwen/mobi/group/divisome/anaBlast/indexR6'
# Build the HomegaSet from the serialized example dataset (`bean`) and the
# query id list.
omegaSet = ca.HomegaSet(bean='/Volumes/arwen/mobi/group/divisome/exampleFiles/ser.json',
                        queryIdList=indexR6)

Organise the HomegaSet into an **OmegaMatrix** by deleting the templates which don't bring back queries of interest. 

The **OmegaMatrix** will project from the template space to a matrix of queries to identify all interactions between the queries.

In [None]:
# Combine the filtered topology (newDic) with the HomegaSet.
omegaMatrix = ca.OmegaMatrix(topo = newDic, omegaSet = omegaSet)
omegaMatrix.reduceAndVectorInject()
# `queryTopo` is the projected result; drawGraph() reads its getEdges()
# method and its dictQuery attribute.
queryTopo = omegaMatrix.project()

## Selection of proteins of interest  

>*divR6Known* is a downloaded list from uniprot with the "cell division" GO term selected

<div class="alert alert-info">
    You can check out the list <a href="http://www.uniprot.org/uniprot/?query=taxonomy:%22Streptococcus%20pneumoniae%20(strain%20ATCC%20BAA-255%20/%20R6)%20[171101]%22+go:51301">here</a>
</div>


In [8]:
# Read the downloaded UniProt "cell division" list and keep the first
# tab-separated column of every data row in `onlyId`.
divR6Known = '/Volumes/arwen/mobi/group/divisome/divKnownR6.txt'
onlyId = []
with open(divR6Known, 'r') as file_div:
    # Skip the single header line. The builtin next() works on Python 2 and 3,
    # and the default value keeps an empty file from raising StopIteration.
    next(file_div, None)
    for line in file_div:
        line = line.rstrip("\r\n")
        if not line:
            # Ignore blank lines instead of recording an empty identifier.
            continue
        onlyId.append(line.split("\t")[0])

Here, you can check if the parsing works fine

The next cell is supposed to print your Id list of Uniprot identifiers

In [7]:
# Show the parsed identifiers. The parenthesised form prints the same text on
# Python 2 and is also valid on Python 3.
print(onlyId)

['Q8CWP9', 'Q8DQM0', 'P64073', 'Q8DQE5', 'P64167', 'Q8DR70', 'Q8DR57', 'Q8DNE8', 'Q8DNI9', 'Q9EUQ7', 'Q8DNS0', 'Q8DR29', 'Q7ZAK7', 'Q8DR55', 'Q8DPV4', 'Q8DP40', 'Q8DR69', 'Q8CWQ5', 'Q8DQM2', 'Q8DQM1', 'P65467', 'P59676', 'Q8DPW6', 'Q8DQH3', 'Q8DNV6', 'Q8DQH4', 'Q8DNV8', 'Q8DQE8', 'Q8CZ65', 'Q8DPK2', 'Q8DNV9']


## Graphs Functions

In [None]:
def serializeGraph(graphEdge, path):
    """Write a graph adjacency mapping to ``path`` as JSON.

    Parameters
    ----------
    graphEdge : mapping
        ``{query: {node: param}}``. ``query`` and ``node`` must expose a
        ``.query`` attribute (used as the JSON key); ``param`` must provide
        the iterables ``param["lowQueryParam"]`` and
        ``param["highQueryParam"]``.
    path : str
        Destination file; it is created or overwritten.

    Returns
    -------
    None

    The resulting document has the shape::

        {"Query": {<query.query>: {"nodesEdge": {<node.query>:
            {"lowQueryParam": [...], "highQueryParam": [...]}}}}}
    """
    # Local import: `json` is not part of the notebook's import cell, so the
    # original body raised NameError when it reached json.dump.
    import json

    jsonStruct = {"Query": {}}
    for query, nodes in graphEdge.items():
        nodesEdge = {}
        for node, param in nodes.items():
            nodesEdge[node.query] = {
                "lowQueryParam": list(param["lowQueryParam"]),
                "highQueryParam": list(param["highQueryParam"]),
            }
        jsonStruct["Query"][query.query] = {"nodesEdge": nodesEdge}

    # A context manager guarantees the handle is flushed and closed, even if
    # serialization fails part-way.
    with open(path, 'w') as handle:
        json.dump(jsonStruct, handle)
    

In [None]:
def drawNeiTopo(neighbors_dict):
    """Print the first neighbors of every node, one comma-separated line each.

    Parameters
    ----------
    neighbors_dict : mapping
        ``{node: iterable_of_neighbors}``; each neighbor must expose a
        ``.query`` string attribute, which is what gets printed.

    Returns
    -------
    None
    """
    # Single-argument print(...) emits the same text on Python 2 and is also
    # valid on Python 3 (the bare print statement is not).
    print("Liste des 1ers voisine:\n")
    for node in neighbors_dict:
        names = [neighbor.query for neighbor in neighbors_dict[node]]
        print(', '.join(names))

In [None]:
def filterGraph(graph, limiteEv = 0):
    """Draw a filtered copy of ``graph``; the input graph is left untouched.

    Works on a deep copy. For every adjacency entry, entries of the
    'lowQueryParam' / 'highQueryParam' edge attributes whose element at
    index 4 (compared as a float; presumably an e-value -- confirm) is
    greater than ``limiteEv`` are candidates for removal. Adjacency entries
    whose two lists are both empty are deleted, nodes without neighbors are
    removed, and the result is drawn with matplotlib.

    Parameters
    ----------
    graph : graph object supporting ``.edge``, ``.adj``, ``.node``,
        ``.neighbors`` and ``.remove_node`` (looks like the networkx 1.x
        Graph API -- confirm the installed version).
    limiteEv : number or numeric string, default 0
        Threshold; converted with ``float()`` before each comparison.

    Returns
    -------
    None -- the filtered copy is drawn, not returned.
    """
    
    G = copy.deepcopy(graph)
    
    for edge in G.edge:
        for node in G[edge].keys():
            # Scan the low-side entries. The loop stops at the first entry
            # above the threshold, so at most one entry is removed per scan.
            # NOTE(review): later entries above the threshold are kept --
            # confirm this is intended.
            for i, lowQueryEval in enumerate(G[edge][node]['lowQueryParam']):
                
                if float(limiteEv) < float(lowQueryEval[4]):
                    # The same index is popped from both lists.
                    # NOTE(review): assumes the two lists are index-aligned
                    # and equally long; otherwise pop(i) can raise IndexError.
                    G.adj[edge][node]['highQueryParam'].pop(i)
                    G.adj[edge][node]['lowQueryParam'].pop(i)

                    break
            
            # Same single-removal scan, driven by the high-side entries.
            if node in G[edge] and len(G[edge][node]['highQueryParam']) > 0:
                for i, highQueryEval in enumerate(G[edge][node]['highQueryParam']):
                    if float(limiteEv) < float(highQueryEval[4]):
                        G.adj[edge][node]['highQueryParam'].pop(i)
                        G.adj[edge][node]['lowQueryParam'].pop(i)
            
                        break
            
            # Drop the adjacency entry once no entry is left on either side.
            # NOTE(review): this deletes only G.adj[edge][node]; the reverse
            # entry G.adj[node][edge] is handled only when the outer loop
            # reaches it -- G.remove_edge may be the safer call; confirm.
            if len(G.adj[edge][node]['highQueryParam']) <= 0 and len(G.adj[edge][node]['lowQueryParam']) <= 0:
                del G.adj[edge][node]
                
        # Remove nodes with no interactions
    
    # NOTE(review): the truthiness test relies on G.neighbors() returning a
    # list (an iterator would always be truthy) -- confirm networkx version.
    for node in G.node.keys():
        if not G.neighbors(node):
            G.remove_node(node)
    
    
    # NOTE(review): `color_map` is not defined in this function; it must
    # exist as a global when this line runs, otherwise NameError is raised.
    nx.draw_networkx(G, node_color = color_map, with_labels = True)
    plt.show()

In [None]:
#Get the coordinates from the miniMatrix of homologPair interations
#blacklist = '/Users/mbachir/Desktop/omegaLoMo/R6R6InIntact'

def drawGraph(queryTopo, focusId='Q8DR55'):
    """Build the query interaction graph and inspect one query's neighbors.

    Parameters
    ----------
    queryTopo : object
        Must provide ``getEdges(blacklist=None)``, yielding mappings with the
        keys 'lowQuery', 'highQuery', 'loQueryEval' and 'hiQueryEval', and a
        ``dictQuery`` mapping whose values are sequences; element ``[0]`` of
        ``dictQuery[focusId]`` is used as the graph node of the focus query.
    focusId : str, default 'Q8DR55'
        Identifier whose first neighbors are handed to ``drawNeiGraph``. The
        default is the identifier the function previously hard-coded.

    Returns
    -------
    networkx.Graph
        One edge per interaction, carrying the list attributes
        'lowQueryParam' and 'highQueryParam'.

    Notes
    -----
    The presence check and the lookup now use the same identifier. Before,
    the check tested ``onlyId[1]`` (a notebook global) while the lookup used
    'Q8DR55', so the call could raise KeyError or report 'Key not present'
    for an identifier that was actually there.
    """
    G = nx.Graph()
    for interaction in queryTopo.getEdges(blacklist=None):
        G.add_edge(interaction['lowQuery'], interaction['highQuery'],
                   lowQueryParam=list(interaction['loQueryEval']),
                   highQueryParam=list(interaction['hiQueryEval']))

    # Remove nodes with no interactions. Iterate over a snapshot because the
    # graph is mutated, and test the degree rather than the truthiness of
    # G.neighbors(), which is a list in networkx 1.x but an iterator in 2.x.
    for node in list(G.nodes()):
        if G.degree(node) == 0:
            G.remove_node(node)

    if focusId in queryTopo.dictQuery:
        focusNode = queryTopo.dictQuery[focusId][0]
        drawNeiGraph(queryTopo.dictQuery,
                     {focusNode: list(G.neighbors(focusNode))})
    else:
        print('Key not present')

    return G

In [None]:
def drawNeiGraph(dictQuery, edges_dict):
    """Build the first-neighbor graph of each query in ``edges_dict``.

    Parameters
    ----------
    dictQuery : mapping
        ``{identifier: iterable_of_entries}``; every entry must expose a
        ``.param`` attribute.
    edges_dict : mapping
        ``{query: iterable_of_neighbors}``; ``query`` and every neighbor must
        expose a ``.query`` attribute that is a key of ``dictQuery``.

    Returns
    -------
    networkx.Graph
        One edge per (query, neighbor) pair. 'lowQueryParam' holds the
        distinct params of the query, 'highQueryParam' the distinct params of
        that neighbor. Previously nothing was returned, so callers that
        ignore the result are unaffected.

    Notes
    -----
    The edge is now created inside the neighbor loop. Before, ``add_edge``
    ran once per query after the loop, so only the last neighbor was linked
    and an empty neighbor list raised NameError. Each edge also gets its own
    list objects instead of one list shared, and still growing, across all
    edges.
    """
    def _distinctParams(entries):
        # Order-preserving de-duplication using list membership, so params do
        # not need to be hashable.
        params = []
        for entry in entries:
            if entry.param not in params:
                params.append(entry.param)
        return params

    G = nx.Graph()
    for query, neighbors in edges_dict.items():
        queryParam = _distinctParams(dictQuery[query.query])
        for neighbor in neighbors:
            # Create the edge for this specific neighbor.
            G.add_edge(query, neighbor,
                       lowQueryParam=list(queryParam),
                       highQueryParam=_distinctParams(dictQuery[neighbor.query]))

    # NOTE(review): nothing is drawn before this call, so it only flushes
    # figures that are already pending; kept for backward compatibility.
    plt.show()
    return G

In [None]:
#range_list = np.linspace(float(2.51425e-20), float(2.51425e-02))
#print range_list

# Build the interaction graph from the projected query topology; drawGraph
# returns the graph object, kept here as G.
G = drawGraph(queryTopo)

#dic = {'A': {'B': [{'loQueryParam': [u'1', u'538', u'157', u'69', u'2.2272e-56'], 'hiQueryParam': [u'1', u'318', u'109', u'62', u'4.29381e-29']}], 'C': [{'loQueryParam': [u'1', u'538', u'157', u'69', u'2.2272e-56'], 'hiQueryParam': [u'94', u'244', u'43', u'23', u'3.07111e-05']}]}}
#print list(dic['A']['B'])
#pos = nx.spring_layout(G,k=0.15,iterations=20)

#for limite in range_list:
    #filterGraph(G, limite)