Input: 
       - NoisePCSF-080116_union.sif - The union file from running Noise sweeps with PCSF.
       - ChasmanNetwork-UndirEdges.txt - The undirected connections in the interactome.
Output: 
    - input-Network_080116.tsv - Input network for TPS
    - partialDirectedModel.sif - partial directed model for TPS

Creates a list of all of the undirected edges in the union of PCSF noise sweeps. Then, these undirected edges are deleted in order to create a partial directed model. All the edges in the partial directed model must be in the interactome.

# Import Libraries

In [1]:
import os.path
import matplotlib.pyplot as plt
import pandas as pd

# Import files

In [2]:
# Path configuration. FILEPATH is the directory holding the PCSF output and
# receiving the files written by this notebook; UndirectedPath points at the
# undirected edges of the interactome.
USERPATH = r'/home/dylan/Documents/HDD/Wisconsin/'
NOTEBOOK = r'osmotic-stress/Notebooks/'
FILEPATH = './'
UndirectedPath = '../ChasmanNetwork-DirUndir/ChasmanNetwork-UndirEdges.txt'
#FILEPATH = r'/home/dylan/Documents/HDD/Wisconsin/osmotic-stress/Notebooks/Forest-TPS/'
#UndirectedPath = r'/home/dylan/Documents/HDD/Wisconsin/osmotic-stress/Notebooks/ChasmanNetwork-DirUndir/ChasmanNetwork-UndirEdges.txt'

# Obtain the union file from the noise sweep with PCSF.
# NOTE(review): the notebook header names this input
# 'NoisePCSF-080116_union.sif', but the path below resolves to './_union.sif'
# -- confirm which filename is intended.
networkPath = FILEPATH + '_union.sif'
dfNetwork = pd.read_csv(networkPath, sep=' ', header=None)
# Drop the middle column (label 1) so only the two gene columns remain.
# `axis` is passed by keyword: newer pandas releases no longer accept it
# positionally, and the keyword form works on old releases as well.
dfNetwork.drop(1, axis=1, inplace=True)

# Obtain the undirected interactome edges (tab-separated, no header row).
#UndirectedPath = FILEPATH + 'ChasmanNetwork-UndirEdges.txt'
dfUndirected = pd.read_csv(UndirectedPath, sep='\t', header=None)

# Print dataframes to show changes in column headings

In [3]:
# Give both edge tables the same column names so they can be compared
# column-by-column in the cells below.
dfNetwork.columns = ['GeneA', 'GeneB']
dfUndirected.columns = ['GeneA', 'GeneB']

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(dfUndirected)
print(dfNetwork)

      GeneA    GeneB
0   YDR420W  YER118C
1   YDR420W  YOR153W
2   YDR420W  YHR154W
3   YGL209W  YGR014W
4   YGL035C  YGR014W
5   YGR014W  YLR229C
6   YER118C  YGR014W
7   YER118C  YJL128C
8   YER118C  YNL152W
9   YDL117W  YER118C
10  YER118C  YOR208W
11  YCL027W  YER118C
12  YBR023C  YER118C
13  YER118C  YER118C
14  YER118C  YLR452C
15  YER118C  YPR032W
16  YER118C  YLR353W
17  YER118C  YMR032W
18  YER118C  YOR188W
19  YCL032W  YER118C
20  YAL041W  YER118C
21  YER118C  YOR181W
22  YER118C  YLR362W
23  YDL235C  YIL147C
24  YDR388W  YIL147C
25  YIL147C  YJR074W
26  YHR206W  YIL147C
27  YIL147C  YIR004W
28  YJL123C  YPR075C
29  YCL032W  YPR075C
30  YGL209W  YPR075C
31  YGL035C  YPR075C
        GeneA    GeneB
0     YJR059W  YPR074C
1     YBR160W  YDR239C
2     YJR059W  YPR091C
3     YNL166C  YPL031C
4     YJR059W  YKL064W
5     YBR160W  YJL076W
6     YLR096W  YNL054W
7     YDR054C  YHR205W
8     YDR477W  YPR115W
9     YBR160W  YNL233W
10    YHR183W  YPL031C
11    YAR002W  YBR160W
12    YD

## Output to file input network for TPS

In [4]:
# Write the two-column PCSF edge list as a tab-separated file, without the
# row index and without a header line.
path = FILEPATH + 'input-Network_080116.tsv'
dfNetwork.to_csv(
    path,
    sep='\t',
    header=False,
    index=False,
)

# Find all undirected edges in the network, and remove them

In [5]:
# Index every edge of the PCSF network by its unordered gene pair, so each
# undirected edge can be matched with one dictionary lookup instead of a full
# scan of dfNetwork. A frozenset ignores orientation, which reproduces the
# "A-B matches A-B or B-A" comparison.
networkEdges = {}
for index2, row2 in dfNetwork.iterrows():  # iterate through the PCSF network
    pair = frozenset((row2['GeneA'], row2['GeneB']))
    networkEdges.setdefault(pair, []).append((index2, row2))

lstIndex = []
for index1, row1 in dfUndirected.iterrows():  # iterate through undirected edges
    pair = frozenset((row1['GeneA'], row1['GeneB']))
    # Matches are visited in dfNetwork row order for each undirected edge.
    for index2, row2 in networkEdges.get(pair, []):
        print(row1)
        print(row2)
        lstIndex.append(index2)  # make a list of undirected edges
count = len(lstIndex)

# iterrows() yields index *labels*, so drop by label directly rather than
# re-using the labels as positions into dfNetwork.index.
dfNetwork = dfNetwork.drop(lstIndex)  # remove undirected edges

print("\n\n")

print("Number of edges removed: " + str(count))
print("Index of edges removed from PCSF file: \n" + str(lstIndex))

GeneA    YER118C
GeneB    YJL128C
Name: 7, dtype: object
GeneA    YER118C
GeneB    YJL128C
Name: 176, dtype: object
GeneA    YDL117W
GeneB    YER118C
Name: 9, dtype: object
GeneA    YDL117W
GeneB    YER118C
Name: 69, dtype: object
GeneA    YER118C
GeneB    YOR208W
Name: 10, dtype: object
GeneA    YER118C
GeneB    YOR208W
Name: 296, dtype: object
GeneA    YBR023C
GeneB    YER118C
Name: 12, dtype: object
GeneA    YBR023C
GeneB    YER118C
Name: 110, dtype: object
GeneA    YER118C
GeneB    YLR452C
Name: 14, dtype: object
GeneA    YER118C
GeneB    YLR452C
Name: 245, dtype: object
GeneA    YER118C
GeneB    YOR188W
Name: 18, dtype: object
GeneA    YER118C
GeneB    YOR188W
Name: 200, dtype: object
GeneA    YCL032W
GeneB    YER118C
Name: 19, dtype: object
GeneA    YCL032W
GeneB    YER118C
Name: 91, dtype: object
GeneA    YAL041W
GeneB    YER118C
Name: 20, dtype: object
GeneA    YAL041W
GeneB    YER118C
Name: 263, dtype: object
GeneA    YER118C
GeneB    YOR181W
Name: 21, dtype: object
GeneA    Y

# Run the check a second time to confirm that all undirected edges were removed

In [6]:
# Sanity check: after the removal step, no edge of dfNetwork should match an
# undirected edge in either orientation, so the count is expected to be 0.
count = 0
for index1, row1 in dfUndirected.iterrows():  # iterate through undirected edges
    for index2, row2 in dfNetwork.iterrows():  # iterate through the remaining PCSF edges
        sameOrientation = (row1['GeneA'] == row2['GeneA']) and (row1['GeneB'] == row2['GeneB'])
        flippedOrientation = (row1['GeneA'] == row2['GeneB']) and (row1['GeneB'] == row2['GeneA'])
        if sameOrientation or flippedOrientation:
            print(row1)
            print(row2)
            lstIndex.append(index2)  # record any edge that slipped through
            count = count + 1

print("Number of edges found to be undirected: " + str(count))

Number of edges found to be undirected: 0


## Create partial Directed Model file

In [7]:
class Edge:
    """An edge between two genes, identified by its endpoints.

    GeneA is the endpoint written first by outputEdge and GeneB the one
    written last. Both default to the empty string and may be assigned
    after construction.
    """

    def __init__(self, GeneA="", GeneB=""):
        self.GeneA = GeneA
        self.GeneB = GeneB

    def EdgeExists(self, Network):
        """Return True if Network holds an edge with the same GeneA and GeneB.

        The comparison is orientation-sensitive: an edge stored as B, A does
        not match this edge A, B.
        """
        return any(
            edge.GeneA == self.GeneA and edge.GeneB == self.GeneB
            for edge in Network
        )

    def DirectedEdgeExists(self, DirectedNetwork):
        """Return True if this edge occurs in DirectedNetwork in either orientation.

        When only the reversed orientation is found, this edge's endpoints
        are swapped in place so that it takes the orientation stored in
        DirectedNetwork. A short report of the match is printed. Edges are
        checked in order and the first match wins.
        """
        for edge in DirectedNetwork:
            if edge.GeneA == self.GeneA and edge.GeneB == self.GeneB:
                print("Found in Directed Network")
                print("FoundA: " + self.GeneA + ": " + edge.GeneA)
                print("FoundB: " + self.GeneB + ": " + edge.GeneB)
                return True
            if edge.GeneB == self.GeneA and edge.GeneA == self.GeneB:
                print("Found in Directed Network Flipped")
                # Adopt the orientation of the directed network.
                self.GeneA, self.GeneB = self.GeneB, self.GeneA
                print("FoundA: " + self.GeneA + ": " + edge.GeneA)
                print("FoundB: " + self.GeneB + ": " + edge.GeneB)
                return True
        return False

    def outputEdge(self, f):
        """Write this edge to the open file f as 'GeneA<TAB>N<TAB>GeneB' plus a newline."""
        f.write(self.GeneA + "\t" + "N" + "\t" + self.GeneB + "\n")

In [8]:
DirectedPath = '../ChasmanNetwork-Dir/ChasmanNetwork-Dir.txt'

# used to check whether any edges appear A->B and B->A in the directed network
sortedEdgeSet = set()
# (GeneA, GeneB) pairs already kept; gives constant-time duplicate detection
# instead of rescanning DirectedNetwork for every line
seenDirectedEdges = set()

print('Removing duplicate directed edges:')
with open(DirectedPath, "r") as f:
    DirectedNetwork = []
    for line in f:
        if not line.strip():
            # a blank line carries no edge; skip it rather than fail on elements[1]
            continue
        newEdge = Edge()
        elements = line.strip("\n").split("\t")
        newEdge.GeneA = elements[0]
        newEdge.GeneB = elements[1]
        edgeKey = (newEdge.GeneA, newEdge.GeneB)
        if edgeKey in seenDirectedEdges:
            # exact repeat of an earlier line: report it and keep only the first copy
            print(newEdge.GeneA + ": " + newEdge.GeneB)
        else:
            seenDirectedEdges.add(edgeKey)
            DirectedNetwork.append(newEdge)
        # sort A and B to ignore the direction, then add A:B to the set
        sortedEdge = sorted(elements[0:2])
        sortedEdgeSet.add(':'.join(sortedEdge))

print('{} unique directed edges'.format(len(DirectedNetwork)))
# If the counts differ, some pair of genes is listed in both directions.
assert len(DirectedNetwork) == len(sortedEdgeSet), \
    'directed network contains an edge in both orientations'

Removing duplicate directed edges:
YHR205W: DOT6TOD6
7583 unique directed edges


In [9]:
duplicates = 0
with open(networkPath, "r") as f:
    partialDirectedNetwork = []
    # iterate over the Steiner forest output and flip edges as needed to match the
    # direction of the original directed network
    for line in f:
        if not line.strip():
            # a blank line carries no edge; skip it rather than fail on elements[0]
            continue
        newEdge = Edge()
        # whitespace-separated columns: gene, middle column, gene
        elements = line.strip("\n").split()
        newEdge.GeneA = elements[0]
        newEdge.GeneB = elements[2]
        if newEdge.DirectedEdgeExists(DirectedNetwork):  # check if undirected edge in directed model list
            if newEdge.EdgeExists(partialDirectedNetwork):  # check if directed edge is already in the partialDirectedNetwork
                # Report the duplicate itself; the name `edge` is not defined
                # in this cell, so referencing it would raise NameError or
                # print a stale value left over from another cell.
                print(newEdge.GeneA + ": " + newEdge.GeneB)
                duplicates += 1
            else:  # if in directed list and not in partial model then place into partial model
                partialDirectedNetwork.append(newEdge)
        else:
            # edge is absent from the directed network in both orientations
            print("NOT")

print('Found {} duplicates'.format(duplicates))

Found in Directed Network
FoundA: YJR059W: YJR059W
FoundB: YPR074C: YPR074C
Found in Directed Network
FoundA: YBR160W: YBR160W
FoundB: YDR239C: YDR239C
Found in Directed Network
FoundA: YJR059W: YJR059W
FoundB: YPR091C: YPR091C
Found in Directed Network Flipped
FoundA: YPL031C: YPL031C
FoundB: YNL166C: YNL166C
Found in Directed Network
FoundA: YJR059W: YJR059W
FoundB: YKL064W: YKL064W
Found in Directed Network
FoundA: YBR160W: YBR160W
FoundB: YJL076W: YJL076W
Found in Directed Network
FoundA: YLR096W: YLR096W
FoundB: YNL054W: YNL054W
Found in Directed Network Flipped
FoundA: YHR205W: YHR205W
FoundB: YDR054C: YDR054C
Found in Directed Network
FoundA: YDR477W: YDR477W
FoundB: YPR115W: YPR115W
Found in Directed Network
FoundA: YBR160W: YBR160W
FoundB: YNL233W: YNL233W
Found in Directed Network Flipped
FoundA: YPL031C: YPL031C
FoundB: YHR183W: YHR183W
Found in Directed Network Flipped
FoundA: YBR160W: YBR160W
FoundB: YAR002W: YAR002W
Found in Directed Network Flipped
FoundA: YPL031C: YPL03

In [10]:
# Write every edge of the partial directed network to the model file,
# one edge per call to Edge.outputEdge.
modelPath = FILEPATH + "partialDirectedModel.sif"
with open(modelPath, "w") as modelFile:
    for edge in partialDirectedNetwork:
        edge.outputEdge(modelFile)