In [1]:
import ray
import ray.local_scheduler as local_scheduler

# store access to nodes by their global coordinate
@ray.remote
class MasterStore:

    def __init__(self):
        self.referenceStore = {"dna":{}, "rna":{}, "individuals":{}}
        self.adjacency_list = {"dna": Adjacency_List(), "rna": Adjacency_List(), "individuals": Adjacency_List()}

    def addGraph(self, graph_id):
        self.referenceStore[graph_id] = {}
        self.adjacency_list[graph_id] = Adjacency_List()
        
    def add_node_to_graph(self, graph_id, key, node, adjacency_list = []):
        oid = node
        
        if not isinstance(oid, local_scheduler.ObjectID):
            oid = ray.put(node)
            
        self.add_adjacency_information(graph_id, key, oid, adjacency_list)
        
    def add_adjacency_information(self, graph_id, key, node_oid, adjacency_list):
        self.adjacency_list[graph_id].add_adjacency_information(key, node_oid, adjacency_list)

    def append_to_adjacency_list(self, graph_id, key, new_adjacent_node_key):
        self.adjacency_list[graph_id].add_new_adjacent_node(key, new_adjacent_node_key)
        
    def add_inter_graph_connection(self, graph_id, key, other_graph_id, other_graph_key):
        self.adjacency_list[graph_id].add_inter_graph_connection(key, other_graph_id, other_graph_key)
        self.adjacency_list[graph_id].add_inter_graph_connection(key, other_graph_id, other_graph_key)
        
    def node_exists(self, graph_id, key):
        return graph_id in self.adjacency_list and key in self.adjacency_list[graph_id].adjacency_list
    
    def get_node(self, graph_id, key):
        return ray.get(self.adjacency_list[graph_id].oid_dictionary[key])
    
    def get_inter_graph_connections(self, graph_id, key, other_graph_id = ""):
        if other_graph_id == "":
            return self.adjacency_list[graph_id].inter_graph_connections[key]
        else:
            return self.adjacency_list[graph_id].inter_graph_connections[key][other_graph_id]
        
    def get_adjacency_list(self, graph_id, key):
        return self.adjacency_list[graph_id].adjacency_list[key]
        
#     def updateRef(self, datatype, key, value):
#         oid = value

#         if not isinstance(oid, local_scheduler.ObjectID):
#             oid = ray.put(value)        

#         self.referenceStore[datatype][key] = oid

#     def getRef(self, datatype, key):
#         ref = self.referenceStore[datatype][key]
#         # check if this is a ray object id or a node
#         if isinstance(ref, local_scheduler.ObjectID):
#             return ray.get(ref)
#         elif isinstance(ref, Node):
#             return ref
#         else:
#             raise ValueError("The graph does not contain references or nodes at key: \'" + str(key) + "\'")

#     def refExists(self, datatype, key):
#         return datatype in self.referenceStore and key in self.referenceStore[datatype]
    
class Adjacency_List:
    
    def __init__(self):
        self.oid_dictionary = {}
        self.adjacency_list = {}
        self.inter_graph_connections = {}
        
    def add_adjacency_information(self, key, oid, adjacency_list):
        self.oid_dictionary[key] = oid
        self.adjacency_list[key] = set(adjacency_list)
        self.create_inter_graph_connection(key)
    
    def add_new_adjacent_node(self, key, adjacent_node_key):
        self.adjacency_list[key].add(adjacent_node_key)
        
    def create_inter_graph_connection(self, key):
        self.inter_graph_connections[key] = {}
        
    def add_inter_graph_connection(self, key, other_graph_id, adjacency_list):
        if not self.inter_graph_connections[key]:
            self.create_inter_graph_connection(key)
        self.inter_graph_connections[key][other_graph_id] = set([adjacency_list])

In [2]:
class Neighbor:

    @staticmethod
    def opposite():
        return Neighbor

    def __init__(self):
        self.destination_coordinates = set()

    def add(self, coordinate):
        self.destination_coordinates.add(coordinate)

class Previous(Neighbor):

    @staticmethod
    def opposite():
        return Next

    def __init__(self):
        self.destination_coordinates = set()

class Next(Neighbor):

    @staticmethod
    def opposite():
        return Previous

    def __init__(self):
        self.destination_coordinates = set()

In [3]:
# a generic node.
class Node:

    def __init__(self, key, data, datatype):
        self.key = key
        self.data = data
        self.datatype = datatype
#         self.neighbors = { Neighbor.__name__: Neighbor(), Previous.__name__: Previous(), Next.__name__: Next() }
#         for neighbor_type in new_neighbors:
#             for neighbor in new_neighbors[neighbor_type]:
#                 self.addNeighbor(neighbor, neighbor_type)
#         self.interGraphLinks = {}

#     def addNeighbor(self, newNeighbor, type_of_neighbor = Neighbor):
#         assert(type(type_of_neighbor).__name__ == 'type')
#         type_of_neighbor = type_of_neighbor.__name__
#         if type_of_neighbor == Neighbor.__name__ or type_of_neighbor == Previous.__name__ or type_of_neighbor == Next.__name__:
#             self.neighbors[type_of_neighbor].add(newNeighbor)
#         else:
#             raise ValueError("The type of neighbor must be Neighbor, Previous, or Next. Type was " + type_of_neighbor.__name__)

#     def dropNeighbor(self, oldNeighbor):
#         self.neighbors.remove(oldNeighbor)

#     def addInterGraphLink(self, datatype, key):
#         if datatype in self.interGraphLinks:
#             if key not in self.interGraphLinks[datatype]:
#                 self.interGraphLinks[datatype].append(key)
#         else:
#             self.interGraphLinks[datatype] = [key]

#     def merge(self, otherNode):
#         for neighbor_type in otherNode.neighbors:
#             for neighbor in otherNode.neighbors[neighbor_type].destination_coordinates:
#                 self.addNeighbor(neighbor, eval(neighbor_type))

#         for key, value in otherNode.interGraphLinks.items():
#             for link in value:
#                 self.addInterGraphLink(key, link)
                
class Edge:
    
    def __init__(self, destination, weight = 0, orientation = "none"):
        self.destination = destination
        self.weight = weight
        self.orientation = orientation
        
    def update_weight(self, new_weight):
        self.weight = new_weight
        
    def add_to_weight(self, weight_to_add):
        self.weight += weight_to_add
        
    def update_orientation(self, new_orientation):
        self.orientation = new_orientation

In [4]:
def build_individuals_graph(individuals, master_store):
    graph_id = "individuals"
    for indiv_id, data in individuals.items():
        node = Node(indiv_id, data, graph_id)
#        masterStore.updateRef.remote("individuals", indivID, node)
        master_store.add_node_to_graph.remote(graph_id, indiv_id, node)

In [5]:
def build_dna_graph(reference_genome, dna_test_data, master_store):
    graph_id = "dna"
    # start building the graph
    for i in range(len(reference_genome)):
        coordinate = float(i)
        # store the coordinates of neighboring nodes
        neighbors = []

        if i != 0:
            neighbors.append(Edge(float(i - 1), 0, "left"))
        if i != len(reference_genome) - 1:
            neighbors.append(Edge(float(i + 1), 0, "right"))
        
        # create a new node
        node = Node(coordinate, reference_genome[i], graph_id)

        # store a link to the object in the masterStore
#        masterStore.updateRef.remote("dna", float(i), node)
        master_store.add_node_to_graph.remote(graph_id, coordinate, node, neighbors)
    
    for indiv in dna_test_data:
        for variant in indiv["dnaData"]:
            
            coordinate = variant["coordinateStart"]
            # store the coordinates of neighboring nodes
            neighbors = []
            neighbors.append(Edge(float(int(coordinate) - 1), 0, "left"))
            neighbors.append(Edge(float(int(variant["coordinateStop"])), 0, "right"))

            # create a new node for the individual data
            node = Node(coordinate, variant["variantAllele"], graph_id)
            
            master_store.add_node_to_graph.remote(graph_id, coordinate, node, neighbors)
            #node.addInterGraphLink("individuals", indiv["individualID"])
            master_store.add_inter_graph_connection.remote(graph_id, coordinate, "individuals", indiv["individualID"])
            
#             if ray.get(masterStore.refExists.remote("dna", variant["coordinateStart"])):
#                 node.merge(ray.get(masterStore.getRef.remote("dna", variant["coordinateStart"])))
            edge_to_this_node = Edge(coordinate, 0, "none")
            for neighbor in neighbors:
                master_store.append_to_adjacency_list.remote(graph_id, neighbor.destination, edge_to_this_node)
#                 if ray.get(masterStore.refExists.remote("dna", neighbor)):
                    
#                     tempNode = ray.get(masterStore.getRef.remote("dna", neighbor))
#                     tempNode.addNeighbor(variant["coordinateStart"], neighbor_type.opposite())
#                     masterStore.updateRef.remote("dna", neighbor, tempNode)
#             indivNode = ray.get(masterStore.getRef.remote("individuals", indiv["individualID"]))
#             indivNode.addInterGraphLink("dna", variant["coordinateStart"])
#             masterStore.updateRef.remote("individuals", indiv["individualID"], indivNode)

In [6]:
def bfs(graph_id, start_node_id, master_store):
    q = [start_node_id]
    visited = []

    while(q and ray.get(master_store.node_exists.remote(graph_id, q[0]))):
        current_key = q[0]
        q.remove(current_key)
        visited.append(current_key)

        node = ray.get(master_store.get_node.remote(graph_id, current_key))
        print(str(current_key) + "\t" + str(node.data) + "\t" + str(ray.get(master_store.get_inter_graph_connections.remote(graph_id, current_key))))

        for neighbor in ray.get(master_store.get_adjacency_list.remote(graph_id, current_key)):
            if neighbor.destination not in visited and neighbor.destination not in q:
                q.append(neighbor.destination)

In [7]:
ray.init()

Waiting for redis server at 127.0.0.1:41769 to respond...
Waiting for redis server at 127.0.0.1:56708 to respond...
Starting local scheduler with 8 CPUs, 0 GPUs

View the web UI at http://localhost:8894/notebooks/ray_ui4463.ipynb?token=15a268d3cb453542bb1268ff1149179c2402d53a04269566



{'local_scheduler_socket_names': ['/tmp/scheduler68609629'],
 'node_ip_address': '127.0.0.1',
 'object_store_addresses': [ObjectStoreAddress(name='/tmp/plasma_store80254776', manager_name='/tmp/plasma_manager61141718', manager_port=47510)],
 'redis_address': '127.0.0.1:41769',
 'webui_url': 'http://localhost:8894/notebooks/ray_ui4463.ipynb?token=15a268d3cb453542bb1268ff1149179c2402d53a04269566'}

In [9]:
# all communication to adjacent nodes goes through the master store
master_store = MasterStore.remote()

# sample reference genome
reference_genome = "CAGTCCTAGCTACGCTCTATCCTCTCAGAGGACCGATCGATATACGCGTGAAACTAGTGCACTAGACTCGAACTGA"

# sample test data for DNA operations
dna_test_data = [{"individualID":0, "dnaData":
                [{"coordinateStart":7.1, "coordinateStop":8.0, "variantAllele": "C"},
                 {"coordinateStart":12.2, "coordinateStop":13.0, "variantAllele": "T"},
                 {"coordinateStart":26.2222, "coordinateStop":27.0, "variantAllele": "TTTT"}]},
               {"individualID":1, "dnaData":
                [{"coordinateStart":7.2, "coordinateStop":8.0, "variantAllele": "G"},
                 {"coordinateStart":12.2, "coordinateStop":13.0, "variantAllele": "T"}]}]

# individual IDs
individuals = {0: {"Name":"John Doe", "Gender":"M"}, 1: {"Name":"Jane Doe", "Gender":"M"}}

build_individuals_graph(individuals, master_store)
print(ray.get(master_store.get_node.remote("individuals", 0)).data)
print(ray.get(master_store.get_node.remote("individuals", 1)).data)

# build the graph
build_dna_graph(reference_genome, dna_test_data, master_store)

# traverse our new graph to look at
bfs("dna", 0.0, master_store)

print(ray.get(master_store.get_inter_graph_connections.remote("individuals", 0)))
print(ray.get(master_store.get_inter_graph_connections.remote("individuals", 1)))

print(ray.get(master_store.get_node.remote("individuals", 0)))
print(ray.get(master_store.get_node.remote("individuals", 1)))

{'Name': 'John Doe', 'Gender': 'M'}
{'Name': 'Jane Doe', 'Gender': 'M'}
0.0	C	{}
1.0	A	{}
2.0	G	{}
3.0	T	{}
4.0	C	{}
5.0	C	{}
6.0	T	{}
7.2	G	{'individuals': {1}}
7.1	C	{'individuals': {0}}
7.0	A	{}
8.0	G	{}
9.0	C	{}
10.0	T	{}
11.0	A	{}
12.0	C	{}
12.2	T	{'individuals': {1}}
13.0	G	{}
14.0	C	{}
15.0	T	{}
16.0	C	{}
17.0	T	{}
18.0	A	{}
19.0	T	{}
20.0	C	{}
21.0	C	{}
22.0	T	{}
23.0	C	{}
24.0	T	{}
25.0	C	{}
26.0	A	{}
26.2222	TTTT	{'individuals': {0}}
27.0	G	{}
28.0	A	{}
29.0	G	{}
30.0	G	{}
31.0	A	{}
32.0	C	{}
33.0	C	{}
34.0	G	{}
35.0	A	{}
36.0	T	{}
37.0	C	{}
38.0	G	{}
39.0	A	{}
40.0	T	{}
41.0	A	{}
42.0	T	{}
43.0	A	{}
44.0	C	{}
45.0	G	{}
46.0	C	{}
47.0	G	{}
48.0	T	{}
49.0	G	{}
50.0	A	{}
51.0	A	{}
52.0	A	{}
53.0	C	{}
54.0	T	{}
55.0	A	{}
56.0	G	{}
57.0	T	{}
58.0	G	{}
59.0	C	{}
60.0	A	{}
61.0	C	{}
62.0	T	{}
63.0	A	{}
64.0	G	{}
65.0	A	{}
66.0	C	{}
67.0	T	{}
68.0	C	{}
69.0	G	{}
70.0	A	{}
71.0	A	{}
72.0	C	{}
73.0	T	{}
74.0	G	{}
75.0	A	{}
{}
{}
<__main__.Node object at 0x1121f26d8>
<__main__.Node obj

In [None]:
## this will store all reads in their original form
masterStore.addGraph.remote("reads")
# this will store the genome graph for all reads
masterStore.addGraph.remote("readsGenomeGraph")

#sample reads
sampleReadData = [{"contigName": "chr1", "start": 268051, "end": 268101, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2307:5603:121126", "sequence": "GGAGTGGGGGCAGCTACGTCCTCTCTTGAGCTACAGCAGATTCACTCNCT", "qual": "BCCFDDFFHHHHHJJJIJJJJJJIIIJIGJJJJJJJJJIIJJJJIJJ###", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": False, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "47T0G1", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:2\tNM:i:2\tXG:i:0\tXA:Z:chr16,-90215399,50M,2;chr6,-170736451,50M,2;chr8,+71177,50M,3;chr1,+586206,50M,3;chr1,+357434,50M,3;chr5,-181462910,50M,3;chr17,-83229095,50M,3;\tX1:i:5\tX0:i:3", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 1424219, "end": 1424269, "mapq": 37, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2107:15569:102571", "sequence": "AGCGCTGTAGGGACACTGCAGGGAGGCCTCTGCTGCCCTGCTAGATGTCA", "qual": "CCCFFFFFHHHHHJJJJJJJJJJIJJJJJJJJJJJJJJJIJJIJHIIGHI", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": False, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "50", "origQual": None, "attributes": "XT:A:U\tXO:i:0\tXM:i:0\tNM:i:0\tXG:i:0\tX1:i:0\tX0:i:1", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 1443674, "end": 1443724, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712", "sequence": "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "qual": "############################BBBCDEEA<?:FDCADDD?;=+", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": True, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "50", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:0\tNM:i:0\tXG:i:0\tX0:i:1406", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 1443676, "end": 1443726, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413", "sequence": "TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "qual": "###########################B?;;AFHFIGDDHDDDDDBD@?=", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": True, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "50", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:0\tNM:i:0\tXG:i:0\tX0:i:1406", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 2653642, "end": 2653692, "mapq": 25, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2306:20003:84408", "sequence": "ANNACACCCCCAGGCGAGCATCTGACAGCCTGGAACAGCACCCACACCCC", "qual": "######JJJJJJJIJIJJIHGGGIIJJJJJJJJJJJJHHFHHDDDBFC@@", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": True, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "0T0C0C47", "origQual": None, "attributes": "XT:A:U\tXO:i:0\tXM:i:3\tNM:i:3\tXG:i:0\tX1:i:0\tX0:i:1", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 2664732, "end": 2664782, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2106:12935:169714", "sequence": "GAGCATGTGACAGCCTAGGTCGGCACCCACACCCCCAGGTGAGCATCTGA", "qual": "FDBDCHFFEHDCCAFHIHA6EGB?8GGFF?8IEHEB@FHDHGEDDBD@@@", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": True, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "6C9G33", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:2\tNM:i:2\tXG:i:0\tX1:i:13\tX0:i:5", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 2683541, "end": 2683591, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2107:5053:12847", "sequence": "AGCACCCACAACCACAGGTGAGCATCCGACAGCCTGGAACAGCACCCACA", "qual": "CCCFFFFFHGHHHJIJJJHGGIIJJJJJIJGIIJJIJJIJJJJIJIIJJJ", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": False, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "50", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:0\tNM:i:0\tXG:i:0\tXA:Z:chr1,+2687435,50M,0;chr1,+2694861,50M,0;chr1,+2755813,50M,1;\tX1:i:1\tX0:i:3", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 2689861, "end": 2689911, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2108:5080:115408", "sequence": "GGTGAGCATCTGACAGCCCGGAGCAGCACGCAAACCCCCAGGTGAGCATC", "qual": "@@BFBBDFHHHHGJIJGIIFIEIJJJJIJJJJJJJJJJJJIJGHHICEHH", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": False, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "18T3A27", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:2\tNM:i:2\tXG:i:0\tX1:i:21\tX0:i:2", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 2750194, "end": 2750244, "mapq": 0, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:1204:10966:151563", "sequence": "CCCCCNCACCCCCAGGTGAGCATCTGATGGTCTGGAGCAGCACCCACACC", "qual": "######F;JJJJJJJJJJJJIIIJIJJJJFJJIJJGJHHHHHFFDDD?BB", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": True, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "1A3A12C31", "origQual": None, "attributes": "XT:A:R\tXO:i:0\tXM:i:3\tNM:i:3\tXG:i:0\tXA:Z:chr1,-2653118,50M,3;chr1,-2652838,50M,3;chr1,-2653681,50M,3;chr1,-2694823,50M,3;chr1,-2687397,50M,3;chr1,-2755775,50M,3;chr1,-2653921,50M,3;\tX1:i:0\tX0:i:8", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None},
                    {"contigName": "chr1", "start": 3052271, "end": 3052321, "mapq": 25, "readName": "D3NH4HQ1:95:D0MT5ACXX:2:2107:21352:43370", "sequence": "TCANTCATCTTCCATCCATCCGTCCAACAACCATTTGTTGATCATCTCTC", "qual": "@@<#4AD?ACDCDHGIDA>C?<A;8CBEEBAG1D?BG?GH?@DEHFG@FH", "cigar": "50M", "readPaired": False, "properPair": False, "readMapped": True, "mateMapped": False, "failedVendorQualityChecks": False, "duplicateRead": False, "readNegativeStrand": False, "mateNegativeStrand": False, "primaryAlignment": True, "secondaryAlignment": False, "supplementaryAlignment": False, "mismatchingPositions": "3C44A0T0", "origQual": None, "attributes": "XT:A:U\tXO:i:0\tXM:i:3\tNM:i:3\tXG:i:0\tX1:i:0\tX0:i:1", "recordGroupName": None, "recordGroupSample": None, "mateAlignmentStart": None, "mateAlignmentEnd": None, "mateContigName": None, "inferredInsertSize": None}]

for read in sampleReadData:
    masterStore.updateRef.remote("reads", read["readName"], read)
    for index in range(len(read["sequence"])):
        data = [read["readName"]]

        neighbors = {}
        if index != 0:
            neighbors[Previous] = set([read["contigName"] + "\t" + str(read["start"] + index - 1)])
        if index != len(read["sequence"]) - 1:
            neighbors[Next] = set([read["contigName"] + "\t" + str(read["start"] + index + 1)])

        node = Node(data, "readsGenomeGraph", neighbors)

        coordinate = read["contigName"] + "\t" + str(read["start"] + index)

        if ray.get(masterStore.refExists.remote("readsGenomeGraph", coordinate)):
            previousNode = ray.get(masterStore.getRef.remote("readsGenomeGraph", coordinate))
            node.merge(previousNode)
            node.data += previousNode.data

        node.addInterGraphLink("reads", read["readName"])
        masterStore.updateRef.remote("readsGenomeGraph", coordinate, node)


# for storing the feature data
masterStore.addGraph.remote("features")

sampleFeatures = [{"featureName": "0", "contigName": "chr1", "start": 45520936, "end": 45522463, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "878", "thickStart": "482.182760214932", "thickEnd": "-1"}},
                    {"featureName": "1", "contigName": "chr1", "start": 88891087, "end": 88891875, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "423", "thickStart": "446.01797654123", "thickEnd": "-1"}},
                    {"featureName": "2", "contigName": "chr1", "start": 181088138, "end": 181090451, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "626", "thickStart": "444.771802710521", "thickEnd": "-1"}},
                    {"featureName": "3", "contigName": "chr1", "start": 179954184, "end": 179955452, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "647", "thickStart": "440.10466093652", "thickEnd": "-1"}},
                    {"featureName": "4", "contigName": "chr1", "start": 246931401, "end": 246932507, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "423", "thickStart": "436.374938660247", "thickEnd": "-1"}},
                    {"featureName": "5", "contigName": "chr1", "start": 28580676, "end": 28582443, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "1106", "thickStart": "434.111845970505", "thickEnd": "-1"}},
                    {"featureName": "6", "contigName": "chr1", "start": 23691459, "end": 23692369, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "421", "thickStart": "426.055504846001", "thickEnd": "-1"}},
                    {"featureName": "7", "contigName": "chr1", "start": 201955033, "end": 201956082, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "522", "thickStart": "423.882565088207", "thickEnd": "-1"}},
                    {"featureName": "8", "contigName": "chr1", "start": 207321011, "end": 207323021, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "741", "thickStart": "423.625988483304", "thickEnd": "-1"}},
                    {"featureName": "9", "contigName": "chr1", "start": 145520936, "end": 145522463, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "878", "thickStart": "482.182760214932", "thickEnd": "-1"}},
                    {"featureName": "10", "contigName": "chr1", "start": 188891087, "end": 188891875, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "423", "thickStart": "446.01797654123", "thickEnd": "-1"}},
                    {"featureName": "11", "contigName": "chr1", "start": 1181088138, "end": 1181090451, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "626", "thickStart": "444.771802710521", "thickEnd": "-1"}},
                    {"featureName": "12", "contigName": "chr1", "start": 1179954184, "end": 1179955452, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "647", "thickStart": "440.10466093652", "thickEnd": "-1"}},
                    {"featureName": "13", "contigName": "chr1", "start": 1246931401, "end": 1246932507, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "423", "thickStart": "436.374938660247", "thickEnd": "-1"}},
                    {"featureName": "14", "contigName": "chr1", "start": 128580676, "end": 128582443, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "1106", "thickStart": "434.111845970505", "thickEnd": "-1"}},
                    {"featureName": "15", "contigName": "chr1", "start": 123691459, "end": 123692369, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "421", "thickStart": "426.055504846001", "thickEnd": "-1"}},
                    {"featureName": "16", "contigName": "chr1", "start": 1201955033, "end": 1201956082, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "522", "thickStart": "423.882565088207", "thickEnd": "-1"}},
                    {"featureName": "17", "contigName": "chr1", "start": 1207321011, "end": 1207323021, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "741", "thickStart": "423.625988483304", "thickEnd": "-1"}},
                    {"featureName": "18", "contigName": "chr1", "start": 1110963118, "end": 1110964762, "score": 0.0, "attributes": {"itemRgb": "5.0696939910406", "blockCount": "758", "thickStart": "421.056761458099", "thickEnd": "-1"}}]

for feature in sampleFeatures:
    node = Node(feature, "features", set())
    for index in range(feature["end"] - feature["start"]):
        coordinate = feature["contigName"] + "\t" + str(read["start"] + index)
        node.addInterGraphLink("readsGenomeGraph", coordinate)
        if ray.get(masterStore.refExists.remote("readsGenomeGraph", coordinate)):
            previousNode = ray.get(masterStore.getRef.remote("readsGenomeGraph", coordinate))
            previousNode.addInterGraphLink("features", feature["featureName"])
            masterStore.updateRef.remote("readsGenomeGraph", coordinate, previousNode)

    masterStore.updateRef.remote("features", feature["featureName"], node)

In [10]:
bfs("readsGenomeGraph", "chr1\t1443674", masterStore)

chr1	1443674	['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']	{'reads': ['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']}
chr1	1443675	['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']	{'reads': ['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']}
chr1	1443676	['D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']	{'reads': ['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413']}
chr1	1443677	['D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']	{'reads': ['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413']}
chr1	1443678	['D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']	{'reads': ['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413']}
chr1	1443679	['D3NH4HQ1:95:D0MT5ACXX:2:2103:21028:126413', 'D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712']	{'reads': ['D3NH4HQ1:95:D0MT5ACXX:2:2103:19714:5712', 'D3NH4HQ1:95:D