In [4]:
from gfagraphs import Graph

In [5]:
def revcomp(string: str, compl: dict = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'}) -> str:
    """Tries to compute the reverse complement of a sequence

    Args:
        string (str): original character set
        compl (dict, optional): dict of correspondances. Defaults to {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}.

    Raises:
        IndexError: Happens if revcomp encounters a char that is not in the dict

    Returns:
        str: the reverse-complemented string
    """
    try:
        return ''.join([compl[s] for s in string][::-1])
    except IndexError as exc:
        raise IndexError(
            "Complementarity does not include all chars in sequence.") from exc


In [6]:
path_name:str = "CASBJU01"
start, end = 207966, 209078

In [7]:
graph_A:str = '../data/mgc_v2.9.0/mgc_graph_15_gfa1.gfa'
graph_B:str = '../data/pggb_v0.6.0/pggb_graph_15_yeast.gfa'
graph_MC = Graph(graph_A)
graph_PG = Graph(graph_B)

In [8]:
def display_interval(graph:Graph, path_name:str, start:int, end:int):
    pos_on_path:int = 0
    chain_length:int = 0
    forward, reverse = 0,0
    max_node_length = 0
    sequence:str = ""
    chain:str = ""
    for i in range(len(graph.paths[path_name]['path'])):
        if pos_on_path < start:
            pos_on_path += graph.segments[graph.paths[path_name]['path'][i][0]]['length']
            continue
        if pos_on_path > end:
            break
        else:
            if graph.paths[path_name]['path'][i][1].value == '+':
                forward += 1
            else:
                reverse += 1
            chain += f"[{graph.paths[path_name]['path'][i][0]}{graph.paths[path_name]['path'][i][1].value}]"
            max_node_length = max(max_node_length, graph.segments[graph.paths[path_name]['path'][i][0]]['length'])
            sequence += graph.segments[graph.paths[path_name]['path'][i][0]]['seq'] if graph.paths[path_name]['path'][i][1].value == '+' else revcomp(graph.segments[graph.paths[path_name]['path'][i][0]]['seq'])
            chain_length += 1
    print(f"Chain length: {chain_length}")
    print(f"Forward segments: {forward}, Reverse segments: {reverse}")
    print(f"Max node length: {max_node_length}")
    print(chain)
    print(sequence)

In [9]:
print(start)
print(end)
print(end-start)

207966
209078
1112


In [10]:
display_interval(graph_MC, path_name, start, end)
display_interval(graph_PG, path_name, start, end)

Chain length: 3262
Forward segments: 3262, Reverse segments: 0
Max node length: 201
[29133+][29134+][29136+][29137+][29138+][29139+][29141+][29142+][29144+][29145+][29146+][29148+][29150+][29151+][29154+][29155+][29156+][29158+][29159+][29160+][29163+][29165+][29166+][29168+][29169+][29172+][29173+][29175+][29176+][29177+][29178+][29180+][29181+][29182+][29186+][29187+][29189+][29191+][29192+][29194+][29195+][29196+][29198+][29199+][29201+][29202+][29205+][29206+][29207+][29209+][29212+][29213+][29215+][29216+][29217+][29220+][29222+][29223+][29225+][29227+][29229+][29230+][29231+][29233+][29236+][29237+][29238+][29240+][29241+][29242+][29244+][29247+][29248+][29250+][29251+][29252+][29255+][29256+][29258+][29259+][29261+][29262+][29265+][29266+][29268+][29271+][29272+][29273+][29276+][29277+][29279+][29280+][29281+][29282+][29284+][29285+][29287+][29288+][29290+][29291+][29294+][29295+][29297+][29298+][29300+][29301+][29302+][29304+][29305+][29307+][29308+][29310+][29311+][29312+][293

In [11]:
# Print largest node from both graphs
largest_node_MC = max(graph_MC.segments, key=lambda x: graph_MC.segments[x]['length'])
print(f"Max node in graph A: {largest_node_MC} ({graph_MC.segments[largest_node_MC]['length']})")

largest_node_PG = max(graph_PG.segments, key=lambda x: graph_PG.segments[x]['length'])
print(f"Max node in graph B: {largest_node_PG} ({graph_PG.segments[largest_node_PG]['length']})")

# Print largest node in reference
reference_nodes = set([x for x,_ in graph_MC.paths[path_name]['path']])
largest_node_ref = max({seg:val for seg,val in graph_MC.segments.items() if seg in reference_nodes}, key=lambda x: graph_MC.segments[x]['length'])

print(f"Max node in reference: {largest_node_ref} ({graph_MC.segments[largest_node_ref]['length']})")

reference_nodes = set([x for x,_ in graph_PG.paths[path_name]['path']])
largest_node_ref = max({seg:val for seg,val in graph_PG.segments.items() if seg in reference_nodes}, key=lambda x: graph_PG.segments[x]['length'])

print(f"Max node in reference: {largest_node_ref} ({graph_PG.segments[largest_node_ref]['length']})")

Max node in graph A: 14121 (12521)
Max node in graph B: 21478 (12759)
Max node in reference: 9231 (593)
Max node in reference: 3173 (593)
