In [49]:
from genotype_network.protein import ProteinGN
from collections import Counter

In [50]:
# Instantiate the protein genotype network and read the saved network file.
# NOTE(review): the '.pkl' extension suggests a pickle file — confirm how
# read_genotype_network parses it, and only load files from a trusted source.
gn = ProteinGN()
gn.read_genotype_network('PB2_2010-2015.pkl')

In [51]:
# Size of the loaded network: edge count on the first line, node count on the second.
print(len(gn.edges), len(gn.nodes), sep='\n')

1950
3294


In [52]:
# Tally how many nodes there are of each length, then show the most
# frequent length together with the number of nodes that have it.
lengths = Counter(len(n) for n, _ in gn.nodes)

lengths.most_common(1)

[(759, 3252)]

In [53]:
# Remove nodes not of the most common length.
# The modal length is looked up once rather than on every iteration, and the
# loop walks a snapshot of the nodes so that removing nodes from the graph
# cannot invalidate the iteration (this matters if gn.nodes is a live view
# of gn.G rather than a copied list).
modal_length = lengths.most_common(1)[0][0]
for n, d in list(gn.nodes):
    if len(n) != modal_length:
        gn.G.remove_node(n)

# Number of nodes that remain after filtering.
len(gn.nodes)

3252

In [71]:
# Find all nodes with a given polymorphism at a given position.

def nodes_with_polymorphism(pos, letter):
    """
    Collect every node of `gn` that carries `letter` at position `pos`.

    `pos` is 1-indexed: position 1 refers to the first character of a node.
    The result is a set of the matching nodes (empty if none match).
    """
    index = pos - 1
    return {n for n, d in gn.nodes if n[index] == letter}

# Nodes carrying 'K' at (1-indexed) position 627; the cell displays how many there are.
polymorph_nodes = nodes_with_polymorphism(627, 'K')
len(polymorph_nodes)

399

In [72]:
def polymorphism_nodes_neighbors(pos, letter):
    """
    Find the graph neighbors that lack a given polymorphism.

    First collects the nodes that have `letter` at the 1-indexed position
    `pos`, then returns the set of their neighbors in `gn.G` that do NOT have
    `letter` at that same position.
    """
    # Call the helper that this notebook actually defines. The previous name,
    # `find_nodes_with_polymorphism`, is not defined anywhere in the notebook
    # and raises NameError on a fresh Restart & Run All.
    nodes = nodes_with_polymorphism(pos, letter)

    index = pos - 1
    return {
        n2
        for n in nodes
        for n2 in gn.G.neighbors(n)
        if n2[index] != letter
    }

# Neighbors of the nodes with 'K' at position 627 that do not themselves
# have 'K' at that position; the cell displays how many there are.
polymorph_neighbors = polymorphism_nodes_neighbors(627, 'K')
len(polymorph_neighbors)

11

In [73]:
"""
The polymorphisms of interest are recorded as a dictionary below.
"""

# Keys are 1-indexed positions; values list the letters of interest there.
polymorphs_of_interest = {
    63: ['I'],
    158: ['G'],
    199: ['S'],
    256: ['G'],
    271: ['A'],
    360: ['Y'],
    471: ['M'],
    482: ['R'],
    588: ['I'],
    590: ['S'],
    591: ['K', 'R'],
    627: ['K'],
    636: ['F'],
    661: ['T'],
    667: ['I'],
    701: ['N'],
    702: ['R'],
}

In [75]:
# Gather, over every polymorphism of interest, the nodes that carry it plus
# their neighbors that lack it, then count the distinct nodes collected
# (the total number of nodes + neighbors that will have to be made).
all_nodes = set()
for pos, letters in polymorphs_of_interest.items():
    for letter in letters:
        all_nodes.update(
            nodes_with_polymorphism(pos, letter),
            polymorphism_nodes_neighbors(pos, letter),
        )

len(all_nodes)

3252