In [None]:
# returns dictionary of names
# to pairs of x,y coordinates

def get_info_from_csv_lines(lines):
    """Parse node-coordinate CSV lines into a name -> [x, y] mapping.

    The first element of ``lines`` is treated as a header row and skipped.
    Every remaining non-blank line must consist of exactly three
    comma-separated fields: ``name,x,y``.

    Args:
        lines: Iterable of CSV rows as strings, header row first.

    Returns:
        Dict mapping each name to a two-element list
        ``[float(x), float(y)]``. If a name occurs on several rows, the
        last row wins.

    Raises:
        ValueError: If a non-blank data row does not have exactly three
            fields, or if x or y cannot be converted to ``float``.
    """
    map_gene_xy = {}
    for row_index, line in enumerate(lines):
        if row_index == 0:
            # Header row: column titles, not data.
            continue
        if not line.strip():
            # Blank rows (typically a trailing empty line) carry no data
            # and would otherwise break the three-way unpacking below.
            continue
        name, x, y = line.split(',')
        map_gene_xy[name] = [float(x), float(y)]
    return map_gene_xy

# returns dictionary of names to list of pairs,
# where each pair is the adjacency and its type

def populate_adj_list_from_tsv_lines(lines):
    """Build an adjacency list of categorised edges from TSV lines.

    The first element of ``lines`` is treated as a header row and skipped.
    Every remaining non-blank line must consist of exactly four
    tab-separated fields: ``g1<TAB>g2<TAB>score<TAB>category``. The score
    field is read but not used.

    Args:
        lines: Iterable of TSV rows as strings, header row first.

    Returns:
        Dict mapping each first-column node to a list of ``[g2, category]``
        pairs (category is e.g. 'SL', 'Non-SL', 'Inconclusive'). Pairs
        appear in first-seen order and exact duplicates are dropped. Edges
        are stored only under the first-column node.

    Raises:
        ValueError: If a non-blank data row does not have exactly four
            tab-separated fields.
    """
    map_gene_adj = {}
    # Remember every (source, target, category) already stored so the
    # duplicate check is a set lookup rather than a scan of the list.
    seen_edges = set()
    for row_index, line in enumerate(lines):
        if row_index == 0 or not line.strip():
            # Skip the header row and blank rows (e.g. trailing newline).
            continue
        g1, g2, _score, category = line.split('\t')
        edge = (g1, g2, category)
        if edge in seen_edges:
            continue
        seen_edges.add(edge)
        map_gene_adj.setdefault(g1, []).append([g2, category])
    return map_gene_adj

# returns dictionary of names to list of homologs
def populate_adj_homolog_adj_list_from_txt_lines(lines):
    """Build a mapping from each gene to the list of its homologs.

    Each non-blank line must hold exactly two identifiers separated by a
    single space: ``g1 g2``. No header row is expected. Homologs are stored
    only under the first identifier.

    Args:
        lines: Iterable of ``"g1 g2"`` strings.

    Returns:
        Dict mapping each first identifier to a list of its distinct
        second identifiers, in first-seen order.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields on a single space.
    """
    map_gene_homolog = {}
    for line in lines:
        if not line.strip():
            # Blank lines (e.g. a trailing empty line) carry no pair and
            # would otherwise break the two-way unpacking below.
            continue
        g1, g2 = line.split(' ')
        homologs = map_gene_homolog.setdefault(g1, [])
        if g2 not in homologs:
            homologs.append(g2)
    return map_gene_homolog
    
import matplotlib.pyplot as plt

# Let's start getting files
def _read_lines(path):
    """Return the lines of the text file at ``path`` without trailing '\\n'."""
    # The context manager closes the file handle as soon as reading is
    # done instead of leaving it open until garbage collection.
    with open(path) as handle:
        return [line.rstrip('\n') for line in handle]


merged_csv_lines = _read_lines('MergedNodeVecs2.csv')
sc_csv_lines = _read_lines('ScNodeVecs2.csv')
sp_csv_lines = _read_lines('SpNodeVecs2.csv')
sc_tsv_edges_and_categories = _read_lines('collins-sc-emap-gis-std.tsv')
sp_tsv_edges_and_categories = _read_lines('roguev-sp-emap-gis-std.tsv')
homolog_txt_lines = _read_lines('HomologsScSp.txt')

# Populate the maps accordingly based on the type of file:
#   *_coordinate_map : name -> [x, y]            (from the CSV files)
#   *_edge_map       : name -> [[neighbor, category], ...]  (from the TSVs)
#   gene_homolog_map : name -> [homolog, ...]    (from the TXT file)
gene_coordinate_map = get_info_from_csv_lines(merged_csv_lines)
sc_gene_coordinate_map = get_info_from_csv_lines(sc_csv_lines)
sp_gene_coordinate_map = get_info_from_csv_lines(sp_csv_lines)
sc_gene_edge_map = populate_adj_list_from_tsv_lines(sc_tsv_edges_and_categories)
sp_gene_edge_map = populate_adj_list_from_tsv_lines(sp_tsv_edges_and_categories)
gene_homolog_map = populate_adj_homolog_adj_list_from_txt_lines(homolog_txt_lines)

# Plot points
# Coordinates are gathered per species first so that each species is drawn
# with a single scatter call; calling plt.scatter once per gene creates one
# matplotlib artist per point, which is very slow for large inputs.
sp_xs, sp_ys = [], []
sc_xs, sc_ys = [], []
for gene in gene_coordinate_map:
    x, y = gene_coordinate_map[gene]

    # A gene present in both species maps is treated as Sp (checked first).
    if gene in sp_gene_coordinate_map:
        sp_xs.append(x)
        sp_ys.append(y)
    elif gene in sc_gene_coordinate_map:
        sc_xs.append(x)
        sc_ys.append(y)
    else:
        print('Point existed in neither Sc nor Sp')

# Plot points from Sp red and Sc blue
if sp_xs:
    plt.scatter(sp_xs, sp_ys, s=0.25, c='#FF0000')
if sc_xs:
    plt.scatter(sc_xs, sc_ys, s=0.25, c='#0000FF')
    
# Draw every Sp interaction as a line segment coloured by its category
sp_edge_colors = {'Inconclusive': 'tab:gray', 'Non-SL': 'tab:green'}
i = 0  # running count of Sp edges drawn
for gene, adjacencies in sp_gene_edge_map.items():
    x1, y1 = gene_coordinate_map[gene]
    for neighbor, category in adjacencies:
        i += 1
        x2, y2 = gene_coordinate_map[neighbor]
        # Any category not listed in the table is drawn as 'SL' (orange)
        edge_color = sp_edge_colors.get(category, 'tab:orange')
        plt.plot([x1, x2], [y1, y2], c=edge_color, ls='solid')
            
# Draw every Sc interaction as a line segment coloured by its category
sc_edge_colors = {'Inconclusive': 'tab:gray', 'Non-SL': 'tab:blue'}
i = 0  # running count of Sc edges drawn
for gene, adjacencies in sc_gene_edge_map.items():
    x1, y1 = gene_coordinate_map[gene]
    for neighbor, category in adjacencies:
        i += 1
        x2, y2 = gene_coordinate_map[neighbor]
        # Any category not listed in the table is drawn as 'SL' (red)
        edge_color = sc_edge_colors.get(category, 'tab:red')
        plt.plot([x1, x2], [y1, y2], c=edge_color, ls='solid')
            
# Connect each gene to each of its homologs with a cyan segment
i = 0  # running count of homolog edges drawn
for gene, adjacencies in gene_homolog_map.items():
    x1, y1 = gene_coordinate_map[gene]
    for neighbor in adjacencies:
        i += 1
        x2, y2 = gene_coordinate_map[neighbor]
        segment_xs = [x1, x2]
        segment_ys = [y1, y2]
        plt.plot(segment_xs, segment_ys, c='tab:cyan', ls='solid')
        
# Fix the visible window to [x_min, x_max, y_min, y_max] and show the figure
x_min, x_max = -0.3, 0.75
y_min, y_max = -1.95, 1.725
plt.axis([x_min, x_max, y_min, y_max])
plt.show()

['P41338 Q9UQW6', 'Q05911 O60105', 'P05373 P78974', 'P09950 O14092', 'P00812 Q10066', 'P04076 P50514', 'P38995 O59666', 'P11353 Q9UTE2', 'P09624 O00087', 'Q00578 O13768', 'Q12480 P78790', 'P08417 O94552', 'P08431 Q9HDU5', 'P51601 O13774', 'P49095 Q09785', 'P12709 P78917', 'Q08220 P35669', 'Q03834 O74502', 'P28789 Q09899', 'P04819-1 P12000', 'P38920 Q9P7W6', 'P25847 O74773', 'Q05788 Q9UTG1', 'P07991 Q9P7L5', 'P07283 Q9UTJ2', 'P40012 Q10062', 'P32347 Q9USJ5', 'P06839 P26659', 'P07277 Q09780', 'P00445 P28758', 'P48015 O14110', 'P49435 O42842', 'P05150 P31317', 'P00447 Q9UQX0', 'P41921-1 P78965', 'P17649 O13837', 'P39954 O13639', 'P07244 P20772', 'P15454 Q9P6I5', 'P32473 Q09171', 'P04050 P36594', 'P08518 Q02061', 'P16603 P36587', 'P10363 O14215', 'P20457 O74761', 'P14126 P40372', 'P49626 Q9P784', 'P0CX50 Q8TFH1', 'P24000 Q92354', 'P14120 Q9UTP0', 'P0CX28 Q9UTI8', 'P05750 O60128', 'P26783 Q9P3T6', 'Q08745 O13614', 'P0CX52 P0CT65', 'P0CX30 P0CT76', 'P38711 O74330', 'P21524 P36602', 'P06785 Q