In [1]:
import pandas as pd
import numpy as np
import glob 
import igraph as gr

Get filenames from list(dfs.keys())
Get paths using get_xxx_paths
Get dfs using get_xxx_dfs

In [2]:
def remove_path(full_path):
    
    if full_path.find('/') == -1:
        pass
        
    if full_path.find('/') != -1:
        filename = full_path.split("/")[-1]
        
    return(filename)

In [3]:
def get_filename(path_or_filename_with_extension):
    # This will fail if more than one . is in the file, such as ".tar.gz"
    
    cur_filename_with_extension = remove_path(path_or_filename_with_extension)
    
    noExtension = cur_filename_with_extension.split(".")[0]
    
    return(noExtension)
    

In [4]:
def get_gnet_paths(dir):
    gnet_filename_list = []
    gnet_path_list = []

    for ipath in glob.glob(data_dir + '/*.gnet'):

        gnet_path_list.append(ipath)
        gnet_filename_list.append(get_filename(ipath))
         
    return gnet_path_list


In [5]:
def get_gnet_dfs(dir):

    gnet_dfs = {}
    total_nodes = {}
    
    gnet_path_list = get_gnet_paths(dir)

    for ignet_paths in gnet_path_list:

        cur_filename = get_filename(ignet_paths) 

        gnet_dfs[cur_filename] = pd.read_csv(ignet_paths,sep='\t').reset_index()

        total_nodes[cur_filename] = int(gnet_dfs[cur_filename].columns[2])

        gnet_dfs[cur_filename] = gnet_dfs[cur_filename].rename(columns = {'level_0':'Source','level_1':'Target',str(total_nodes[cur_filename]):'Length'})

    
    return gnet_dfs

In [6]:
def get_edge_list_from_df(df):
    
    uncleaned_edges = df.values[:,[0,1]]
    
    edge_list = []

    for iter_edges in uncleaned_edges:    
        
        cur_element = [int(iter_edges[0]), int(iter_edges[1])]
        
        edge_list.append(cur_element)

    return edge_list

In [7]:
def create_igraph_from_edgelist(edge_list):
    
    n_vertices = max(max(edge_list))
    
    g = gr.Graph()
    
    g.add_vertices(n_vertices+1)
    
    g.add_edges(edge_list)
    
    return g

In [8]:
def calc_logic_vector(df, col, value):

    query_vector = value*np.ones(len(test_df))
    logic = df[col] == query_vector
    
    return logic

In [9]:
def find_correct_rows(df, edge_list):

    logical_indices = []
        
    for cur_pair in edge_list:
        
        cur_source = int(cur_pair[0])
        cur_target = int(cur_pair[1])
        
        source_logical_vector = calc_logic_vector(
                                df=df, col='Source', value=cur_source)
        
        target_logical_vector = calc_logic_vector(
                                df=df, col='Target', value=cur_target)
        
        combined_vector = source_logical_vector & target_logical_vector
        logical_indices.append(combined_vector)
        
    
    base_logical_vector = np.zeros(len(df)) == np.ones(len(df))
        
    for logical_index in logical_indices:
        base_logical_vector = base_logical_vector | logical_index
        
    return(base_logical_vector)

In [10]:
def get_raw_dataframe(decomp_graph, df):
    
    col = ['Filename','Nodes','Edges','Length']
    raw_dataframe = pd.DataFrame(columns=col)

    for every_graph in decomp_graph:
        cur_decomp_edgelist = every_graph.get_edgelist()
        cur_logic = find_correct_rows(df=df, edge_list=cur_decomp_edgelist)
        cur_nodes = gr.Graph.vcount(every_graph)
        cur_edges = gr.Graph.ecount(every_graph)
        cur_length = sum(df.loc[cur_logic].Length.values)

        cur_graph_dict = {'Filename': cur_filename, 'Nodes': cur_nodes, 
                          'Edges': cur_edges, 'Length': cur_length}

        new_rows = pd.DataFrame.from_dict([cur_graph_dict])

        raw_dataframe = raw_dataframe.append(new_rows)
        
        
    return raw_dataframe
    

In [18]:
# Get the paths of the gnet files for processing

data_dir = '/Users/granthussey/github/MitoScripts/MitoScripts/data'
gnet_dfs = get_gnet_dfs(data_dir)

gnet_filenames = list(gnet_dfs.keys())

cur_filename = 'KRAS_mdivi_2W_001_000'

test_df = gnet_dfs[cur_filename]
test_df

Unnamed: 0,Source,Target,Length
0,522,517,0.41361
1,521,467,1.62278
2,520,466,0.83551
3,519,429,2.06772
4,518,425,6.27854
5,516,412,0.72841
6,515,372,0.52736
7,514,330,3.21937
8,513,482,2.09949
9,512,437,3.06025


In [12]:
cur_edge_list = get_edge_list_from_df(test_df)
cur_edge_list

[[522, 517],
 [521, 467],
 [520, 466],
 [519, 429],
 [518, 425],
 [516, 412],
 [515, 372],
 [514, 330],
 [513, 482],
 [512, 437],
 [510, 432],
 [509, 429],
 [386, 427],
 [508, 471],
 [507, 354],
 [506, 424],
 [505, 468],
 [503, 389],
 [502, 500],
 [501, 465],
 [499, 349],
 [498, 497],
 [496, 270],
 [494, 371],
 [493, 492],
 [372, 452],
 [372, 370],
 [491, 489],
 [490, 334],
 [488, 487],
 [486, 368],
 [485, 406],
 [484, 483],
 [481, 444],
 [480, 479],
 [478, 443],
 [476, 402],
 [475, 474],
 [474, 396],
 [474, 440],
 [437, 358],
 [437, 395],
 [473, 391],
 [472, 423],
 [386, 390],
 [427, 426],
 [470, 469],
 [464, 419],
 [463, 419],
 [462, 461],
 [460, 345],
 [459, 378],
 [459, 415],
 [459, 350],
 [458, 307],
 [457, 377],
 [456, 454],
 [455, 454],
 [454, 453],
 [451, 370],
 [451, 371],
 [452, 369],
 [451, 233],
 [450, 200],
 [449, 297],
 [448, 331],
 [447, 294],
 [446, 445],
 [444, 400],
 [444, 401],
 [443, 399],
 [442, 328],
 [441, 362],
 [440, 397],
 [440, 398],
 [439, 361],
 [438, 325],

In [13]:
cur_graph = create_igraph_from_edgelist(cur_edge_list)
cur_graph

<igraph.Graph at 0x11d2197c8>

In [14]:
decomp_graph = gr.Graph.decompose(cur_graph)
decomp_graph

[<igraph.Graph at 0x11d2198b8>,
 <igraph.Graph at 0x11d2199a8>,
 <igraph.Graph at 0x11d219a98>,
 <igraph.Graph at 0x11d219b88>,
 <igraph.Graph at 0x11d219c78>,
 <igraph.Graph at 0x11d219d68>,
 <igraph.Graph at 0x11d219e58>,
 <igraph.Graph at 0x11d2c8048>,
 <igraph.Graph at 0x11d2c8138>,
 <igraph.Graph at 0x11d2c8228>,
 <igraph.Graph at 0x11d2c8318>,
 <igraph.Graph at 0x11d2c8408>,
 <igraph.Graph at 0x11d2c84f8>,
 <igraph.Graph at 0x11d2c85e8>,
 <igraph.Graph at 0x11d2c86d8>,
 <igraph.Graph at 0x11d2c87c8>,
 <igraph.Graph at 0x11d2c88b8>,
 <igraph.Graph at 0x11d2c89a8>,
 <igraph.Graph at 0x11d2c8a98>,
 <igraph.Graph at 0x11d2c8b88>,
 <igraph.Graph at 0x11d2c8c78>,
 <igraph.Graph at 0x11d2c8d68>,
 <igraph.Graph at 0x11d2c8e58>,
 <igraph.Graph at 0x11d2c9048>,
 <igraph.Graph at 0x11d2c9138>,
 <igraph.Graph at 0x11d2c9228>,
 <igraph.Graph at 0x11d2c9318>,
 <igraph.Graph at 0x11d2c9408>,
 <igraph.Graph at 0x11d2c94f8>,
 <igraph.Graph at 0x11d2c95e8>,
 <igraph.Graph at 0x11d2c96d8>,
 <igraph

Unnamed: 0,Filename,Nodes,Edges,Length
0,KRAS_mdivi_2W_001_000,1,1,1.17471
0,KRAS_mdivi_2W_001_000,8,8,0.00000
0,KRAS_mdivi_2W_001_000,2,1,0.00000
0,KRAS_mdivi_2W_001_000,270,334,0.58789
0,KRAS_mdivi_2W_001_000,2,1,0.00000
...,...,...,...,...
0,KRAS_mdivi_2W_001_000,2,1,0.00000
0,KRAS_mdivi_2W_001_000,2,1,0.00000
0,KRAS_mdivi_2W_001_000,2,1,0.00000
0,KRAS_mdivi_2W_001_000,2,1,0.00000


In [16]:
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000

In [17]:
get_raw_dataframe(decomp_graph=decomp_graph, df=test_df)

Unnamed: 0,Filename,Nodes,Edges,Length
0,KRAS_mdivi_2W_001_000,1,1,1.17471
0,KRAS_mdivi_2W_001_000,8,8,0.0
0,KRAS_mdivi_2W_001_000,2,1,0.0
0,KRAS_mdivi_2W_001_000,270,334,0.58789
0,KRAS_mdivi_2W_001_000,2,1,0.0
0,KRAS_mdivi_2W_001_000,33,39,0.0
0,KRAS_mdivi_2W_001_000,2,1,0.0
0,KRAS_mdivi_2W_001_000,2,1,0.0
0,KRAS_mdivi_2W_001_000,2,1,0.0
0,KRAS_mdivi_2W_001_000,2,1,0.0
