# Data Preprocessing

In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix

## Cook et al. data
### The data for Cook et al. is downloaded from the Supplementary Information of 'Cook et al., Whole-animal connectomes of both Caenorhabditis elegans sexes, Nature, 2019': https://doi.org/10.1038/s41586-019-1352-7 (the data is also available at https://www.wormwiring.org/ ). The data was then preprocessed (using Excel) to include neurons only. The slightly edited original data (neurons only, with symmetric connectivity for gap junctions) is at /data/cook_etal_SI5_adjacency_matrices_neurons.xlsx. This notebook converts the data into conventional adjacency matrices and edgelists, which are saved as cook_{herm,male}_{chem,elec,combined}_AM.csv and cook_{herm,male}_{chem,elec,combined}_edgelist.csv.

#### Load the data

In [2]:
# `full_path` is the notebook's working directory; every later cell locates
# the data folder through `full_path.parent / 'data'`.
full_path = Path(os.getcwd())
cook_workbook_path = Path(full_path.parent, 'data', 'cook_etal_SI5_adjacency_matrices_neurons.xlsx')

# Parse the workbook once: with a list of sheet names read_excel returns a
# {sheet name: DataFrame} dict, so the file is not re-opened for each sheet.
cook_sheets = pd.read_excel(cook_workbook_path,
                            sheet_name=['hermaphrodite chemical',
                                        'herm gap jn symmetric',
                                        'male chemical',
                                        'male gap jn symmetric'],
                            index_col='Neurons')

# Missing cells are filled with 0; zero weights are dropped later when the
# matrices are converted to edgelists.
#herm_chem
cook_herm_chem = cook_sheets['hermaphrodite chemical'].fillna(0)

#herm gap junction (electrical)
cook_herm_elec = cook_sheets['herm gap jn symmetric'].fillna(0)

#combined (chemical and electrical) weighted directed connectome
cook_herm_combined = cook_herm_chem + cook_herm_elec

#male chemical
cook_male_chem = cook_sheets['male chemical'].fillna(0)

#male gap (electrical)
cook_male_elec = cook_sheets['male gap jn symmetric'].fillna(0)

#combined male adjacency matrix
cook_male_combined = cook_male_chem + cook_male_elec

#### Save the chemical and electrical adjacency matrices separately and combined

In [3]:
# Write every Cook et al. adjacency matrix (labels included) to the data folder.
for am_filename, am_frame in [
    #hermaphrodite data
    ('cook_herm_chem_AM.csv', cook_herm_chem),
    ('cook_herm_elec_AM.csv', cook_herm_elec),
    ('cook_herm_combined_AM.csv', cook_herm_combined),
    #male data
    ('cook_male_chem_AM.csv', cook_male_chem),
    ('cook_male_elec_AM.csv', cook_male_elec),
    ('cook_male_combined_AM.csv', cook_male_combined),
]:
    am_frame.to_csv(Path(full_path.parent, 'data', am_filename))

#### Convert the adjacency matrices to edgelists

In [4]:
#el = edgelist
def _cook_matrix_to_edgelist(adjacency_df):
    """Turn a Cook et al. adjacency matrix into a Source/Target/Weight edgelist.

    The row index (named 'Neurons') becomes 'Source' and the column labels
    become 'Target'. Entries with weight 0 are dropped. An explicit copy is
    returned so that later column assignments on the edgelist act on an
    independent frame rather than on a slice of the stacked one (which
    triggers pandas' SettingWithCopyWarning).
    """
    stacked = adjacency_df.stack().reset_index().rename(
        columns={'Neurons':'Source','level_1':'Target', 0:'Weight'})
    return stacked[stacked['Weight']!=0].copy()


print('Number of edges (synapses):')

#herm chemical
cook_herm_chem_el = _cook_matrix_to_edgelist(cook_herm_chem)
print('\tCook herm chem:', len(cook_herm_chem_el))

#herm electrical
cook_herm_elec_el = _cook_matrix_to_edgelist(cook_herm_elec)
print('\tCook herm elec:', len(cook_herm_elec_el))

#herm combined
cook_herm_combined_el = _cook_matrix_to_edgelist(cook_herm_combined)
print('\tCook herm combined:', len(cook_herm_combined_el))

#male chemical
cook_male_chem_el = _cook_matrix_to_edgelist(cook_male_chem)
print('\tCook male chem:', len(cook_male_chem_el))

#male electrical
cook_male_elec_el = _cook_matrix_to_edgelist(cook_male_elec)
print('\tCook male elec:', len(cook_male_elec_el))

#male combined
cook_male_combined_el = _cook_matrix_to_edgelist(cook_male_combined)
print('\tCook male combined:', len(cook_male_combined_el))

Number of edges (synapses):
	Cook herm chem: 3565
	Cook herm elec: 2121
	Cook herm combined: 4964
	Cook male chem: 3905
	Cook male elec: 2508
	Cook male combined: 5532


#### Add information about cell types (sensory, interneuron and motor neuron) on the edgelist
#### Information about cell types was downloaded from Supplementary Information 4 of the Cook et al. paper and is saved as cook_etal_SI4_cell_types.csv

In [5]:
#assign cell types to the source and target cells of every edge
def assign_cell_types(edgelist_df, cell_types_df, cell_list_type='cell_name'):
    """Look up the cell type of the source and target neuron of every edge.

    Parameters
    ----------
    edgelist_df : pandas.DataFrame
        Edgelist with string columns 'Source' and 'Target'. Surrounding
        whitespace in the names is ignored for the lookup.
    cell_types_df : pandas.DataFrame
        Table with a 'cell_type' column and a key column, either 'cell_name'
        (individual cells, e.g. ADAL) or 'cell_class' (cell classes, e.g. ADA).
    cell_list_type : str, default 'cell_name'
        'cell_class' matches the first three characters of each neuron name
        against the 'cell_class' column; any other value matches the full
        neuron name against the 'cell_name' column.

    Returns
    -------
    tuple of (list, list)
        Source cell types and target cell types, in edgelist row order.

    Raises
    ------
    IndexError
        If a neuron has no matching row in ``cell_types_df``.
    """
    by_class = cell_list_type == 'cell_class'
    key_column = 'cell_class' if by_class else 'cell_name'

    # Build the key -> cell type table once instead of filtering the whole
    # cell-type frame for every edge. setdefault keeps the FIRST row of a
    # duplicated key, matching the previous `.values[0]` behaviour.
    cell_type_of = {}
    for key, cell_type in zip(cell_types_df[key_column], cell_types_df['cell_type']):
        cell_type_of.setdefault(key, cell_type)

    def lookup(cell):
        key = cell.strip()
        if by_class:
            key = key[:3]
        try:
            return cell_type_of[key]
        except KeyError:
            # IndexError is kept as the exception type for an unmatched neuron,
            # but it now says which neuron was not found.
            raise IndexError(
                "no row with {}=={!r} in the cell type table (neuron {!r})".format(
                    key_column, key, cell)) from None

    source_ct_list = [lookup(cell) for cell in edgelist_df['Source']]  #Source neuron cell type
    target_ct_list = [lookup(cell) for cell in edgelist_df['Target']]  #Target neuron cell type

    return source_ct_list, target_ct_list


In [6]:
#load the information about cell types
cook_cell_types = pd.read_csv(Path(full_path.parent, 'data','cook_etal_SI4_cell_types.csv'))

# The same annotation recipe is applied, in place, to each Cook et al. edgelist:
# herm chemical / electrical / combined, then male chemical / electrical / combined.
for cook_el in (cook_herm_chem_el, cook_herm_elec_el, cook_herm_combined_el,
                cook_male_chem_el, cook_male_elec_el, cook_male_combined_el):
    #strip the spaces around the strings if there is any
    cook_el['Source'] = cook_el.Source.apply(lambda name: name.strip())
    cook_el['Target'] = cook_el.Target.apply(lambda name: name.strip())
    # look up cell types by individual cell name (the default mode)
    source_ct, target_ct = assign_cell_types(cook_el, cook_cell_types)
    cook_el['source_ct'] = source_ct
    cook_el['target_ct'] = target_ct


#### Check the connectedness of the networks and extract the largest strongly connected component if a network is not strongly connected

In [7]:
# Every graph is a directed graph built from an edgelist's Source/Target
# columns; all remaining columns are attached as edge attributes.
cook_digraph_kwargs = dict(source='Source', target='Target', edge_attr=True, create_using=nx.DiGraph)

#Cook herm connectomes
cook_herm_comb_G = nx.from_pandas_edgelist(cook_herm_combined_el, **cook_digraph_kwargs)
cook_herm_chem_G = nx.from_pandas_edgelist(cook_herm_chem_el, **cook_digraph_kwargs)
cook_herm_elec_G = nx.from_pandas_edgelist(cook_herm_elec_el, **cook_digraph_kwargs)

#Cook male connectomes
cook_male_comb_G = nx.from_pandas_edgelist(cook_male_combined_el, **cook_digraph_kwargs)
cook_male_chem_G = nx.from_pandas_edgelist(cook_male_chem_el, **cook_digraph_kwargs)
cook_male_elec_G = nx.from_pandas_edgelist(cook_male_elec_el, **cook_digraph_kwargs)

In [8]:
# Report, per Cook et al. graph, whether it is strongly connected.
for strong_label, strong_graph in [
    ('Cook herm chem strongly connected:', cook_herm_chem_G),
    ('Cook herm elec strongly connected:', cook_herm_elec_G),
    ('Cook herm comb strongly connected:', cook_herm_comb_G),
    ('Cook male chem strongly connected:', cook_male_chem_G),
    ('Cook male elec strongly connected:', cook_male_elec_G),
    ('Cook male comb strongly connected:', cook_male_comb_G),
]:
    print(strong_label, nx.is_strongly_connected(strong_graph))

Cook herm chem strongly connected: False
Cook herm elec strongly connected: False
Cook herm comb strongly connected: True
Cook male chem strongly connected: False
Cook male elec strongly connected: False
Cook male comb strongly connected: False


In [29]:
# Report, per Cook et al. graph, whether it is weakly connected.
for weak_label, weak_graph in [
    ('Cook herm chem weakly connected:', cook_herm_chem_G),
    ('Cook herm elec weakly connected:', cook_herm_elec_G),
    ('Cook herm comb weakly connected:', cook_herm_comb_G),
    ('Cook male chem weakly connected:', cook_male_chem_G),
    ('Cook male elec weakly connected:', cook_male_elec_G),
    ('Cook male comb weakly connected:', cook_male_comb_G),
]:
    print(weak_label, nx.is_weakly_connected(weak_graph))

Cook herm chem weakly connected: True
Cook herm elec weakly connected: False
Cook herm comb weakly connected: True
Cook male chem weakly connected: True
Cook male elec weakly connected: False
Cook male comb weakly connected: True


In [11]:
# For each hermaphrodite graph: list the strongly connected components, their
# sizes, and the share of all nodes that falls in the largest one.

#herm chemical
cook_herm_chem_SCC = [component for component in nx.strongly_connected_components(cook_herm_chem_G)]
cook_herm_chem_num_scc_nodes = list(map(len, cook_herm_chem_SCC))
cook_herm_chem_largest_scc_percent = 100 * (
    max(cook_herm_chem_num_scc_nodes) / cook_herm_chem_G.number_of_nodes())
print('Cook herm chem number of nodes in strongly connected component:', cook_herm_chem_num_scc_nodes)
print('\tPercentage of largest component:', cook_herm_chem_largest_scc_percent, '\n')

#herm electrical
cook_herm_elec_SCC = [component for component in nx.strongly_connected_components(cook_herm_elec_G)]
cook_herm_elec_num_scc_nodes = list(map(len, cook_herm_elec_SCC))
cook_herm_elec_largest_scc_percent = 100 * (
    max(cook_herm_elec_num_scc_nodes) / cook_herm_elec_G.number_of_nodes())
print('Cook herm elec number of nodes in strongly connected component:', cook_herm_elec_num_scc_nodes)
print('\tPercentage of largest component:', cook_herm_elec_largest_scc_percent, '\n')

#herm combined
cook_herm_comb_SCC = [component for component in nx.strongly_connected_components(cook_herm_comb_G)]
cook_herm_comb_num_scc_nodes = list(map(len, cook_herm_comb_SCC))
cook_herm_comb_largest_scc_percent = 100 * (
    max(cook_herm_comb_num_scc_nodes) / cook_herm_comb_G.number_of_nodes())
print('Cook herm comb number of nodes in strongly connected component:', cook_herm_comb_num_scc_nodes)
print('\tPercentage of largest component:', cook_herm_comb_largest_scc_percent, '\n')

Cook herm chem number of nodes in strongly connected component: [1, 1, 1, 275, 1, 1]
	Percentage of largest component: 98.21428571428571 

Cook herm elec number of nodes in strongly connected component: [274, 2]
	Percentage of largest component: 99.27536231884058 

Cook herm comb number of nodes in strongly connected component: [280]
	Percentage of largest component: 100.0 



In [12]:
# For each male graph: list the strongly connected components, their sizes,
# and the share of all nodes that falls in the largest one.

#male chemical
cook_male_chem_SCC = [component for component in nx.strongly_connected_components(cook_male_chem_G)]
cook_male_chem_num_scc_nodes = list(map(len, cook_male_chem_SCC))
cook_male_chem_largest_scc_percent = 100 * (
    max(cook_male_chem_num_scc_nodes) / cook_male_chem_G.number_of_nodes())
print('Cook male chem number of nodes in strongly connected component:', cook_male_chem_num_scc_nodes)
print('\tPercentage of largest component:', cook_male_chem_largest_scc_percent, '\n')

#male electrical
cook_male_elec_SCC = [component for component in nx.strongly_connected_components(cook_male_elec_G)]
cook_male_elec_num_scc_nodes = list(map(len, cook_male_elec_SCC))
cook_male_elec_largest_scc_percent = 100 * (
    max(cook_male_elec_num_scc_nodes) / cook_male_elec_G.number_of_nodes())
print('Cook male elec number of nodes in strongly connected component:', cook_male_elec_num_scc_nodes)
print('\tPercentage of largest component:', cook_male_elec_largest_scc_percent, '\n')

#male combined
cook_male_comb_SCC = [component for component in nx.strongly_connected_components(cook_male_comb_G)]
cook_male_comb_num_scc_nodes = list(map(len, cook_male_comb_SCC))
cook_male_comb_largest_scc_percent = 100 * (
    max(cook_male_comb_num_scc_nodes) / cook_male_comb_G.number_of_nodes())
print('Cook male comb number of nodes in strongly connected component:', cook_male_comb_num_scc_nodes)
print('\tPercentage of largest component:', cook_male_comb_largest_scc_percent, '\n')

Cook male chem number of nodes in strongly connected component: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 339, 1, 1, 1, 1, 1, 1]
	Percentage of largest component: 94.6927374301676 

Cook male elec number of nodes in strongly connected component: [323, 2]
	Percentage of largest component: 99.38461538461539 

Cook male comb number of nodes in strongly connected component: [1, 1, 1, 1, 1, 1, 351, 1]
	Percentage of largest component: 98.04469273743017 



In [31]:
# Display the node sets of every strongly connected component of the male combined graph.
list(cook_male_comb_SCC)

[{'SAADL'},
 {'RIPL'},
 {'SAADR'},
 {'RMFL'},
 {'SABD'},
 {'URADL'},
 {'ADAL',
  'ADEL',
  'ADER',
  'ADFL',
  'ADFR',
  'ADLL',
  'ADLR',
  'AFDL',
  'AFDR',
  'AIAL',
  'AIAR',
  'AIBL',
  'AIBR',
  'AIML',
  'AIMR',
  'AINR',
  'AIYL',
  'AIYR',
  'AIZL',
  'AIZR',
  'ALA',
  'ALML',
  'ALMR',
  'ALNL',
  'ALNR',
  'AQR',
  'AS01',
  'AS02',
  'AS03',
  'AS04',
  'AS05',
  'AS06',
  'AS07',
  'AS08',
  'AS09',
  'AS10',
  'AS11',
  'ASEL',
  'ASER',
  'ASGL',
  'ASGR',
  'ASHL',
  'ASHR',
  'ASIL',
  'ASIR',
  'ASJL',
  'ASJR',
  'ASKL',
  'ASKR',
  'AUAL',
  'AUAR',
  'AVAL',
  'AVAR',
  'AVBL',
  'AVBR',
  'AVDL',
  'AVDR',
  'AVEL',
  'AVER',
  'AVFL',
  'AVFR',
  'AVG',
  'AVHL',
  'AVHR',
  'AVJL',
  'AVJR',
  'AVKL',
  'AVKR',
  'AVL',
  'AVM',
  'AWAL',
  'AWAR',
  'AWBL',
  'AWBR',
  'AWCL',
  'AWCR',
  'BAGL',
  'BDUL',
  'BDUR',
  'CA02',
  'CA03',
  'CA04',
  'CA05',
  'CA06',
  'CA07',
  'CA08',
  'CA09',
  'CEMDL',
  'CEMDR',
  'CEMVL',
  'CEMVR',
  'CEPDL',
  'CEPDR',


In [13]:
def extract_largest_scc(edge_list_df):
    """Return the edgelist of the largest strongly connected component.

    Parameters
    ----------
    edge_list_df : pandas.DataFrame
        Edgelist with 'Source' and 'Target' columns; every other column is
        carried along as an edge attribute.

    Returns
    -------
    pandas.DataFrame
        One row per edge whose two endpoints both lie in the largest strongly
        connected component, with columns 'Source', 'Target' and then the edge
        attributes in their original order. If that component has no edges
        (a single node without a self-loop) the frame is empty and has only
        the 'Source' and 'Target' columns.

    Raises
    ------
    ValueError
        If the edgelist yields a graph with no nodes (``max`` of an empty
        sequence).
    """
    G = nx.from_pandas_edgelist(edge_list_df, 'Source','Target',edge_attr=True,create_using=nx.DiGraph)
    largest_scc = max(nx.strongly_connected_components(G), key=len)
    scc_edges = list(G.subgraph(largest_scc).copy().edges(data=True))

    # Attribute names in first-seen order, so the column layout stays stable.
    attr_columns = list(dict.fromkeys(key for _, _, attrs in scc_edges for key in attrs))

    # Build the frame from plain records in one go; expanding a column of
    # dicts with `.apply(pd.Series)` constructs one Series per edge and does
    # not yield a proper frame when there are no edges at all.
    records = [dict(attrs, Source=source, Target=target) for source, target, attrs in scc_edges]
    return pd.DataFrame(records, columns=['Source', 'Target'] + attr_columns)


In [14]:
# Largest strongly connected component of each Cook et al. network, as an edgelist.
cook_herm_chem_scc_el, cook_herm_elec_scc_el, cook_herm_combined_scc_el = map(
    extract_largest_scc,
    (cook_herm_chem_el, cook_herm_elec_el, cook_herm_combined_el),
)

cook_male_chem_scc_el, cook_male_elec_scc_el, cook_male_combined_scc_el = map(
    extract_largest_scc,
    (cook_male_chem_el, cook_male_elec_el, cook_male_combined_el),
)

#### Save the full network edgelists

In [15]:
# Write the full (unfiltered) edgelists without the row index.
for el_filename, el_frame in [
    ('cook_herm_chem_edgelist.csv', cook_herm_chem_el),
    ('cook_herm_elec_edgelist.csv', cook_herm_elec_el),
    ('cook_herm_combined_edgelist.csv', cook_herm_combined_el),
    ('cook_male_chem_edgelist.csv', cook_male_chem_el),
    ('cook_male_elec_edgelist.csv', cook_male_elec_el),
    ('cook_male_combined_edgelist.csv', cook_male_combined_el),
]:
    el_frame.to_csv(Path(full_path.parent, 'data', el_filename), index=False)

#### save the strongly connected component edgelists

In [16]:
# Write the largest-strongly-connected-component edgelists without the row index.
for scc_filename, scc_frame in [
    ('cook_herm_chem_strongly_connected_edgelist.csv', cook_herm_chem_scc_el),
    ('cook_herm_elec_strongly_connected_edgelist.csv', cook_herm_elec_scc_el),
    ('cook_herm_combined_strongly_connected_edgelist.csv', cook_herm_combined_scc_el),
    ('cook_male_chem_strongly_connected_edgelist.csv', cook_male_chem_scc_el),
    ('cook_male_elec_strongly_connected_edgelist.csv', cook_male_elec_scc_el),
    ('cook_male_combined_strongly_connected_edgelist.csv', cook_male_combined_scc_el),
]:
    scc_frame.to_csv(Path(full_path.parent, 'data', scc_filename), index=False)

#### Convert the strongly connected components edgelists to adjacency matrices and save them

In [17]:
def edgelist_to_am(edgelist): #edgelist has to have 'Source', 'Target', and 'Weight'
    """Convert an edgelist into a dense, labelled adjacency matrix.

    Parameters
    ----------
    edgelist : pandas.DataFrame
        Must contain 'Source', 'Target' and 'Weight' columns; any other
        column is ignored. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Square matrix whose rows (sources) and columns (targets) are the
        sorted unique node labels; entry [s, t] holds the weight of edge
        s -> t and 0 where there is no edge. Repeated (Source, Target) pairs
        are summed, which is how scipy's COO format treats duplicates.
    """
    sources = edgelist['Source']
    targets = edgelist['Target']

    # sorted unique node labels define the row/column order of the matrix
    nodes_uniq = sorted(set(sources.tolist()) | set(targets.tolist()))

    # Map each label to its position with a single index lookup on the two
    # node columns only. A frame-wide `replace` per node would also rewrite
    # matching values in other columns (e.g. 'Weight' when labels are numeric)
    # and depends on pandas downcasting object columns to integers.
    node_index = pd.Index(nodes_uniq)
    source = node_index.get_indexer(sources)
    target = node_index.get_indexer(targets)
    weight = edgelist['Weight'].to_numpy()

    # Use coo_matrix to assemble the sparse matrix, then densify it
    AM = coo_matrix((weight, (source, target)), shape=(len(nodes_uniq), len(nodes_uniq)))
    AM_df = pd.DataFrame(AM.toarray(), columns=nodes_uniq, index=nodes_uniq)
    return AM_df

In [18]:
# Adjacency matrix of each largest strongly connected component, written
# (with row/column labels) right after it is computed.

#herm chemical
cook_herm_chem_scc_am = edgelist_to_am(cook_herm_chem_scc_el)
cook_herm_chem_scc_am.to_csv(full_path.parent / 'data' / 'cook_herm_chem_strongly_connected_AM.csv')

#herm electrical
cook_herm_elec_scc_am = edgelist_to_am(cook_herm_elec_scc_el)
cook_herm_elec_scc_am.to_csv(full_path.parent / 'data' / 'cook_herm_elec_strongly_connected_AM.csv')

#herm combined
cook_herm_combined_scc_am = edgelist_to_am(cook_herm_combined_scc_el)
cook_herm_combined_scc_am.to_csv(full_path.parent / 'data' / 'cook_herm_combined_strongly_connected_AM.csv')

#male chemical
cook_male_chem_scc_am = edgelist_to_am(cook_male_chem_scc_el)
cook_male_chem_scc_am.to_csv(full_path.parent / 'data' / 'cook_male_chem_strongly_connected_AM.csv')

#male electrical
cook_male_elec_scc_am = edgelist_to_am(cook_male_elec_scc_el)
cook_male_elec_scc_am.to_csv(full_path.parent / 'data' / 'cook_male_elec_strongly_connected_AM.csv')

#male combined
cook_male_combined_scc_am = edgelist_to_am(cook_male_combined_scc_el)
cook_male_combined_scc_am.to_csv(full_path.parent / 'data' / 'cook_male_combined_strongly_connected_AM.csv')

  edgelist =edgelist.replace(nodes_map[i][1], nodes_map[i][0])


## Witvliet et al. data

Witvliet et al. (2021) published connectome datasets (of chemical synapses) for hermaphrodite C. elegans across developmental stages, covering more than one worm. The data is available in the Supplementary Tables of 'Witvliet et al., Connectomes across development reveal principles of brain maturation, Nature, 2021': https://doi.org/10.1038/s41586-021-03778-8 (the data is also available at https://www.wormwiring.org/ ). For the purpose of comparison, we took Dataset7 and Dataset8 because they were reconstructed from adult hermaphrodites. The slightly edited original dataset (to include neurons only) is at /data/witvliet_etal_ST2_adjacency_matrices_neurons.xlsx. In this notebook, the preprocessed datasets are saved as wit_d7_AM.csv / wit_d8_AM.csv and wit_d7_edgelist.csv / wit_d8_edgelist.csv.

#### Extract the adult adjacency matrices (Dataset7 and Dataset8)

In [19]:
#Dataset 7
# Parse the workbook once: with a list of sheet names read_excel returns a
# {sheet name: DataFrame} dict, so the file is not re-opened for each dataset.
wit_sheets = pd.read_excel(Path(full_path.parent, 'data', 'witvliet_etal_ST2_adjacency_matrices_neurons.xlsx'),
                           sheet_name=['Dataset7', 'Dataset8'],
                           index_col='Neurons')

#Dataset 7
#transpose the dataframe to match the convention of Cook et al. (pre-synaptic as rows and post-synaptic as columns)
wit_d7 = wit_sheets['Dataset7'].T

#Dataset 8
#transpose the dataframe to match the convention of Cook et al. (pre-synaptic as rows and post-synaptic as columns)
wit_d8 = wit_sheets['Dataset8'].T

In [20]:
# Write the transposed Witvliet adjacency matrices (labels included).
wit_d7.to_csv(full_path.parent / 'data' / 'wit_d7_AM.csv')
wit_d8.to_csv(full_path.parent / 'data' / 'wit_d8_AM.csv')

#### Convert the adjacency matrices to edgelist format

In [21]:
def _wit_matrix_to_edgelist(adjacency_df):
    """Turn a transposed Witvliet adjacency matrix into a Source/Target/Weight edgelist.

    After the transpose the row index is unnamed (stacked as 'level_0', used
    as 'Source') and the column axis is named 'Neurons' (used as 'Target').
    Entries with weight 0 are dropped. An explicit copy is returned so that
    later column assignments on the edgelist act on an independent frame
    rather than on a slice of the stacked one (which triggers pandas'
    SettingWithCopyWarning).
    """
    stacked = adjacency_df.stack().reset_index().rename(
        columns={'level_0':'Source','Neurons':'Target', 0:'Weight'})
    return stacked[stacked['Weight']!=0].copy()


print('Number of synapse:')

#dataset7
wit_d7_el = _wit_matrix_to_edgelist(wit_d7)
print('\twit7:', len(wit_d7_el))

#dataset8
wit_d8_el = _wit_matrix_to_edgelist(wit_d8)
print('\twit8:', len(wit_d8_el))

Number of synapse:
	wit7: 1944
	wit8: 1933


#### Add source and target cell types 

In [22]:
#load the Witvliet cell types (given as cell classes instead of individual names)
wit_cell_types = pd.read_csv(Path(full_path.parent, 'data','witvliet_etal_ST1_cell_types.csv'))

# Same in-place annotation for dataset 7 and dataset 8: trim the neuron names,
# then attach source/target cell types looked up by cell class.
for wit_el in (wit_d7_el, wit_d8_el):
    wit_el['Source'] = wit_el.Source.apply(lambda name: name.strip())
    wit_el['Target'] = wit_el.Target.apply(lambda name: name.strip())
    source_ct, target_ct = assign_cell_types(wit_el, wit_cell_types, 'cell_class')
    wit_el['source_ct'] = source_ct
    wit_el['target_ct'] = target_ct

#### Check connectedness

In [23]:
# Directed graphs from the Witvliet edgelists; all non-endpoint columns become edge attributes.
wit_digraph_kwargs = dict(source='Source', target='Target', edge_attr=True, create_using=nx.DiGraph)
wit_d7_G = nx.from_pandas_edgelist(wit_d7_el, **wit_digraph_kwargs)
wit_d8_G = nx.from_pandas_edgelist(wit_d8_el, **wit_digraph_kwargs)

for wit_strong_label, wit_strong_graph in [
    ('Wiv7 herm strongly connected:', wit_d7_G),
    ('Wiv8 herm strongly connected:', wit_d8_G),
]:
    print(wit_strong_label, nx.is_strongly_connected(wit_strong_graph))

Wiv7 herm strongly connected: False
Wiv8 herm strongly connected: False


In [28]:
# Rebuild the directed graphs from the edgelists, then report weak connectivity.
wit_d7_G, wit_d8_G = (
    nx.from_pandas_edgelist(wit_el_df, 'Source', 'Target', edge_attr=True, create_using=nx.DiGraph)
    for wit_el_df in (wit_d7_el, wit_d8_el)
)

for wit_weak_label, wit_weak_graph in [
    ('Wiv7 herm weakly connected:', wit_d7_G),
    ('Wiv8 herm weakly connected:', wit_d8_G),
]:
    print(wit_weak_label, nx.is_weakly_connected(wit_weak_graph))

Wiv7 herm weakly connected: True
Wiv8 herm weakly connected: True


In [24]:
# For each Witvliet graph: list the strongly connected components, their
# sizes, and the share of all nodes that falls in the largest one.

#dataset 7
wit_d7_SCC = [component for component in nx.strongly_connected_components(wit_d7_G)]
wit_d7_num_scc_nodes = list(map(len, wit_d7_SCC))
wit_d7_largest_scc_percent = 100 * (max(wit_d7_num_scc_nodes) / wit_d7_G.number_of_nodes())
print('Witv7 number of nodes in strongly connected component:', wit_d7_num_scc_nodes)
print('\tPercentage of largest component:', wit_d7_largest_scc_percent, '\n')

#dataset 8
wit_d8_SCC = [component for component in nx.strongly_connected_components(wit_d8_G)]
wit_d8_num_scc_nodes = list(map(len, wit_d8_SCC))
wit_d8_largest_scc_percent = 100 * (max(wit_d8_num_scc_nodes) / wit_d8_G.number_of_nodes())
print('Witv8 number of nodes in strongly connected component:', wit_d8_num_scc_nodes)
print('\tPercentage of largest component:', wit_d8_largest_scc_percent, '\n')

Witv7 number of nodes in strongly connected component: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 159]
	Percentage of largest component: 88.33333333333333 

Witv8 number of nodes in strongly connected component: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 159, 1]
	Percentage of largest component: 88.33333333333333 



#### Extract the largest strongly connected component

In [25]:
# Largest strongly connected component of each Witvliet dataset, as an edgelist.
wit_d7_scc_el, wit_d8_scc_el = map(extract_largest_scc, (wit_d7_el, wit_d8_el))

#### Save the edgelists of Witvliet datasets 7 and 8 (both the original and the strongly connected component)

In [26]:
# Full edgelists first, then the strongly-connected-component edgelists; no row index is written.
for wit_filename, wit_frame in [
    ('wit_d7_edgelist.csv', wit_d7_el),
    ('wit_d8_edgelist.csv', wit_d8_el),
    ('wit_d7_strongly_connected_edgelist.csv', wit_d7_scc_el),
    ('wit_d8_strongly_connected_edgelist.csv', wit_d8_scc_el),
]:
    wit_frame.to_csv(Path(full_path.parent, 'data', wit_filename), index=False)

#### Convert the strongly connected component to AM and save

In [27]:
# Adjacency matrix of each largest strongly connected component, written
# (with row/column labels) right after it is computed.

#dataset 7
wit_d7_scc_am = edgelist_to_am(wit_d7_scc_el)
wit_d7_scc_am.to_csv(full_path.parent / 'data' / 'wit_d7_strongly_connected_AM.csv')

#dataset 8
wit_d8_scc_am = edgelist_to_am(wit_d8_scc_el)
wit_d8_scc_am.to_csv(full_path.parent / 'data' / 'wit_d8_strongly_connected_AM.csv')

  edgelist =edgelist.replace(nodes_map[i][1], nodes_map[i][0])
