In [1]:
import os

import numpy as np
import pyarrow.feather as feather
import requests

# Dataset release tag; interpolated into both the download URL and the table file names.
VERSION = 'v0.9'

def load_table(file_name):
    """Load a flat-connectome feather table, downloading it first if it is not on disk.

    The file is looked up in the current working directory under ``file_name``.
    If it is missing, it is streamed from the ``flyem-male-cns`` bucket for the
    configured ``VERSION`` and saved under that name, so later calls read the
    local copy.

    Args:
        file_name: Name of the feather file, used both as the local path and as
            the last component of the download URL.

    Returns:
        The table as a DataFrame. A ``bodyId`` column is renamed to ``body``,
        and if a ``body`` column is present it becomes the index.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        Any other exception raised while downloading or writing is re-raised
        unchanged after the partial download has been removed.
    """
    if not os.path.exists(file_name):
        url = f'https://storage.googleapis.com/flyem-male-cns/{VERSION}/connectome-data/flat-connectome/{file_name}'
        # Stream into a sibling ".part" file and rename only on success, so an
        # interrupted download can never be mistaken for a complete cached file.
        partial_name = f'{file_name}.part'
        try:
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                with open(partial_name, "wb") as f:
                    for chunk in r.iter_content(chunk_size=2**20):
                        f.write(chunk)
            os.replace(partial_name, file_name)
        except BaseException:
            # BaseException so that interrupting the kernel mid-download also cleans up.
            # The existence check matters: if the request failed before the file was
            # opened there is nothing to delete, and an unconditional remove would
            # raise FileNotFoundError in place of the real error.
            if os.path.exists(partial_name):
                os.remove(partial_name)
            raise

    df = feather.read_feather(file_name).rename(columns={'bodyId': 'body'})
    if 'body' in df.columns:
        df = df.set_index('body')
    return df

def load_annotations_and_weights():
    """Load the body annotations and the connection weights, labelling each edge's endpoints.

    Returns:
        A tuple ``(annotations, weights)``. ``annotations`` is the table exactly
        as returned by ``load_table``. ``weights`` gains four columns --
        ``superclass_pre``, ``superclass_post``, ``type_pre``, ``type_post`` --
        obtained by looking up ``body_pre`` / ``body_post`` in the corresponding
        ``annotations`` column.
    """
    annotations = load_table(f'body-annotations-male-cns-{VERSION}-minconf-0.5.feather')
    weights = load_table(f'connectome-weights-male-cns-{VERSION}-minconf-0.5.feather')

    # Same four columns, in the same order: superclass_pre, superclass_post, type_pre, type_post.
    for label in ('superclass', 'type'):
        lookup = annotations[label]
        for side in ('pre', 'post'):
            weights[f'{label}_{side}'] = weights[f'body_{side}'].map(lookup)

    return annotations, weights

In [2]:
# Load both tables (load_table downloads any file that is not already on disk).
# `weights` comes back with superclass_*/type_* columns for the pre and post bodies.
annotations, weights = load_annotations_and_weights()

In [3]:
# Minimum per-edge weight for an edge to be kept in the thresholded graph.
MIN_EDGE_WEIGHT = 5


def _endpoint_bodies(edges):
    """Return the set of body IDs that appear as either endpoint of `edges`."""
    # np.concatenate rather than np.concat: the latter only exists in NumPy >= 2.0.
    return set(np.concatenate([
        edges['body_pre'].unique(),
        edges['body_post'].unique()
    ]))


# Superclasses: drop missing/empty labels and any label containing 'tbc'.
valid_superclasses = [sc for sc in annotations['superclass'].unique() if sc and 'tbc' not in sc]
print(f"There are {len(valid_superclasses)} valid superclasses")

# Types: drop missing/empty labels and any label containing 'unclear'.
# NOTE(review): 'Pm7_Li28' is excluded by name; the reason is not recorded here -- confirm.
valid_types = [t for t in annotations['type'].unique() if t and 'unclear' not in t and t != "Pm7_Li28"]
print(f"There are {len(valid_types)} valid types")

# Keep only edges whose two endpoints both have a valid superclass.
valid_weights = weights.query('superclass_pre in @valid_superclasses and superclass_post in @valid_superclasses')
valid_weights_neurons = _endpoint_bodies(valid_weights)
print(f"There are {len(valid_weights)} edges between neurons with valid superclasses, "
      f"connecting {len(valid_weights_neurons)} neurons")

# Drop weak edges.
thresholded_valid_weights = valid_weights.loc[valid_weights['weight'] >= MIN_EDGE_WEIGHT]
thresholded_valid_weights_neurons = _endpoint_bodies(thresholded_valid_weights)
print(f"After thresholding by connection strength >= {MIN_EDGE_WEIGHT}, there are {len(thresholded_valid_weights)}"
      f" edges between {len(thresholded_valid_weights_neurons)} neurons")

# Collapse neuron-level edges to type-level edges by summing weights per (type_pre, type_post).
thresholded_type_weights = (
    thresholded_valid_weights
    .query('type_pre in @valid_types and type_post in @valid_types')
    # Guard against null type labels before grouping.
    .dropna(subset=['type_pre', 'type_post'])
    .groupby(['type_pre', 'type_post'])['weight'].sum()
)
# Read the two group keys straight off the MultiIndex instead of materialising
# reset_index() twice.
type_index = thresholded_type_weights.index
thresholded_type_edge_types = len(
    set(type_index.get_level_values('type_pre'))
    | set(type_index.get_level_values('type_post'))
)
print(f"After aggregating thresholded edges by type, there are {len(thresholded_type_weights)}"
      f" type-type edges between {thresholded_type_edge_types} types")

There are 20 valid superclasses
There are 11691 valid types
There are 25563426 edges between neurons with valid superclasses, connecting 166391 neurons
After thresholding by connection strength >= 5, there are 6237402 edges between 165752 neurons
After aggregating thresholded edges by type, there are 929735 type-type edges between 11687 types
