# Annotation counts
In this notebook we will compile some of the numbers reported in Schlegel _et al._ (2023) straight from the data dump.

In [1]:
import pandas as pd
import numpy as np

# Read the annotation dump
annot = pd.read_csv(
    "../supplemental_files/Supplemental_file1_annotations.tsv",
    sep="\t",  # it's a tab-separated file
    low_memory=False,  # this is just to avoid a datatype warning
)
annot.head()

Unnamed: 0,supervoxel_id,root_id,pos_x,pos_y,pos_z,soma_x,soma_y,soma_z,nucleus_id,flow,...,hemibrain_type,ito_lee_hemilineage,hartenstein_hemilineage,morphology_group,top_nt,top_nt_conf,side,nerve,fbbt_id,status
0,78112261444987077,720575940628857210,109306,50491,3960,104904.0,47464.0,5461.0,2453924.0,intrinsic,...,PS180,SMPpv2_ventral,CP1_ventral,SMPpv2_ventral_3,acetylcholine,0.914499,left,,FBbt_20001935,
1,82475466912542440,720575940626838909,172029,55635,1592,177472.0,56936.0,1429.0,7393349.0,intrinsic,...,,VLPl2_medial,BLAv2_medial,VLPl2_medial_1,acetylcholine,0.638088,right,,,
2,83038623024880664,720575940626046919,180632,58664,1925,180632.0,58664.0,1925.0,7415038.0,intrinsic,...,AVLP429,,,,acetylcholine,0.838454,right,,FBbt_20000538,
3,79801523353604463,720575940630311383,133800,56063,1847,180728.0,61008.0,1630.0,7415013.0,intrinsic,...,AVLP151,putative_primary,putative_primary,,acetylcholine,0.755116,right,,FBbt_20000260,
4,83038554439606237,720575940633370649,180496,57448,2989,180496.0,57448.0,2989.0,7415848.0,intrinsic,...,LC27,,,,acetylcholine,0.886547,right,,FBbt_00051248,


Some overall stats:

In [2]:
print("Number of neurons:")
print(annot[(annot.super_class != "not_a_neuron")].shape[0])

Number of neurons:
127978


In [3]:
print("A breakdown by superclass:")
print(annot.super_class.value_counts())

A breakdown by superclass:
optic                 73656
central               32422
sensory                9708
visual_projection      7851
ascending              2364
descending             1303
not_a_neuron            846
visual_centrifugal      494
motor                   100
endocrine                80
Name: super_class, dtype: int64


In [4]:
print("Sensory neurons in the central brain (i.e. excluding photoreceptor neurons):")
print(annot[(annot.super_class == "sensory") & (annot.cell_class != "visual")].shape[0])

Sensory neurons in the central brain (i.e. excluding photoreceptor neurons):
5495


In [5]:
print("The number of total annotations across all neurons:")
print(
    annot[
        [
            "flow",
            "super_class",
            "cell_class",
            "cell_sub_class",
            "cell_type",
            "hemibrain_type",
            "side",
            "nerve",
            "ito_lee_hemilineage",
            "morphology_group",
            "soma_x",  # whether a soma position has been recorded
        ]
    ]
    .notnull()
    .sum()
    .sum()
)

The number of total annotations across all neurons:
726397


In [6]:
print("The number of total annotations per field:")
print(
    annot[
        [
            "flow",
            "super_class",
            "cell_class",
            "cell_sub_class",
            "cell_type",
            "hemibrain_type",
            "side",
            "nerve",
            "ito_lee_hemilineage",
            "morphology_group",
        ]
    ]
    .notnull()
    .sum()
)

The number of total annotations per field:
flow                   127979
super_class            128824
cell_class              98518
cell_sub_class           6667
cell_type               18602
hemibrain_type          26421
side                   128822
nerve                    9611
ito_lee_hemilineage     36546
morphology_group        28011
dtype: int64


Stats on cell types

In [7]:
# Add a column where we backfill cell type with hemibrain type
# (i.e. `cell_type` is considered to be the "final" type)
annot["type_combined"] = annot.cell_type
annot.loc[annot.type_combined.isnull(), "type_combined"] = annot.loc[
    annot.type_combined.isnull(), "hemibrain_type"
]

print("Number of neurons with a cell type:")
print(
    f"{annot.type_combined.notnull().sum()} of {annot.shape[0]} ({annot.type_combined.notnull().sum()/annot.shape[0]:.1%})"
)

Number of neurons with a cell type:
40950 of 128824 (31.8%)


In [8]:
# Subset to all neurons with arbour in the central brain
cent = annot[
    annot.super_class.isin(
        (
            "central",
            "descending",
            "motor",
            "endocrine",
            "sensory",
            "ascending",
            "visual_projection",
            "visual_centrifugal",
        )
    )
    & (
        annot.cell_class != "visual"
    )  # we included all sensory abobve but must exclude visual sensory
]
print("Number of central brain neurons with a cell type:")
print(
    f"{cent.type_combined.notnull().sum()} of {cent.shape[0]} ({cent.type_combined.notnull().sum()/cent.shape[0]:.1%})"
)

Number of central brain neurons with a cell type:
31129 of 50109 (62.1%)


In [9]:
# Subset to all neurons intrinsic to the central brain
cent_int = annot[
    annot.super_class.isin(
        (
            "central",
            "descending",
            "motor",
            "endocrine",
            "visual_centrifugal",
        )
    )
    & (annot.cell_class != "visual")
]
print("Number of central brain-intrinsic neurons with a cell type:")
print(
    f"{cent_int.type_combined.notnull().sum()} of {cent_int.shape[0]} ({cent_int.type_combined.notnull().sum()/cent_int.shape[0]:.1%})"
)

Number of central brain-intrinsic neurons with a cell type:
19667 of 34399 (57.2%)


In [10]:
print("Total number of unique cell types:")
print(annot.type_combined.unique().shape[0])

Total number of unique cell types:
4179


In [11]:
print(
    "Number of cell types based on three hemispheres worth of data (high confidence):"
)
# i.e. for how many final cell types do we have that are based on three
# hemispheres
print(
    annot[annot.hemibrain_type.notnull() & annot.type_combined.notnull()]
    .type_combined.unique()
    .shape[0]
)

print(
    "Number of FlyWire neurons with cell types based on three hemispheres worth of data:"
)
print(annot[annot.hemibrain_type.notnull() & annot.type_combined.notnull()].shape[0])

Number of cell types based on three hemispheres worth of data (high confidence):
3166
Number of FlyWire neurons with cell types based on three hemispheres worth of data:
26421


In [12]:
print("Number of cell types based on just FlyWire left and right:")
# i.e. for how many final cell types do we have that are based on three
# hemispheres
is_new_dn = (  # find new DNs that have no match in the hemibrain
    (annot.super_class == "descending")
    & annot.cell_type.str.contains("DN[a-z]e.*?", regex=True, na=False)
    & annot.hemibrain_type.isnull()
)
print(
    annot[annot.cell_type.str.startswith("CB", na=False) | is_new_dn]
    .type_combined.unique()
    .shape[0]
)

Number of cell types based on just FlyWire left and right:
850


In [13]:
# How many hemibrain cell types did we manage to match?
# Keep in mind that hemibrain_type contains merges such as "SIP078,SIP080"
matched_types = np.unique(
    [
        c.strip()
        for l in annot[annot.hemibrain_type.notnull()].hemibrain_type.unique()
        for c in l.split(",")
    ]
)

# Load the hemibrain meta data we compiled
# Each row in this file is a hemibrain neuron
hb_meta = pd.read_csv("../supplemental_files/Supplemental_file4_hemibrain_meta.csv")

# `matched_types` contains a mix of mostly morphology types but also a few connectivity types
hb_meta["found"] = hb_meta.type.isin(matched_types) | hb_meta.morphology_type.isin(
    matched_types
)

In [14]:
hb_types = hb_meta[hb_meta.type.notnull()].drop_duplicates("type")
print("Number of hemibrain types found in Flywire:")
print(
    f"{hb_types.found.sum()} of {hb_types.shape[0]} ({hb_types.found.sum() / hb_types.shape[0]:.1%})"
)

print("Number of hemibrain neurons for which we found types in Flywire:")
print(
    hb_meta.found.sum()
    + annot[annot.hemibrain_type.notnull() & annot.type_combined.notnull()].shape[0]
)

Number of hemibrain types found in Flywire:
3237 of 5620 (57.6%)
Number of hemibrain neurons for which we found types in Flywire:
42687


In [15]:
# How often have we been able to map down to the connectivity-type level?
hb_cn_types = hb_types[hb_types.type != hb_types.morphology_type]
print("Number of hemibrain connectivity types found in FlyWire:")
print(
    f"{hb_cn_types.type.isin(matched_types).sum()} of {hb_cn_types.shape[0]} ({hb_cn_types.type.isin(matched_types).sum() / hb_cn_types.shape[0]:.1%})"
)

Number of hemibrain connectivity types found in FlyWire:
12 of 639 (1.9%)


In [16]:
print("Number of types representing merges of two or more hemibrain types:")
merges = annot[
    annot.hemibrain_type.str.contains(",", na=False)
    & ~annot.hemibrain_type.str.contains("hb", na=False)
]
print(
    f"{merges.hemibrain_type.unique().shape[0]} ({merges.hemibrain_type.unique().shape[0]/annot[annot.hemibrain_type.notnull()].hemibrain_type.unique().shape[0]:.1%})"
)

Number of types representing merges of two or more hemibrain types:
118 (3.9%)


In [17]:
print("Number of hemibrain types contained in those merged cell types:")
in_merge = np.unique(
    [t.strip() for l in merges.hemibrain_type.unique() for t in l.split(",")]
)
print(
    f"{in_merge.shape[0]} ({in_merge.shape[0] / matched_types.shape[0]:.1%} of all matched hemibrain types)"
)

Number of hemibrain types contained in those merged cell types:
295 (9.1% of all matched hemibrain types)
