In [2]:
import pandas as pd
import scipy.sparse as ss

from utils import synapses_to_matrix_and_dict

# Synaptic matrix and root ID → index map

In [8]:
# Read the synapse table from the feather file, keeping only the two root-id
# columns and the connection score, then rename them to the shorter names the
# rest of the notebook uses and cast the score column to int.
# NOTE(review): "connection_score" is relabelled "syn_count" here — confirm
# that the score really is meant to be treated as a synapse count.
synapse_column_renames = {
    "pre_pt_root_id": "pre_root_id",
    "post_pt_root_id": "post_root_id",
    "connection_score": "syn_count",
}
df = (
    pd.read_feather(
        "new_data/flywire_synapses_783.feather",
        columns=list(synapse_column_renames),
    )
    .rename(columns=synapse_column_renames)
    .astype({"syn_count": int})
)

In [19]:
# Build the connectivity matrix and its companion dictionary from the synapse
# table using the project helper imported from `utils`.
# NOTE(review): `mm` is later passed to ss.save_npz, so it is presumably a
# scipy sparse matrix, and `synapse_dict` is later written out with columns
# ("root_id", "index"), so it presumably maps root_id -> matrix index.
# Confirm both against the helper's implementation.
mm, synapse_dict = synapses_to_matrix_and_dict(df)

In [22]:
# Persist the matrix in SciPy's sparse .npz format.
ss.save_npz("new_data/new_synaptic_matrix.npz", mm)

# Persist the dictionary as a two-column CSV (key -> "root_id", value -> "index").
root_id_index_table = pd.DataFrame(
    synapse_dict.items(), columns=["root_id", "index"]
)
root_id_index_table.to_csv("new_data/new_root_id_to_index.csv", index=False)

## Grouping connections

In [None]:
# Collapse repeated (pre_root_id, post_root_id) rows into a single row per
# pair by summing the remaining columns (i.e. syn_count).
# Note: this rebinds `df`, so every later cell sees the grouped table.
pair_keys = ["pre_root_id", "post_root_id"]
df = df.groupby(pair_keys, as_index=False).sum()

In [12]:
# Rebuild the matrix and dictionary from the current `df` and write both out
# under the "new_grouped_*" file names.
mm, synapse_dict = synapses_to_matrix_and_dict(df)

ss.save_npz("new_data/new_grouped_synaptic_matrix.npz", mm)
(
    pd.DataFrame(synapse_dict.items(), columns=["root_id", "index"])
    .to_csv("new_data/new_grouped_root_id_to_index.csv", index=False)
)

## Only proofread connections

In [5]:
# Read the proofread connection table, shorten the root-id column names, and
# sum syn_count so that each (pre_root_id, post_root_id) pair appears once.
proofread_column_renames = {
    "pre_pt_root_id": "pre_root_id",
    "post_pt_root_id": "post_root_id",
}
pc = (
    pd.read_feather(
        "new_data/proofread_connections_783.feather",
        columns=["pre_pt_root_id", "post_pt_root_id", "syn_count"],
    )
    .rename(columns=proofread_column_renames)
    .groupby(["pre_root_id", "post_root_id"], as_index=False)
    .sum()
)

In [5]:
# Write the grouped proofread connections (one row per pre/post pair) to CSV,
# omitting the DataFrame index column.
pc.to_csv("new_data/connections.csv", index=False)

In [16]:

# Build the matrix and dictionary from the proofread connections only and
# write both out under the "proofread_*" file names.
mm, synapse_dict = synapses_to_matrix_and_dict(pc)

ss.save_npz("new_data/proofread_synaptic_matrix.npz", mm)

proofread_index_table = pd.DataFrame(
    synapse_dict.items(), columns=["root_id", "index"]
)
proofread_index_table.to_csv(
    "new_data/proofread_root_id_to_index.csv", index=False
)

# Neuron classification

In [3]:
# Load the neuron annotation table from the tab-separated supplemental file.
# NOTE(review): the saved output of this cell echoes this statement, which is
# how Python prints the source line of a warning — possibly a pandas
# DtypeWarning about mixed-type columns. Confirm, and if so consider passing
# an explicit `dtype=` for the affected columns.
nc = pd.read_table("new_data/Supplemental_file1_neuron_annotations.tsv")

  nc = pd.read_table("new_data/Supplemental_file1_neuron_annotations.tsv")


In [6]:
# Select the annotated neurons whose root_id occurs among the proofread
# connections: once as a presynaptic partner, once as a postsynaptic partner.
is_presynaptic = nc["root_id"].isin(pc["pre_root_id"])
is_postsynaptic = nc["root_id"].isin(pc["post_root_id"])
pre_nc = nc.loc[is_presynaptic]
post_nc = nc.loc[is_postsynaptic]

# Union of the two selections; rows present in both are kept only once.
pre_post_nc = pd.concat([pre_nc, post_nc]).drop_duplicates()


In [9]:
# Tally how many of the selected neurons carry each cell_type label.
cell_type_counts = pre_post_nc["cell_type"].value_counts()
cell_type_counts

cell_type
R1-6       7932
Dm3        2545
T2a        1781
Tm3        1746
T4c        1692
           ... 
DNge154       1
CB2716        1
CB3693        1
DNp72         1
CB3428        1
Name: count, Length: 5634, dtype: int64

In [11]:
# Display the synapse table; pandas renders a truncated preview of the frame.
# NOTE(review): `df` is rebound by the grouping cell, so what is shown here
# depends on which cells have been run in this kernel — confirm the intended
# table, and consider `df.head()` for a lighter preview.
df

Unnamed: 0,pre_root_id,post_root_id,syn_count
0,720575940379281722,720575940379283258,42
1,720575940379283482,720575940379282970,30
2,720575940379283719,720575940609623376,266
3,720575940379284367,720575940601751816,151
4,720575940379284367,720575940614275070,550
...,...,...,...
76460809,720575940661339009,720575940629746038,61
76460810,720575940661339777,720575940427732749,18
76460811,720575940661339777,720575940433957960,126
76460812,720575940661339777,720575940616982614,391
