In [None]:
import numpy as np
import pandas as pd
import scipy.sparse as ss

from utils import synapses_to_matrix_and_dict

# Synaptic matrix and root-ID-to-index map

In [None]:
# Load the raw synapse table, keeping only the two root-id columns and the score.
df = pd.read_feather(
    "new_data/flywire_synapses_783.feather",
    columns=["pre_pt_root_id", "post_pt_root_id", "connection_score"],
)

# Shorten the column names to the pre_root_id / post_root_id / syn_count schema
# used by the rest of the notebook.
df = df.rename(
    columns={
        "pre_pt_root_id": "pre_root_id",
        "post_pt_root_id": "post_root_id",
        "connection_score": "syn_count",
    }
)

# NOTE(review): connection_score is renamed to syn_count and cast to int here;
# confirm that it really is a count and not a confidence score being truncated.
df["syn_count"] = df["syn_count"].astype(int)

In [None]:
# synapses_to_matrix_and_dict (from utils) returns a pair: the matrix that is
# written with ss.save_npz in the next cell, and a dict whose items are saved
# there as (root_id, index) rows.
mm, synapse_dict = synapses_to_matrix_and_dict(df)

In [None]:
# Persist the matrix in SciPy's sparse .npz format.
ss.save_npz("new_data/new_synaptic_matrix.npz", mm)

# Persist the root_id -> index lookup as a two-column CSV (no pandas index column).
pd.DataFrame(
    synapse_dict.items(), columns=["root_id", "index"]
).to_csv("new_data/new_root_id_to_index.csv", index=False)

## Grouping connections

In [None]:
# Collapse repeated (pre_root_id, post_root_id) pairs into one row each by
# summing their syn_count values. Note that this reassigns `df` in place.
df = df.groupby(["pre_root_id", "post_root_id"], as_index=False).sum()

In [None]:
# Rebuild the matrix and the root_id -> index dict from the grouped table.
mm, synapse_dict = synapses_to_matrix_and_dict(df)

# Save both artefacts under the "new_grouped" prefix.
ss.save_npz("new_data/new_grouped_synaptic_matrix.npz", mm)
pd.DataFrame(
    synapse_dict.items(), columns=["root_id", "index"]
).to_csv("new_data/new_grouped_root_id_to_index.csv", index=False)

## Only proofread connections

In [None]:
# Load the proofread connections, rename the id columns to the short schema,
# and sum syn_count over every (pre_root_id, post_root_id) pair.
pc = (
    pd.read_feather(
        "new_data/proofread_connections_783.feather",
        columns=["pre_pt_root_id", "post_pt_root_id", "syn_count"],
    )
    .rename(
        columns={
            "pre_pt_root_id": "pre_root_id",
            "post_pt_root_id": "post_root_id",
        }
    )
    .groupby(["pre_root_id", "post_root_id"], as_index=False)
    .sum()
)

In [None]:
# Export the aggregated proofread connections (pre_root_id, post_root_id,
# syn_count) as CSV, without the pandas index column.
pc.to_csv("new_data/connections.csv", index=False)

In [None]:

# Build the matrix and the root_id -> index dict from the proofread connections.
mm, synapse_dict = synapses_to_matrix_and_dict(pc)

# Save both artefacts under the "proofread" prefix.
ss.save_npz("new_data/proofread_synaptic_matrix.npz", mm)
pd.DataFrame(
    synapse_dict.items(), columns=["root_id", "index"]
).to_csv("new_data/proofread_root_id_to_index.csv", index=False)

# Refined proofread synaptic matrix

In [None]:
# Sign the proofread connections by predicted neurotransmitter.
# Following https://www.nature.com/articles/s41586-024-07763-9:
#   "We assume GABAergic and glutamatergic neurons are inhibitory"
# and everything else is treated as excitatory.
# (numpy is imported with the other imports in the first cell.)
rpc = pd.read_feather("new_data/proofread_connections_783.feather")

# A row is treated as inhibitory when gaba_avg + glut_avg exceeds 0.5.
# Rows where that sum is NaN compare False and therefore stay positive.
is_inhibitory = (rpc["gaba_avg"] + rpc["glut_avg"]) > 0.5

# Inhibitory rows get a negated synapse count; all other rows are unchanged.
rpc["syn_count"] = np.where(
    is_inhibitory, -1 * rpc["syn_count"], rpc["syn_count"]
)

In [None]:
# Keep only the id columns and the signed syn_count, rename to the short
# schema, and sum per (pre_root_id, post_root_id) pair. Because syn_count is
# signed at this point, rows of the same pair with opposite signs offset each
# other in the sum.
crpc = (
    rpc[["post_pt_root_id", "pre_pt_root_id", "syn_count"]]
    .rename(
        columns={
            "pre_pt_root_id": "pre_root_id",
            "post_pt_root_id": "post_root_id",
        }
    )
    .groupby(["pre_root_id", "post_root_id"], as_index=False)
    .sum()
)

In [None]:
# Export the signed, aggregated connections (pre_root_id, post_root_id,
# syn_count) as CSV, without the pandas index column.
crpc.to_csv("new_data/connections_refined.csv", index=False)

In [None]:
# Build the matrix and the root_id -> index dict from the signed connections.
mm, synapse_dict = synapses_to_matrix_and_dict(crpc)

# Save both artefacts under the "proofread_refined" prefix.
ss.save_npz("new_data/proofread_refined_synaptic_matrix.npz", mm)
pd.DataFrame(
    synapse_dict.items(), columns=["root_id", "index"]
).to_csv("new_data/proofread_refined_root_id_to_index.csv", index=False)

In [None]:
# Load the neuron annotation table (tab-separated values).
nc = pd.read_csv("new_data/Supplemental_file1_neuron_annotations.tsv", sep="\t")

In [None]:
# Select the annotated neurons (rows of nc) whose root_id appears as a
# presynaptic partner, and those whose root_id appears as a postsynaptic
# partner, in the proofread connections.
pre_nc = nc.loc[nc["root_id"].isin(pc["pre_root_id"])]
post_nc = nc.loc[nc["root_id"].isin(pc["post_root_id"])]

# Stack both selections (pre rows first) and drop rows that are exact
# duplicates, keeping the first occurrence. The result covers neurons that are
# pre OR post partners.
pre_post_nc = pd.concat([pre_nc, post_nc], axis=0).drop_duplicates(keep="first")


In [None]:
# Show how many of the selected annotated neurons fall into each cell_type.
pre_post_nc["cell_type"].value_counts()

In [None]:
# Display the synapse table. When the cells are run in order, `df` is the
# grouped version here, because the groupby cell reassigned it.
df