In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import math
import re

# for making legends 
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

In [None]:
# !pip install bokeh
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, IndexFilter, Spinner, TextInput, CustomJS, Select, LassoSelectTool, Div, Range1d
from bokeh.palettes import Spectral10
from bokeh.io import output_file, show, save
from bokeh.layouts import layout, column, row
from bokeh import events

output_notebook()

In [None]:
# Font sizes shared by every matplotlib figure in this notebook
plt.rcParams.update({
    'xtick.labelsize': 14,        # tick labels
    'ytick.labelsize': 14,
    'axes.labelsize': 16,         # axis labels
    'axes.titlesize': 22,         # titles
    'legend.fontsize': 12,        # legend entries
    'legend.title_fontsize': 14,  # legend title; adjust as needed
})

In [None]:
savefig = False 

# get axon-dendrite connectome 
connectivity matrix downloaded [here](https://www.science.org/doi/10.1126/science.add9330)

In [None]:
ad = pd.read_csv('/Users/yijieyin/Downloads/larva/Supplementary-Data-S1/ad_connectivity_matrix.csv', index_col=0)
ad.head(10)

So the columns and rows are skeleton IDs (skids), and the values are synapse counts, not input proportions. To convert the counts into input proportions, we need the total number of synapses on the dendrites of each skid. 

In [None]:
# total number of input synapses per column (summing down the rows)
postsynapse_totals = ad.sum(axis='rows')
# input proportion: divide every column by its own total;
# columns with a total of 0 give NaN, which is replaced by 0
ad_inprop = ad.div(postsynapse_totals, axis='columns').fillna(0)
# turn the row labels from int64 into strings
ad_inprop.index = ad_inprop.index.map(str)
ad_inprop.head()

In [None]:
# histogram of total number of postsynapses
ad.sum(axis = 'rows').hist(bins = 50)

# get meta info

In [None]:
# # meta info 
# meta = pd.read_csv('/Users/yijieyin/Downloads/larva/Supplementary-Data-S2.csv')
# meta.head()

In [None]:
meta = pd.read_csv('/Users/yijieyin/Downloads/larva/brain-neurons_meta-data.csv')
meta.head()

In [None]:
# how many cells on one side (roughly)? 
meta.shape

In [None]:
# and which types are there? 
meta.celltype.value_counts()

In [None]:
# what about sub-type?
meta.annotated_name.value_counts()

In [None]:
# how many neurons on the right without a contralateral homologue? 
sum(meta.leftid == 'no pair')

In [None]:
sum(meta.rightid == 'no pair')

In [None]:
# which kind of sensory neurons are there? 
meta[meta.celltype.isin(['sensory'])]['annotated_name'].value_counts()

From Philipp on enteric neurons: 
[Miroschnikow's thesis](https://bonndoc.ulb.uni-bonn.de/xmlui/handle/20.500.11811/9188)  
https://journals.biologists.com/jeb/article/220/10/1774/17783/Pathogen-induced-food-evasion-behavior-in  
there's an annotation in catmaid called sugar sensory  
the enteric neurons are called AN-L/R_sens etc. AN means antennal nerve

## make a type dictionary

In [None]:
# Map every skeleton id (left and right) to its cell type.
# Many entries of the leftid / rightid columns are 'no pair', so several values
# are assigned to the 'no pair' key and only the last one is retained in the
# dictionary. That is fine because we don't care about the 'no pair' ids.
types = dict(zip(pd.concat([meta.leftid, meta.rightid]),
                 pd.concat([meta.celltype, meta.celltype])))
# pop() rather than del, so this cell also runs when no neuron is unpaired
types.pop('no pair', None)
# have a look at a few
dict(list(types.items())[0:5])

In [None]:
# and a neuron name dictionary: skeleton id -> neuron name, left ids first, then right
all_ids = pd.concat([meta.leftid, meta.rightid])
all_names = pd.concat([meta.left_name, meta.right_name])
names = dict(zip(all_ids, all_names))

In [None]:
# and a side dictionary: skeleton id -> 'left' / 'right'
# (an id that appears in both columns ends up as 'right', because the right entries are merged last)
sides = {
    **dict.fromkeys(meta.leftid, 'left'),
    **dict.fromkeys(meta.rightid, 'right'),
}

In [None]:
# additional info from the annotated_name column: skeleton id -> annotated name
# (the same annotated_name is used for the left and the right id of a row)
paired_ids = pd.concat([meta.leftid, meta.rightid])
paired_annotations = pd.concat([meta.annotated_name, meta.annotated_name])
types_add = dict(zip(paired_ids, paired_annotations))

In [None]:
# interconnectivity among a hand-picked group:
# neurons typed 'MBIN' whose annotated name contains 'DAN'
selected = [
    skid
    for skid, celltype in types.items()
    if celltype == 'MBIN' and 'DAN' in types_add[skid]
]
ad_inprop.loc[selected, selected]

In [None]:
# Remove side information (and anything in brackets) from the neuron names, so
# that names can later be used to group neurons across sides.
# The pattern is a raw string: '\(' inside a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
# NOTE(review): 'l$|r$|l |r ' also strips any other trailing/word-final l or r - confirm this is intended.
side_pattern = re.compile(r'( left| right|;right|;left|_left|_right|-R-|-L-|l$|r$|l |r |\(.*\))')
names_noside = {skid: side_pattern.sub('', name) for skid, name in names.items()}
# map (side-less) name to type
nametotype = {names_noside[skid]: thistype for skid, thistype in types.items()}
nametotype_add = {names_noside[skid]: thistype
                  for skid, thistype in types_add.items() if skid != 'no pair'}

# get connectome from catmaid

In [None]:
import pymaid

In [None]:
rm = pymaid.connect_catmaid()

In [None]:
# CATMAID annotations (named after publications) used to collect the neurons
paper_annotations = [
    'Berck, Khandelwal et al. 2016',
    'Eschbach, Fushiki et al. 2020',
    'Eschbach, Fushiki et al. 2020b',
    'Hueckesfeld et al. 2020',
    'Larderet, Fritsch et al. 2017',
    'Miroschnikow et al. 2018',
    'Schlegel et al. 2016',
    'Tastekin et al. 2018',
    'Winding, Pedigo et al. 2023',
]
allns = pymaid.find_neurons(annotations=paper_annotations)
allns

In [None]:
# how many are neurons? 
unique, counts = np.unique(allns.type, return_counts=True)
dict(zip(unique,counts))

In [None]:
# this seems to get all the sensory neurons, better than the annotation 'mw sens'
# the difference seem to be in the VNC 
anton_sensory = pymaid.find_neurons(annotations = ['all sensory neurons'])
anton_sensory

In [None]:
# update names: skeleton id -> name for the published neurons,
# then add / overwrite with the sensory neurons
names = dict(zip(allns.skeleton_id, allns.name))
names.update(zip(anton_sensory.skeleton_id, anton_sensory.name))

In [None]:
adj = pymaid.adjacency_matrix(list(names.keys()))
adj

In [None]:
# Keep only the neurons with at least one postsynapse (column sum > 0.5).
# Caveat: removing a neuron also removes its presynapses, so some of the
# remaining neurons can end up without any input (receiving info from no one).
has_input = adj.sum() > 0.5
adj = adj.loc[has_input, has_input]
# input proportion: divide each column by its total
inprop = adj.div(adj.sum(), axis='columns')
# tally the distinct column sums of the input-proportion matrix
u, counts = np.unique(inprop.sum(), return_counts=True)
dict(zip(u, counts))

Note that this is no longer axo-dendritic. 

In [None]:
# names of the neurons fetched from CATMAID that have no entry in `types`, alphabetically
notinWP = sorted(names[skid] for skid in names if skid not in types)
notinWP

In [None]:
# update types
types = {**types, **dict.fromkeys(anton_sensory.skeleton_id, 'sensory')}

In [None]:
lefts = pymaid.find_neurons(annotations = ['mw left'])
rights = pymaid.find_neurons(annotations = ['mw right'])

In [None]:
# neurons annotated left / right that have no entry in `types`
left_is_new = [skid not in types for skid in lefts.skeleton_id]
right_is_new = [skid not in types for skid in rights.skeleton_id]
lefts_notpublished = lefts[left_is_new]
rights_notpublished = rights[right_is_new]
rights_notpublished

# matrix multiplications

## try a few multiplications

In [None]:
# # @ is matrix multiplication 
# once = ini @ ad_inprop
# # within one step, what's the proportion of input accounted for? 
# once.stack().plot.hist(bins = 100)

In [None]:
# once.stack().plot.hist(bins = [0.0001,0.0005,0.001,0.002,0.005,0.1])

In [None]:
# twice = ini @ ad_inprop @ ad_inprop

In [None]:
# twice.stack().plot.hist(bins = [0.0001,0.0005,0.001,0.002,0.005,0.1,0.2,0.5,0.8,1])

In [None]:
# # same as ini @ ad_inprop @ ad_inprop
# ini@np.linalg.matrix_power(ad_inprop, 2)

In [None]:
# # cumulative plot 
# data = twice.stack().values
# data_sorted = np.sort(data)
# p = 1. * np.arange(len(data)) / (len(data) - 1)

# plt.plot(data_sorted,p)

In [None]:
# # have a look at the biggest values 
# data_sorted[-100:-1]

In [None]:
# sum([datum == 0 for datum in data])

In [None]:
# # how many values in total? 
# twice.shape[0]**2

In [None]:
# 8638448/8714304

## total input contributed by senses 
At different steps 

In [None]:
def generate_steps(skids, ad_inprop, step_number, threshold=0):
    """Propagate signal from the starting neurons through the matrix, one step at a time.

    Parameters
    ----------
    skids : iterable
        Labels of the starting neurons, as they appear in ``ad_inprop``.
    ad_inprop : pandas.DataFrame
        Square matrix that is multiplied in at every step. Rows and columns are
        expected to carry the same labels in the same order.
    step_number : int
        Number of steps to compute.
    threshold : float, default 0
        After every step, values below ``threshold`` are set to 0 before the
        next multiplication.

    Returns
    -------
    list of pandas.DataFrame
        ``step_number`` matrices. Element ``i`` is the starting matrix times
        ``ad_inprop`` applied ``i + 1`` times (with thresholding after each
        step): element 0 is the direct input from ``skids``, element 1 the
        input with one neuron in the middle, and so on. Rows of neurons that
        are not in ``skids`` are all zero.
    """
    starting = set(skids)  # constant-time membership tests

    # The initial almost-identity matrix: all zeros, with a 1 on the diagonal
    # for every starting neuron. It is built from scratch instead of zeroing a
    # copy through `.values`, which is read-only under pandas Copy-on-Write.
    ini = pd.DataFrame(0.0, index=ad_inprop.index, columns=ad_inprop.columns)
    for col in ini.columns:
        if col in starting:
            ini.loc[col, col] = 1

    steps_fast = []
    for i in range(step_number):
        if i == 0:
            # first step of signal propagation; multiplying by the bare array
            # loses the column labels, so they are restored from the index
            unthresholded = ini.dot(ad_inprop.to_numpy())
            unthresholded.columns = unthresholded.index.copy()
        else:
            # multiply the last (thresholded) result with ad_inprop
            unthresholded = steps_fast[-1] @ ad_inprop
        steps_fast.append(unthresholded.where(unthresholded >= threshold, 0))

    return steps_fast

In [None]:
# number of propagation steps to compute
step_number = 15
# starting points: the columns of ad_inprop whose type is 'sensory'
selectin = [skid for skid in ad_inprop.columns if types[skid] == 'sensory']
# one matrix per step, unthresholded (threshold defaults to 0)
steps = generate_steps(selectin, ad_inprop, step_number)

The cell above gives the same results as: 
```
steps = []
for i in range(step_number): 
    steps.append(ini@np.linalg.matrix_power(ad_inprop, i+1))
```
However, `np.array_equal(steps[i].values, steps_fast[i].values)` will tell you that they are not the same. 
This is because floating-point arithmetic is not exact: multiplying step by step and raising the matrix to a power first perform the operations in a different order, so the (here very small) numbers are rounded slightly differently. 
You can check that the two agree up to rounding error with the following code: 
```
for i in range(step_number): 
    #print(np.array_equal(steps[i].values, steps_fast[i].values))
    print(np.allclose(steps[i].values, steps_fast[i].values))

    print(np.max(np.abs(steps[i].values-steps_fast[i].values)))
```

In [None]:
def add_steps(steps, n):
    """Return the sum of the first ``n`` entries of ``steps``.

    ``n`` is meant to be 1 or larger; for ``n == 1`` a copy of the first step
    is returned. The entries of ``steps`` themselves are not modified.
    """
    total = steps[0].copy()
    # add the 2nd, 3rd, ..., n-th step on top of the first one
    for position in range(1, n):
        total = total + steps[position]
    return total

In [None]:
# check a few 
add_steps(steps, 2).sum(axis = 'rows').hist()

In [None]:
add_steps(steps, 8).sum(axis = 'rows').hist()

In [None]:
steps_thresholded = generate_steps(selectin, ad_inprop, step_number, 0.001)
add_steps(steps_thresholded, 14).sum(axis = 'rows').hist()

In [None]:
add_steps(steps, 14).sum(axis = 'rows').hist()

In [None]:
# we should exclude the sensory neurons in the columns, and non-sensory neurons in the rows (since the values are 0 anyway)
# boolean masks over the row labels; isin() avoids scanning the selectin list once per label
sensory = steps[0].index.isin(selectin)
not_sensory = ~sensory
steps_nosense = []
# choose thresholded or not
for m in steps:
    # label the columns like the rows so the masks apply to both axes
    m.columns = m.index
    steps_nosense.append(m.loc[sensory, not_sensory])

In [None]:
steps_nosense[0].shape

In [None]:
add_steps(steps_nosense, 3).sum(axis = 'rows').hist()

In [None]:
add_steps(steps_nosense, 8).sum(axis = 'rows').hist()

In [None]:
add_steps(steps_nosense, 14).sum(axis = 'rows').hist()

## histograms of contributions of different steps

In [None]:
n = 14
stepsn = add_steps(steps_nosense, n)

In [None]:
# sum across multiple input sensory neurons, and take the average of the postsynaptic neurons
# (grouping the columns via a transpose: groupby(..., axis=1) is deprecated in pandas 2.x)
stepsn_noside = (
    stepsn
    .groupby(names_noside).sum()         # rows: sum over neurons sharing a side-less name
    .T.groupby(names_noside).mean().T    # columns: mean over neurons sharing a side-less name
)
stepsn_noside

In [None]:
# how much input is accounted for at each step, for each neuron?
# (one Series of column sums per step)
sensein_step = [one_step.sum(axis='rows') for one_step in steps_nosense]

In [None]:
palette = sns.color_palette("Spectral", len(sensein_step))
plt.hist(sensein_step, stacked=True, bins = 20, color = palette)
plt.show()

This shows that all the steps make similar contributions to neurons' inputs: very few neurons have much of their input accounted for by the first step or two (the low y values where x values are high). 

In [None]:
palette

In [None]:
len(steps_nosense)

In [None]:
# sum up all the steps: entry i holds, for each neuron, the input accounted for
# by steps 1..i+1 together.
# A running total is kept instead of re-adding all earlier steps for every i.
sensein_sum = []
running_total = None
for one_step in steps_nosense:
    running_total = one_step.copy() if running_total is None else running_total + one_step
    sensein_sum.append(running_total.sum(axis='rows'))

In [None]:
plt.hist(sensein_sum, stacked=True, bins = 20, color = palette, bottom=0, alpha = 0.5)
plt.show()

On the right of the plot above, you can see that there are neurons that have nearly 100% of their input accounted for by the last step (the neurons in the last bar) (though some of them got there earlier than others). 
The bar on the left is explained by neurons that have little input from the senses in the first few steps. The blue top means that there are a few neurons which don't have a lot of their input accounted for by the senses, even at the end of the steps. 

## cumulative plot per neuron

First get the percentage input accounted for for each neuron at the end of stepping. Then take an average per type. 

In [None]:
# input accounted for by the senses once all steps are summed, per neuron
final_sensein = sensein_sum[-1]

sensein_pertype = {}       # cell type -> list of per-neuron values
mean_sensein_pertype = {}  # cell type -> mean of that list
for atype in set(types.values()):
    # the sensory neurons themselves are the starting points, not targets
    if atype == 'sensory':
        continue
    values_of_type = [final_sensein[skid] for skid in final_sensein.index
                      if types[skid] == atype]
    sensein_pertype[atype] = values_of_type
    mean_sensein_pertype[atype] = sum(values_of_type) / len(values_of_type)

# one row per cell type, sorted from least to most input accounted for
mean_sensein_pertype = pd.DataFrame.from_dict(
    mean_sensein_pertype, orient='index', columns=['Value']
).sort_values('Value')
mean_sensein_pertype

In [None]:
# cell type -> hex colour, with the types in the order of mean_sensein_pertype
n_types = len(set(types.values()))
spectral_hex = sns.color_palette("Spectral", n_types).as_hex()
typecolourdict = dict(zip(mean_sensein_pertype.index, spectral_hex))
# turn MBIN to black so that it's easier to see
typecolourdict['MBIN'] = '#0d0d0d'

In [None]:
# save colour mapping for other scripts 
pd.DataFrame(typecolourdict.items(), columns=['type_name', 'colour']).to_csv('/Users/yijieyin/Downloads/larva/type_colour_dict.csv')

In [None]:
# have a look at the colours
sns.color_palette("Spectral", len(set(types.values())))

Red: very little input accounted for by senses;  
Blue: a lot of input accounted for by the senses. 

In [None]:
# stacked horizontal histogram: one colour per cell class
fig, ax = plt.subplots(figsize=(15, 10))
type_colours = [typecolourdict.get(key) for key in sensein_pertype.keys()]
ax.hist(sensein_pertype.values(),
        stacked=True,
        bins=20,
        label=list(sensein_pertype.keys()),
        color=type_colours,
        orientation='horizontal')
ax.set_ylabel(f"Input proportion accounted for by the senses in paths <= {n} hops")
ax.set_xlabel("Number of neurons")
ax.legend()
ax.set_title('Input proportion accounted for by senses for each cell class')
if not savefig:
    plt.show()
else:
    fig.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/inprop_bysenses.pdf', bbox_inches='tight')

In [None]:
# one line per neuron: cumulative input accounted for after 1, 2, ... hops
fig, ax = plt.subplots(figsize=(15, 10))
xs = list(range(1, len(sensein_sum) + 1))  # the same hop numbers for every neuron
for skid in sensein_sum[0].index:
    ys = [cumulative[skid] for cumulative in sensein_sum]
    ax.plot(xs, ys, alpha=0.5, color=typecolourdict[types[skid]])

ax.set_xlabel('Number of hops from the senses')
ax.set_ylabel('Input proportion accounted for by the senses')
ax.set_title('Cumulative input proportion accounted for by the senses')
if not savefig:
    plt.show()
else:
    fig.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/inprop_bysenses_stepwise.pdf', bbox_inches='tight')

In [None]:
from collections import Counter

# every entry of `types` except the sensory ones
filtered_dict = {k: v for k, v in types.items() if v != 'sensory'}

# number of entries per cell type
value_counts = Counter(filtered_dict.values())

# largest type first; types with equal counts keep their first-seen order
sorted_labels, sorted_counts = (list(part) for part in zip(*value_counts.most_common()))
colors = [typecolourdict[label] for label in sorted_labels]

# pie chart of the cell-type composition
fig, ax = plt.subplots(figsize=(15, 10))
ax.pie(sorted_counts,
       labels=sorted_labels,
       autopct="%1.1f%%",
       startangle=90,
       colors=colors,
       textprops=dict(size=10))
ax.axis("equal")  # keep the pie circular

plt.show()

### select a cell type

In [None]:
# check a particular cell type: neurons typed 'MBIN' whose annotated name contains 'DAN'
skids = [skid for skid in sensein_sum[0].index
         if types[skid] == 'MBIN' and 'DAN' in types_add[skid]]

# one colour per annotated name (alphabetical), one line style per side
celltypes = {types_add[skid] for skid in skids}
col_typeadd = dict(zip(sorted(celltypes),
                       sns.color_palette("hls", len(celltypes)).as_hex()))
line_styles = {'left': '-', 'right': '--'}

fig, ax = plt.subplots(figsize=(15, 10))
xs = list(range(1, len(sensein_sum) + 1))  # step numbers, shared by all lines
for skid in skids:
    ys = [cumulative[skid] for cumulative in sensein_sum]
    ax.plot(xs, ys,
            color=col_typeadd[types_add[skid]],
            linestyle=line_styles[sides[skid]],
            label=f"{types_add[skid]} ({sides[skid]})",
            lw=3)

ax.set_xlabel('Steps from the senses')
ax.set_ylabel('Input proportion accounted for by the senses')

# Two separate legends: colours (cell types) and line styles (sides)
color_patches = [mpatches.Patch(color=color, label=celltype)
                 for celltype, color in col_typeadd.items()]
color_legend = ax.legend(handles=color_patches, loc='lower right', bbox_to_anchor=(1, 0.15),
                         title='Cell Types')
# re-attach the first legend, otherwise the second legend call replaces it
ax.add_artist(color_legend)

side_lines = [mlines.Line2D([], [], color='black', label=side,
                            linestyle=line_styles[side], linewidth=2)
              for side in set(sides.values())]
ax.legend(handles=side_lines, loc='lower right',
          title='Sides')
ax.set_title("What about Dopaminergic neurons?")
if not savefig:
    plt.show()
else:
    fig.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/inprop_bysenses_stepwise_DANj.pdf', bbox_inches='tight')

But what does that mean? 
which senses? 

In [None]:
# print the cumulative numbers (rounded to 4 decimals) behind the plot,
# for the same selection: type 'MBIN' with 'DAN' in the annotated name
skids = [skid for skid in sensein_sum[0].index
         if types[skid] == 'MBIN' and 'DAN' in types_add[skid]]

for skid in skids:
    ys = [round(cumulative[skid], 4) for cumulative in sensein_sum]
    # annotated name on one line, its values on the next
    print(types_add[skid], ys, sep='\n')

## not cumulative

In [None]:
# one line per non-sensory neuron: input accounted for at each individual step
fig, ax = plt.subplots(figsize=(15, 10))
xs = list(range(1, len(sensein_step) + 1))  # hop numbers, shared by all lines
for skid in tqdm(sensein_sum[0].index):
    if types[skid] != 'sensory':
        ys = [at_step[skid] for at_step in sensein_step]
        ax.plot(xs, ys, alpha=0.5, color=typecolourdict[types[skid]])

ax.set_xlabel('Number of hops from the senses')
ax.set_ylabel('Input proportion accounted for by the senses')
ax.set_title('Input proportion accounted for by the senses')
if not savefig:
    plt.show()
else:
    fig.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/inprop_bysenses_stepwise_not_cumulative.pdf', bbox_inches='tight')

In [None]:
# flatten the summed matrix into one long Series of values
unstacked_df = stepsn.unstack().reset_index(drop=True)

fig, ax = plt.subplots(figsize=(15, 10))

# 40 logarithmically spaced bin edges between 1e-3 and 1, on log-log axes
log_bins = np.logspace(-3, 0, 40)
ax.hist(unstacked_df, bins=log_bins)
ax.set(xscale='log',
       yscale='log',
       xlabel='Strength of connection (Log Scale)',
       ylabel='Number of connections (Log Scale)',
       title='Number of connections vs. strength of connection')

plt.show()


# DAN's input from senses

In [None]:
# sum the input proportion by sensory type 
sensesn = stepsn.groupby(by = [types_add[idx] for idx in stepsn.index]).sum()
sensesn

In [None]:
# sanity check that it's the right axes 
sensesn.sum(axis = 'rows').hist()

In [None]:
# let's take a look at the MBINs (which include the DANs): one row per neuron
is_mbin = [types[skid] == 'MBIN' for skid in sensesn.columns]
dans_sensein = sensesn.loc[:, is_mbin].T
# attach the annotated name and the side of every neuron
dans_sensein = dans_sensein.assign(
    cell_type=[types_add[skid] for skid in dans_sensein.index],
    side=[sides[skid] for skid in dans_sensein.index],
)
dans_sensein.sort_values('cell_type').head()

In [None]:
# # round and write to clipboard 
# dans_sensein.sort_values('cell_type').round(4).to_clipboard()

In [None]:
# colour range taken over all numeric cells (everything but the two label columns)
dan_values = dans_sensein.drop(['cell_type', 'side'], axis=1)
vmin = dan_values.min().min()
vmax = dan_values.max().max()

# table sorted by cell type, shaded by value
dans_sensein_dp = (
    dans_sensein
    .sort_values('cell_type')
    .style.background_gradient(cmap='Blues', vmin=vmin, vmax=vmax)
)
display(dans_sensein_dp)

In [None]:
# # line plot 
# strictly_dan_sensein = dans_sensein[['DAN' in thistype for thistype in dans_sensein.cell_type]]

# fig, ax = plt.subplots(figsize=(15, 10))
# sensory_columns = strictly_dan_sensein.columns[:-2]

# for idx, row in strictly_dan_sensein.iterrows():
#     plt.plot(sensory_columns, row[sensory_columns], 
#              color=col_typeadd[row['cell_type']], linestyle=line_styles[row['side']], 
#              label=f"{row['cell_type']} ({row['side']})", 
#             lw = 3)

# # plt.xlabel('Sensory modality')
# plt.ylabel('Contribution')
# plt.xticks(np.arange(len(sensory_columns)), sensory_columns, rotation=30)

# # don't actually need these because legends are covered in the previous plots 
# # # Create legends
# # # Legend for cell types
# # color_patches = [mpatches.Patch(color=color, label=celltype) for celltype, color in col_typeadd.items()]
# # color_legend = plt.legend(handles=color_patches, bbox_to_anchor=(1, 1), loc='upper right', title='Cell Types')

# # # Add the color_legend back to the plot so it doesn't get overwritten by the next legend
# # plt.gca().add_artist(color_legend)

# # # Legend for sides
# # side_lines = [mlines.Line2D([], [], color='black', label=side, linestyle=line_styles[side], linewidth=2) for side in set(strictly_dan_sensein['side'])]
# # plt.legend(handles=side_lines, bbox_to_anchor=(1, 0.4), loc='upper right', title='Sides')

# plt.title('How much from which senses?')

# # plt.show()
# plt.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/inprop_bysenses_DANj.pdf', bbox_inches='tight')

In [None]:
# scatter plot: rows whose cell_type contains 'DAN'
is_dan = ['DAN' in thistype for thistype in dans_sensein.cell_type]
strictly_dan_sensein = dans_sensein[is_dan]

fig, ax = plt.subplots(figsize=(15, 10))
# every column except the last two (cell_type and side)
sensory_columns = strictly_dan_sensein.columns[:-2]
x_values = np.arange(len(sensory_columns))

for idx, row in strictly_dan_sensein.iterrows():
    ax.scatter(x_values,
               row[sensory_columns],
               color=col_typeadd[row['cell_type']],
               label=f"{row['cell_type']} ({row['side']})",
               s=50)  # marker size, adjust as needed

ax.set_ylabel('Contribution')
plt.xticks(x_values, sensory_columns, rotation=30)
ax.set_title('How much from which senses?')

if not savefig:
    plt.show()
else:
    fig.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/inprop_bysenses_DANj.pdf', bbox_inches='tight')

## deeper = flatter? 

In [None]:
# per neuron (column): spread across the rows of sensesn vs. their total
variance = sensesn.var(axis=0)
depth = sensesn.sum(axis=0)
point_colours = [typecolourdict[types[skid]] for skid in sensesn.columns]

fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(depth, variance, color=point_colours)
ax.set_xlabel('Input proportion accounted for')
ax.set_ylabel('Variance')

The blue ones (PNs and KCs) have biased inputs. For the red-ish ones (e.g. AN, PN-somato, DN-VNC), it does seem like: the more input accounted for, the more biased the input - but perhaps this is an artefact of values being larger? 

**Gini coefficient**: The Gini coefficient is a measure of inequality, originally developed for income distribution in economics, but it can also be applied to a vector of input proportions. A Gini coefficient of 0 represents perfect equality (a "flat" vector); larger values mean more inequality, up to a maximum of $(n-1)/n$ (which approaches 1 for long vectors) when a single entry holds everything (a "skewed" vector).  

With the $n$ values sorted in ascending order, $x_1 \le x_2 \le \dots \le x_n$:  

$$G = \frac{\sum_{i=1}^{n} (2i - n - 1)\, x_i}{n \sum_{i=1}^{n} x_i}$$

In [None]:
def gini(vector):
    """Gini coefficient of ``vector``.

    The values are sorted in ascending order and combined as
    ``sum((2*i - n - 1) * x_i) / (n * sum(x))`` with ``i`` running from 1 to
    ``n``. The result is 0 for a flat vector and grows as the values become
    more unequal.
    """
    values = np.sort(np.array(vector))  # the formula needs ascending order
    n = values.shape[0]                 # number of data points
    ranks = np.arange(1, n + 1)         # 1-based position of each data point
    weighted_total = np.sum((2 * ranks - n - 1) * values)
    return weighted_total / (n * np.sum(values))

flat_vector = [0.2] * 5
skewed_vector = [1, 0, 0, 0, 0]

print(gini(flat_vector))    # a low number
print(gini(skewed_vector))  # a high number

In [None]:
ginico = sensesn.apply(gini, axis = 0).fillna(0)
ginico

In [None]:
# Gini coefficient of every neuron against its total input accounted for
point_colours = [typecolourdict[types[skid]] for skid in sensesn.columns]
fig, ax = plt.subplots(figsize=(15, 10))
ax.scatter(depth, ginico, color=point_colours)
ax.set_xlabel('Input proportion accounted for')
ax.set_ylabel('Gini coefficient')

Okay. So all are okay except for PNs and KCs which take very biased input. 

In [None]:
# question: this is annotated 'mechanosensory', but why is it gustatory here? 
types_add['16362243']

**Albert on 20/March/2023:**  
The PAM cluster: DAN-ijk and h (h is not there in L1) are involved in reward learning  
DAN activity: signal good things  
enteric must be good-sensing?  

There are 2 sugar sensors that are unusual: they ascend and cross the midline.  
if you load all the sensories, two ascend in the brain, and they should be in the sensory category, instead of ascending. 
If we hypothesise that DANj1 is looking for positive signal:  
20% of the olfactory input for DANj1: they should be alcohol related  

The ORNs stay close-by in the antennal lobe based on valence:  
    medial: things larva like  
    lateral: doesn't like  

nutritional value isn't determined by taste  
e.g. L-sugar tastes sweet, but no nutrition value  
DANj is people's favourite  
DANj not reading much from visual: visual is mostly negative in the larva  
respiratory: O2 measurement: so it's positive  

DANc1 is mysterious: it's in the PED, no one knows what they do 

warm sensors: 2 cells go to broad LNs in the AL: normalisation by temperature  
those neurons must go to the mPNs, then some mPNs must go to DANs, so there should be input from warm sensors to DANs    
DANj1 is receiving slightly more from warm than cold.  

cold sensors: synapse onto 3-5 PNs, no LNs, straight to the brain.  
2-5% input of the DANs  

MBIN-e: it's in the e compartment lobe.  

some senses are entirely positive/negative: 
e.g. respiratory measures O2, and O2 is always good  
sensing of heat and cold ramps: [Luis Hernandez-Nunez, Cardona, Samuel et al.](https://pubmed.ncbi.nlm.nih.gov/34452914/)  
warm cells: broad LNs: normalise for temperature  
there are also warming PNs: then I-PNs converge from cold and warm  

[Diversity of Internal Sensory Neuron Axon Projection Patterns Is Controlled by the POU-Domain Protein Pdm3 in Drosophila Larvae](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5824742/)   
They are in the trachea: axons go to the abdominal segments, then anteriorly along the dorsolateral tract: when they reach the a1 segment, they turn medially and go into the brain  
    tracheal sensory neurons that go to the brain  
    brain receive direct input from O2  
DANj1: reads the most from that?  

# dimensionality reduction by senses 

In [None]:
from sklearn.decomposition import PCA

In [None]:
sensesn.T

In [None]:
pca = PCA()
x = sensesn.T
x_pca = pca.fit_transform(x)
x_pca = pd.DataFrame(x_pca)
x_pca.head()

In [None]:
pca.explained_variance_ratio_

In [None]:
# use the following line to add colours in the future 
# x_pca[‘target’]=y
x_pca.columns = ['PC{}'.format(n) for n in range(x_pca.shape[1])]
x_pca.index = x.index

In [None]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('Principal Component 1') 
ax.set_ylabel('Principal Component 2') 
ax.set_title('2 component PCA') 
# targets = [‘Iris-setosa’, ‘Iris-versicolor’, ‘Iris-virginica’]
# colors = [‘r’, ‘g’, ‘b’]
# for target, color in zip(targets,colors):
#  indicesToKeep = x_pca[‘target’] == target
#  ax.scatter(x_pca.loc[indicesToKeep, ‘PC1’]
#  , x_pca.loc[indicesToKeep, ‘PC2’]
#  , c = color
#  , s = 50)
# ax.legend(targets)

ax.scatter(x_pca.PC0, x_pca.PC1)
ax.grid()

In [None]:
# check some cell types based on the plot above
# The PCA columns are numbered from 0, so the plot's "Principal Component 2"
# (the y axis, drawn from x_pca.PC1) is the column 'PC1', not 'PC2'.
criteria = (x_pca['PC1'] > 0.6).to_numpy()
for skid in x.index[criteria]:
    print('{}: {}'.format(types[skid], types_add[skid]))

In [None]:
# check some additional cell types: the bottom left corner of the plot above
# The plot's x and y axes are the columns 'PC0' and 'PC1' (numbered from 0),
# so those are the two columns to threshold.
criteria = ((x_pca['PC0'] < -0.2) & (x_pca['PC1'] < -0.4)).to_numpy()
for skid in x.index[criteria]:
    print('{}: {}'.format(types[skid], types_add[skid]))

In [None]:
# which kind of PNs are there? 
meta[meta.celltype == 'PN'].annotated_name.value_counts()

## colouring PCA

In [None]:
# label every point with its cell type, then draw one scatter per type
x_pca['cell_type'] = [types[idx] for idx in x_pca.index]

fig, ax = plt.subplots(figsize=(15, 10))
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('2 component PCA')

for target, color in typecolourdict.items():
    of_this_type = x_pca['cell_type'] == target
    ax.scatter(x_pca.loc[of_this_type, 'PC0'],
               x_pca.loc[of_this_type, 'PC1'],
               c=color,
               s=50,
               alpha=0.5,
               label=target)
ax.legend()
ax.grid()

### interactive PCA

In [None]:
# PCA on the transposed sensesn table (one row per column of sensesn)
pca = PCA()
x = sensesn.T
pc_scores = pca.fit_transform(x)
x_pca = pd.DataFrame(pc_scores,
                     index=x.index,
                     columns=['PC{}'.format(n) for n in range(pc_scores.shape[1])])
# metadata columns used for colouring and filtering in the interactive plot
x_pca['type_name'] = [types[skid] for skid in x_pca.index]
x_pca['colour'] = [typecolourdict[atype] for atype in x_pca.type_name]
x_pca['side'] = [sides[skid] for skid in x_pca.index]
x_pca

In [None]:
# Interactive scatter of the first two principal components, with two
# drop-downs that select points by cell type and by side.
datasource = ColumnDataSource(x_pca)

plot = figure(
    title='PCA projection of individual inputs',  # the data plotted here are PCA scores
    width=1200,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

renderer = plot.circle(
    'PC0',
    'PC1',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)


### Create a Select widget for type_name ###
# Both widgets start at 'all'. With an initial value of "" the callback below
# matches nothing for the widget that has not been touched yet, so changing
# only one of the two filters would select no points at all.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(x_pca['type_name'].unique()), 
                     value="all")

side_select = Select(title="Select Side:", 
                     options=['all'] + list(x_pca['side'].unique()), 
                     value="all")


# JavaScript callback: select every point that matches both drop-downs
# ('all' matches everything)
callback = CustomJS(args=dict(source=datasource, select_type=type_select, select_side=side_select), code="""
    const selected_type = select_type.value;
    const selected_side = select_side.value;
    const indices = [];
    const data = source.data;
    const typeName = data.type_name;
    const sideName = data.side;
    for (let i = 0; i < typeName.length; i++) {
        if ((typeName[i] === selected_type || selected_type === "all") &&
            (sideName[i] === selected_side || selected_side === "all")) {
            indices.push(i);
        }
    }
    source.selected.indices = indices;
    source.change.emit();
""")

# Attach the JavaScript callback function to the widgets' value change event
type_select.js_on_change('value', callback)
side_select.js_on_change('value', callback)

plot.legend.location = "top_left"

# not named `layout`, so that the `layout` function imported from bokeh.layouts stays usable
pca_layout = column(type_select, side_select, plot)

# show result
show(pca_layout)

## UMAP

### sensory modalities

In [None]:
# one row per side-less neuron name: mean over the neurons that share the name
# in_from_senses['type_name'] = in_from_senses.index.map(types)
in_from_senses = sensesn.T.groupby(names_noside).mean()
# cell type of every name (used for colouring)
in_from_senses['type_name'] = in_from_senses.index.map(nametotype)
in_from_senses

#### pair plot

In [None]:
pairplot_figure = sns.pairplot(in_from_senses, hue = 'type_name')
# can't save this for some reason 
# pairplot_figure.savefig('/Users/yijieyin/Downloads/pair_plot_from_senses.pdf', bbox_inches='tight')

The more the points are in the middle, the more related the senses are. 

#### umap
"since the measurements are on entirely different scales it will be helpful to convert each feature into z-scores (number of standard deviations from the mean) for comparability." - but our measurements are on the same scale, so no need for this.

In [None]:
# !pip install umap-learn
import umap

In [None]:
# UMAP reducer built with the library defaults: no arguments are passed, so in
# particular no random_state is fixed here.
reducer = umap.UMAP()

In [None]:
# Feature matrix for UMAP: every column except the `type_name` label.
in_from_senses_data = in_from_senses.drop(columns='type_name').values
# Fit the reducer and keep the embedded coordinates (one row per input row).
embedding = reducer.fit_transform(in_from_senses_data)
embedding.shape

In [None]:
# Static view of the embedding: one point per row of `in_from_senses`.
fig = plt.figure(figsize=(15, 10))
# Colour of each point is looked up from its type (index label -> type -> colour).
point_colours = [typecolourdict[nametotype[skid]] for skid in in_from_senses.index]
plt.scatter(embedding[:, 0], embedding[:, 1], c=point_colours)
# Same data scale on both axes.
plt.gca().set_aspect('equal', 'datalim')
plt.title('UMAP projection of input from senses', fontsize=24);

#### interactive

In [None]:
# prepare the data: per-sense inputs side by side with the UMAP coordinates
in_from_senses_bk = pd.concat(
    [in_from_senses,
     pd.DataFrame(embedding, columns=('x', 'y'), index=in_from_senses.index)],
    axis=1)
in_from_senses_bk['colour'] = [typecolourdict[atype] for atype in in_from_senses_bk.type_name]
in_from_senses_bk['name'] = in_from_senses_bk.index
in_from_senses_bk['type_add'] = [nametotype_add[skid] for skid in in_from_senses_bk.index]
# change column names because bokeh doesn't like the dashes
in_from_senses_bk = in_from_senses_bk.rename(columns={'gustatory-external': 'gustatory_external',
                                                      'gustatory-pharyngeal': 'gustatory_pharyngeal',
                                                      'thermo-cold': 'thermo_cold',
                                                      'thermo-warm': 'thermo_warm'})
round_cols = ['gustatory_external','gustatory_pharyngeal','thermo_cold','thermo_warm','enteric','olfactory','respiratory','visual']
in_from_senses_bk[round_cols] = in_from_senses_bk[round_cols].round(3)

# scale text size by value
# Each sense column gets a companion "<...>_fontsize" column that the hover
# tooltips paste into a CSS `font-size:` declaration. CSS lengths need a unit,
# so the value is written as e.g. "12px" (a bare "12" is not a valid font-size).
base = 10   # font size in px for a value of 0
scale = 10  # extra px per unit of value; adjust the scaling factor as needed
fontsize_columns = {
    'olfactory': 'olfactory_fontsize',
    'enteric': 'enteric_fontsize',
    'gustatory_external': 'gustatory_external_fontsize',
    'gustatory_pharyngeal': 'gustatory_pharyngeal_fontsize',
    'respiratory': 'respiratory_fontsize',
    'thermo_cold': 'cold_fontsize',
    'thermo_warm': 'warm_fontsize',
    'visual': 'visual_fontsize',
}
for value_col, fontsize_col in fontsize_columns.items():
    in_from_senses_bk[fontsize_col] = [
        f"{base + int(value * scale)}px" for value in in_from_senses_bk[value_col]
    ]
in_from_senses_bk

In [None]:
# Interactive Bokeh version of the UMAP scatter, with a drop-down that selects
# all points of one type.
# The data source exposes every column of `in_from_senses_bk` to the glyphs,
# the hover template and the JavaScript callback below.
datasource = ColumnDataSource(in_from_senses_bk)

plot = figure(
    title='UMAP projection of input from senses',
    width=800,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover template: "@column" placeholders are filled from the hovered row.
# Each row of the tooltip takes its font size from the matching *_fontsize
# column and prints the value of the matching sense column.
plot.add_tools(HoverTool(tooltips="""
<div>
    <div><span style='font-size: 15px'> @type_name; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

# One circle per row at (x, y), coloured by the `colour` column, with one
# legend entry per `type_name`. Selected points are drawn at alpha 0.8 and
# unselected ones fade to alpha 0.05.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)


### Create a Select widget for type_name ###
# Options are 'all' plus every distinct type; the initial value is an empty
# string, which is not one of the options.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(in_from_senses_bk['type_name'].unique()), 
                     value="")

# Define a JavaScript callback function for the widget
# It selects every row whose type_name equals the drop-down value, or every
# row when the value is "all".
callback = CustomJS(args=dict(source=datasource, select=type_select), code="""
    const selected_type = select.value;
    const indices = [];
    const data = source.data;
    const typeName = data.type_name;
    for (let i = 0; i < typeName.length; i++) {
        if (typeName[i] === selected_type || selected_type === "all") {
            indices.push(i);
        }
    }
    source.selected.indices = indices;
    source.change.emit();
""")
# Attach the JavaScript callback function to the widget's value change event
type_select.js_on_change('value', callback)

plot.legend.location = "top_left"

# NOTE: this rebinds the name `layout`, which the import cell also binds to the
# `layout` function from bokeh.layouts; the save cell below relies on this variable.
layout = column(type_select, plot)

# show result
show(layout)

In [None]:
# save it
# Writes the most recently built `layout` to a standalone HTML file, but only
# when the notebook-level `savefig` flag is on.
# NOTE(review): the target is an absolute path on one machine, and output_file()
# presumably changes where later show() calls send their output — confirm.
if savefig: 
    output_file("/Users/yijieyin/Downloads/interactive_umap_from_senses_by_type.html")  # Specifies the output file
    save(layout)  # Saves the plot to the file

#### sided input

In [None]:
# Sided input: use the transposed `sensesn` rows as they are (no grouping by
# name) and label each row with its type via the `types` lookup.
in_from_senses_sided = sensesn.T
in_from_senses_sided['type_name'] = in_from_senses_sided.index.map(types)

# umap
# The reducer is created *before* it is fitted. Previously the fit ran first,
# so it silently reused whatever `reducer` an earlier cell had left behind and
# the reducer created here was never used.
reducer = umap.UMAP()
in_from_senses_data_sided = in_from_senses_sided.drop('type_name', axis = 1).values
embedding_sided = reducer.fit_transform(in_from_senses_data_sided)

# prepare data: sided per-sense inputs side by side with their UMAP coordinates
in_from_senses_bk_sided = pd.concat(
    [in_from_senses_sided,
     pd.DataFrame(embedding_sided, columns=('x', 'y'), index=in_from_senses_sided.index)],
    axis=1)
in_from_senses_bk_sided['colour'] = [typecolourdict[atype] for atype in in_from_senses_bk_sided.type_name]
in_from_senses_bk_sided['name'] = [names[skid] for skid in in_from_senses_bk_sided.index]
in_from_senses_bk_sided['type_add'] = [types_add[skid] for skid in in_from_senses_bk_sided.index]
# change column names because bokeh doesn't like the dashes
in_from_senses_bk_sided = in_from_senses_bk_sided.rename(columns={'gustatory-external': 'gustatory_external',
                                                                  'gustatory-pharyngeal': 'gustatory_pharyngeal',
                                                                  'thermo-cold': 'thermo_cold',
                                                                  'thermo-warm': 'thermo_warm'})
round_cols = ['gustatory_external','gustatory_pharyngeal','thermo_cold','thermo_warm','enteric','olfactory','respiratory','visual']
in_from_senses_bk_sided[round_cols] = in_from_senses_bk_sided[round_cols].round(3)

# scale text size by value
# Each sense column gets a companion "<...>_fontsize" column that the hover
# tooltips paste into a CSS `font-size:` declaration. CSS lengths need a unit,
# so the value is written as e.g. "12px" (a bare "12" is not a valid font-size).
base = 10   # font size in px for a value of 0
scale = 10  # extra px per unit of value; adjust the scaling factor as needed
fontsize_columns = {
    'olfactory': 'olfactory_fontsize',
    'enteric': 'enteric_fontsize',
    'gustatory_external': 'gustatory_external_fontsize',
    'gustatory_pharyngeal': 'gustatory_pharyngeal_fontsize',
    'respiratory': 'respiratory_fontsize',
    'thermo_cold': 'cold_fontsize',
    'thermo_warm': 'warm_fontsize',
    'visual': 'visual_fontsize',
}
for value_col, fontsize_col in fontsize_columns.items():
    in_from_senses_bk_sided[fontsize_col] = [
        f"{base + int(value * scale)}px" for value in in_from_senses_bk_sided[value_col]
    ]

# plotting
# Same interactive scatter as the unsided version, but fed from the sided
# frame: one point per row of `in_from_senses_bk_sided`.
datasource = ColumnDataSource(in_from_senses_bk_sided)

plot = figure(
    title='UMAP projection of input from senses',
    width=800,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover template: "@column" placeholders are filled from the hovered row; each
# row's font size comes from the matching *_fontsize column.
plot.add_tools(HoverTool(tooltips="""
<div>
    <div><span style='font-size: 15px'> @type_name; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

# One circle per row, coloured by `colour`, legend grouped by `type_name`;
# unselected points fade to alpha 0.05 once a selection exists.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)


### Create a Select widget for type_name ###
# Options are 'all' plus every distinct type; the initial value is an empty
# string, which is not one of the options.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(in_from_senses_bk_sided['type_name'].unique()), 
                     value="")

# Define a JavaScript callback function for the widget
# It selects every row whose type_name equals the drop-down value, or every
# row when the value is "all".
callback = CustomJS(args=dict(source=datasource, select=type_select), code="""
    const selected_type = select.value;
    const indices = [];
    const data = source.data;
    const typeName = data.type_name;
    for (let i = 0; i < typeName.length; i++) {
        if (typeName[i] === selected_type || selected_type === "all") {
            indices.push(i);
        }
    }
    source.selected.indices = indices;
    source.change.emit();
""")
# Attach the JavaScript callback function to the widget's value change event
type_select.js_on_change('value', callback)

plot.legend.location = "top_left"

# NOTE: rebinds `layout`, the name the import cell gives to bokeh.layouts.layout.
layout = column(type_select, plot)

# show result
show(layout)

### all input

In [None]:
reducer = umap.UMAP()

# Embed the rows of the transposed `stepsn_noside` matrix.
in_from_senses_data = stepsn_noside.T.values
embedding = reducer.fit_transform(in_from_senses_data)
# embedding.shape: (1664, 2)

# prepare the data
# The embedding rows are in the row order of `stepsn_noside.T`, so they must be
# labelled with that index. pd.concat aligns on labels, so labelling them with
# the index of a different frame would attach coordinates to the wrong rows
# whenever the two indexes differ in order or content.
all_sensory_bk = pd.concat([stepsn_noside.T,
           pd.DataFrame(embedding, columns=('x', 'y'), index = stepsn_noside.T.index)], axis = 1)
all_sensory_bk = all_sensory_bk.round(3)

# inherit the font size and meta information from in_from_senses_bk
# Index.difference gives a stable (sorted) column order; going through set()
# made the order vary from run to run.
cols_to_merge = list(in_from_senses_bk.columns.difference(['x', 'y']))
all_sensory_bk = pd.merge(all_sensory_bk, in_from_senses_bk[cols_to_merge], left_index=True, right_index=True)
all_sensory_bk

In [None]:
# Interactive scatter of the all-input embedding with a type drop-down.
# The data source exposes every column of `all_sensory_bk` to the glyphs, the
# hover template and the JavaScript callback.
datasource = ColumnDataSource(all_sensory_bk)

plot = figure(
    title='UMAP projection of individual inputs [select type]',
    width=1200,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover template: "@column" placeholders are filled from the hovered row. The
# header line shows `type_add` and `name`; each sense row takes its font size
# from the matching *_fontsize column.
plot.add_tools(HoverTool(tooltips="""
<div>
    <div><span style='font-size: 15px'>@type_add; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

# One circle per row, coloured by `colour`, legend grouped by `type_name`;
# unselected points fade to alpha 0.05 once a selection exists.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)


### Create a Select widget for type_name ###
# Options are 'all' plus every distinct type; the initial value is an empty
# string, which is not one of the options.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(all_sensory_bk['type_name'].unique()), 
                     value="")

# Define a JavaScript callback function for the widget
# It selects every row whose type_name equals the drop-down value, or every
# row when the value is "all".
callback = CustomJS(args=dict(source=datasource, select=type_select), code="""
    const selected_type = select.value;
    const indices = [];
    const data = source.data;
    const typeName = data.type_name;
    for (let i = 0; i < typeName.length; i++) {
        if (typeName[i] === selected_type || selected_type === "all") {
            indices.push(i);
        }
    }
    source.selected.indices = indices;
    source.change.emit();
""")
# Attach the JavaScript callback function to the widget's value change event
type_select.js_on_change('value', callback)

plot.legend.location = "top_left"

# NOTE: rebinds `layout`, the name the import cell gives to bokeh.layouts.layout;
# the save cell below writes this variable to disk.
layout = column(type_select, plot)

# show result
show(layout)

In [None]:
# save it
# Writes the most recently built `layout` to a standalone HTML file, but only
# when the notebook-level `savefig` flag is on.
# NOTE(review): the target is an absolute path on one machine, and output_file()
# presumably changes where later show() calls send their output — confirm.
if savefig: 
    output_file("/Users/yijieyin/Downloads/interactive_umap_all_input_by_type.html")  # Specifies the output file
    save(layout)  # Saves the plot to the file

the types are in the names

In [None]:
# Two data sources over the same frame:
#   * `datasource` drives the visible circles and carries the selection;
#   * `s2` drives an invisible copy of the circles that only the hover tool
#     looks at, so that after a type is chosen only those points show tooltips.
datasource = ColumnDataSource(all_sensory_bk)
s2 = ColumnDataSource(all_sensory_bk)

plot = figure(
    title='UMAP projection of individual inputs [select type - selective hover]',
    width=1200,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Visible points: coloured by `colour`, legend grouped by `type_name`;
# unselected points fade to alpha 0.05 once a selection exists.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)
# Hover-only points, drawn from `s2`.
renderer2 = plot.circle(
    'x',
    'y',
    source=s2,
    alpha=0  # set alpha to 0 to make this renderer invisible
)

# The hover tool is restricted to the invisible renderer. "@column"
# placeholders are filled from the hovered row of `s2`.
plot.add_tools(HoverTool(renderers=[renderer2], tooltips="""
<div>
    <div><span style='font-size: 15px'>@type_add; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

### Create a Select widget for type_name ###
# Options are 'all' plus every distinct type; the initial value is an empty
# string, which is not one of the options.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(all_sensory_bk['type_name'].unique()), 
                     value="")

# Define a JavaScript callback function for the widget
# Step 1 selects, in `source1`, every row of the chosen type (or all rows for
# "all"). Step 2 rebuilds every column of `source2` from the matching rows of
# `source1`, so the hover renderer only contains the selected points. The
# membership test uses a Set, so each column is filtered in linear time rather
# than scanning the index array once per row.
callback = CustomJS(args=dict(source1=datasource, source2=s2, select=type_select), code="""
    const selected_type = select.value;
    const indices = [];
    const data1 = source1.data;
    const data2 = source2.data;
    const typeName = data1.type_name;
    for (let i = 0; i < typeName.length; i++) {
        if (typeName[i] === selected_type || selected_type === "all") {
            indices.push(i);
        }
    }
    source1.selected.indices = indices;
    source1.change.emit();

    // Update the data of the second data source to match the selected data
    const keep = new Set(indices);
    for (let key in data2) {
        data2[key] = data1[key].filter((_, i) => keep.has(i));
    }
    source2.change.emit();
""")

# Attach the JavaScript callback function to the widget's value change event
type_select.js_on_change('value', callback)

plot.legend.location = "top_left"

# NOTE: rebinds `layout`, the name the import cell gives to bokeh.layouts.layout.
layout = column(type_select, plot)

# show result
show(layout)

#### with bar plots

In [None]:
# Copy of `stepsn_noside` with one extra column, `modality`, looked up from
# each row label through `nametotype_add`.
stepsn_wmodal = stepsn_noside.copy()
stepsn_wmodal['modality'] = [nametotype_add[name] for name in stepsn_wmodal.index]
# remove things inside brackets to make x axis tick labels smaller
# (raw string: '\(' in a normal string literal is an invalid escape sequence)
stepsn_wmodal.index = [re.sub(r'\(.*\)', '', x) for x in stepsn_wmodal.index]

In [None]:
# One ColumnDataSource per modality, used by that modality's bar plot.
# The list order is the order in which the modalities are plotted.
modalities = ['thermo-cold','thermo-warm','respiratory','visual','olfactory','enteric','gustatory-pharyngeal','gustatory-external']
sources = {}
for modality in modalities:
    # Row labels of `stepsn_wmodal` that belong to this modality.
    modality_labels = stepsn_wmodal.index[stepsn_wmodal.modality.isin([modality])]
    # Bars start at height 0 until a callback fills in real values.
    sources[modality] = ColumnDataSource(data=dict(index=modality_labels,
                                                   top=[0]*len(modality_labels)))

In [None]:
from bokeh.layouts import row


In [None]:
# Create a bar plot for each modality
# (width, height) of each bar plot; modalities not listed use the wide default.
bar_plot_sizes = {
    'thermo-cold': (200, 200),
    'thermo-warm': (200, 200),
    'visual': (400, 200),
    'respiratory': (400, 200),
}
default_bar_plot_size = (1300, 150)

bar_plots = []
for modality in modalities:
    this_mod = stepsn_wmodal[stepsn_wmodal.modality.isin([modality])]
    bar_width, bar_height = bar_plot_sizes.get(modality, default_bar_plot_size)
    plot = figure(x_range = list(this_mod.index), title=modality, tools='box_select,reset, wheel_zoom',
                  width = bar_width, height = bar_height)
    plot.vbar(x='index', top='top', source=sources[modality], width=0.5)
    # rotate 45 degrees
    plot.xaxis.major_label_orientation = math.pi/4
    # NOTE(review): this sets the font size of the axis *label*, and no axis label
    # is set here. If the aim was smaller tick labels, the property is presumably
    # `major_label_text_font_size` — confirm before changing.
    plot.xaxis.axis_label_text_font_size = "2pt"
    plot.y_range = Range1d(0, 0.3)
    bar_plots.append(plot)

# scatter plot
# `neuron_name` duplicates the index as a regular column so the JavaScript
# callbacks can read it by name.
all_sensory_bk['neuron_name'] = all_sensory_bk.index
datasource = ColumnDataSource(all_sensory_bk)
s2 = ColumnDataSource(all_sensory_bk)

plot = figure(
    title='UMAP projection of individual inputs',
    width=500,
    height = 400,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset, tap, lasso_select')
)

# Visible points; these carry the selection.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=1,
    nonselection_alpha=0.05
)
# add a transparent one for selective hovering
renderer2 = plot.circle(
    'x',
    'y',
    source=s2,
    alpha=0  # set alpha to 0 to make this renderer invisible
)

# Text area that reports which neuron(s) are currently selected.
div = Div(text="Selected neuron name: ", width = 400)
# Options are 'all' plus every distinct type; the initial value is an empty
# string, which is not one of the options.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(all_sensory_bk['type_name'].unique()), 
                     value="")

# Define a JavaScript callback function for the widget
# Selects every row of the chosen type in `source1`, lists their names in the
# Div (nothing is listed for "all"), and rebuilds `source2` from the selected
# rows so that only those points show hover tooltips. The membership test uses
# a Set, so each column is filtered in linear time.
type_select_callback = CustomJS(args=dict(source1=datasource, source2=s2, select=type_select, div = div), code="""
    const selected_type = select.value;
    const indices = [];
    const names = [];
    const data1 = source1.data;
    const data2 = source2.data;
    const typeName = data1.type_name;
    const allNames = data1.neuron_name;
    for (let i = 0; i < typeName.length; i++) {
        if (typeName[i] === selected_type || selected_type === "all") {
            indices.push(i);
            if (selected_type !== 'all'){
                names.push(allNames[i]);
            }
        }
    }
    source1.selected.indices = indices;
    source1.change.emit();
    div.text = 'Selected neuron name: ' + names;

    // Update the data of the second data source to match the selected data
    const keep = new Set(indices);
    for (let key in data2) {
        data2[key] = data1[key].filter((_, i) => keep.has(i));
    }
    source2.change.emit();

    """)
type_select.js_on_change('value', type_select_callback)

# After a lasso selection, list the names of the selected rows in the Div.
lasso_callback = CustomJS(args=dict(source=datasource, div=div), code="""
    const indices = source.selected.indices;
    const original_indices = indices.map(i => source.data.neuron_name[i]);
    const original_indices_string = original_indices.join(", ");
    // Update the text of the Div
    div.text = "Selected Indices: " + original_indices_string;
""")

plot.js_on_event('selectiongeometry', lasso_callback)


# Define a JavaScript callback to update 'stepsn_source' when a circle is selected
# On tap, the first selected row's neuron name is used as a column name of
# `stepsn_wmodal`; for every modality, the values of that column in the rows
# of that modality become the bar heights of the modality's bar plot.
stepsn_wmodal_source = ColumnDataSource(stepsn_wmodal)
callback = CustomJS(args=dict(source=datasource, 
                              sources = sources, 
                              stepsn_wmodal = stepsn_wmodal_source, 
                             div = div), 
                    code="""
    var selected_indices = source.selected.indices;
    if (selected_indices.length == 0) {
        return;
    }    
    var selected_skid = source.data['neuron_name'][selected_indices[0]];
    div.text = "Selected neuron name: " + selected_skid;

    // update bar plots 
    for (var modality in sources) {
    var source_to_update = sources[modality];

    var new_data = [];
    for (var i = 0; i < stepsn_wmodal.data['modality'].length; i++) {
        if (stepsn_wmodal.data['modality'][i] === modality) {
            new_data.push(stepsn_wmodal.data[selected_skid][i]);
        }
    }
    
    source_to_update.data['top'] = new_data;
    source_to_update.change.emit();
    
    
}

""")

plot.js_on_event(events.Tap, callback)


# add text on hover
# The hover tool only looks at the invisible renderer backed by `s2`.
plot.add_tools(HoverTool(renderers=[renderer2], tooltips="""
<div>
    <div><span style='font-size: 15px'>@type_add; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

plot.legend.location = "top_right"
plot.legend.label_text_font_size = '8pt'

# Combine all plots together
# Top row: first two bar plots | scatter | next two bar plots | widgets;
# the remaining (wide) bar plots are stacked underneath.
layout = column(row(column(*bar_plots[0:2]), plot, column(*bar_plots[2:4]), column(type_select, div)), *bar_plots[4:])
# layout = column(row(plot, *bar_plots[4:]), *bar_plots[0:4])

# Show the result
show(layout)

TODO take max
PN-somato: mechanosensory, nociceptive 
would be nice if it's in CATMAID 
DANg1 and DANi1 are very similar in the plot above. Perhaps sign flip somewhere? 
Eschbach & Fushiki paper: feedback and feed across - cross compartment neurons - look for these for these DANs 
appetitive DANs that synapse to MBONs that are aversive 
    stimulate appetitive DAN compartment 
    learning involves depression of KC-MBON synapse
some MBONs of the medial lobe might be inhibitory 
associate with appetitive reward, weaken i1 
look at the feed across motifs and MBON i1 and g1 - try to make sense of that. 
go from ORNS with valences to the i1 and g1. 
    and what happens afterwards? 
you can have a section on this in your thesis 
aversive together 
j1 distant 
all the narrative for DANs and MBINs 
b1&b2 similar 
PED different from VL and ML 

#### sided

In [None]:
reducer = umap.UMAP()

# Embed the rows of the transposed `stepsn` matrix (sided, i.e. not grouped by name).
in_from_senses_data = stepsn.T.values
embedding = reducer.fit_transform(in_from_senses_data)

# prepare the data
# The embedding is labelled with the index of the frame it was computed from,
# so pd.concat lines the coordinates up with the right rows.
all_sensory_bk_sided = pd.concat([stepsn.T,
           pd.DataFrame(embedding, columns=('x', 'y'), index = stepsn.T.index)], axis = 1)
all_sensory_bk_sided = all_sensory_bk_sided.round(3)
all_sensory_bk_sided['side'] = [sides[skid] for skid in all_sensory_bk_sided.index]

# inherit the font size and meta information from in_from_senses_bk
# Index.difference gives a stable (sorted) column order; going through set()
# made the order vary from run to run.
cols_to_merge = list(in_from_senses_bk_sided.columns.difference(['x', 'y']))
all_sensory_bk_sided = pd.merge(all_sensory_bk_sided, in_from_senses_bk_sided[cols_to_merge], left_index=True, right_index=True)
all_sensory_bk_sided

In [None]:
# plotting
# Interactive scatter of the sided all-input embedding with two drop-downs
# (type and side) whose conditions are combined.
datasource = ColumnDataSource(all_sensory_bk_sided)

plot = figure(
    title='UMAP projection of individual inputs',
    width=1200,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover template: "@column" placeholders are filled from the hovered row. The
# header line shows `type_add`, `name` and `side`.
plot.add_tools(HoverTool(tooltips="""
<div>
    <div><span style='font-size: 15px'>@type_add; @name; @side: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

# One circle per row, coloured by `colour`, legend grouped by `type_name`;
# unselected points fade to alpha 0.05 once a selection exists.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)


### Create a Select widget for type_name ###
# Options are 'all' plus every distinct type; the initial value is an empty
# string, which is not one of the options.
type_select = Select(title="Select Type:", 
                     options=['all'] + list(all_sensory_bk_sided['type_name'].unique()), 
                     value="")

# Same idea for the `side` column.
side_select = Select(title="Select Side:", 
                     options=['all'] + list(all_sensory_bk_sided['side'].unique()), 
                     value="")


# Define a JavaScript callback function for the widget
# A row is selected only if it passes BOTH filters; "all" makes a filter pass
# for every row. Because both widgets start with an empty value, nothing
# matches until each drop-down has been given a value.
callback = CustomJS(args=dict(source=datasource, select_type=type_select, select_side=side_select), code="""
    const selected_type = select_type.value;
    const selected_side = select_side.value;
    const indices = [];
    const data = source.data;
    const typeName = data.type_name;
    const sideName = data.side;
    for (let i = 0; i < typeName.length; i++) {
        if ((typeName[i] === selected_type || selected_type === "all") &&
            (sideName[i] === selected_side || selected_side === "all")) {
            indices.push(i);
        }
    }
    source.selected.indices = indices;
    source.change.emit();
""")

# Attach the JavaScript callback function to the widget's value change event
# (the same callback serves both widgets, so changing either re-evaluates both filters)
type_select.js_on_change('value', callback)
side_select.js_on_change('value', callback)

plot.legend.location = "top_left"

# NOTE: rebinds `layout`, the name the import cell gives to bokeh.layouts.layout.
layout = column(row(type_select, side_select), plot)

# show result
show(layout)

##### skid select

In [None]:
# Interactive scatter of the sided all-input embedding; points are selected by
# typing their ids into a text box.
datasource = ColumnDataSource(all_sensory_bk_sided)

plot = figure(
    title='UMAP projection of individual inputs [query by skid]',
    width=1200,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset')
)

# Hover template: "@column" placeholders are filled from the hovered row.
plot.add_tools(HoverTool(tooltips="""
<div>
    <div><span style='font-size: 15px'>@type_add; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

# One circle per row, coloured by `colour`, legend grouped by `type_name`;
# unselected points fade to alpha 0.05 once a selection exists.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=0.8,
    nonselection_alpha=0.05
)

### select skids ###
# Create a TextInput widget for entering comma-separated indices
index_input = TextInput(placeholder="Enter ids (comma-separated)", width = 200)

# The typed text is split on commas and trimmed; every id found in the source's
# `index` column selects the first row carrying it, and unknown ids are ignored.
# Ids are compared as strings so a numeric index still matches typed text, and
# a lookup map replaces the repeated includes()/indexOf() scans.
callback = CustomJS(args=dict(source=datasource, index_input=index_input), code="""
    const wanted = index_input.value.split(",").map(x => x.trim());
    const index_values = Array.from(source.data.index, v => String(v));

    // id -> position of its first occurrence
    const position = new Map();
    index_values.forEach((v, i) => {
        if (!position.has(v)) {
            position.set(v, i);
        }
    });

    const selected_indices = [];
    for (const id of wanted) {
        if (position.has(id)) {
            selected_indices.push(position.get(id));
        }
    }

    source.selected.indices = selected_indices;
    source.change.emit();
""")


# Attach the JavaScript callback function to the widget's value change event
index_input.js_on_change('value', callback)

plot.legend.location = "top_left"

# NOTE: rebinds `layout`, the name the import cell gives to bokeh.layouts.layout;
# the save cell below writes this variable to disk.
layout = column(index_input, plot)

# show result
show(layout)


In [None]:
# save it
# Writes the most recently built `layout` to a standalone HTML file, but only
# when the notebook-level `savefig` flag is on.
# NOTE(review): the target is an absolute path on one machine, and output_file()
# presumably changes where later show() calls send their output — confirm.
if savefig: 
    output_file("/Users/yijieyin/Downloads/interactive_umap_all_input_by_skid.html")  # Specifies the output file
    save(layout)  # Saves the plot to the file

##### lasso select + skid select 


In [None]:
# Expose the index as an explicit `skid` column so the JavaScript callbacks can
# read the ids by a known column name.
all_sensory_bk_sided['skid'] = all_sensory_bk_sided.index

datasource = ColumnDataSource(all_sensory_bk_sided)

plot = figure(
    title='UMAP projection of individual inputs [query by skid + lasso select]',
#     width=1200,
#     plot_height=600,
    tools=('pan, wheel_zoom, reset, lasso_select')
)

# One circle per row, coloured by `colour`, legend grouped by `type_name`;
# selected points are fully opaque and unselected ones fade to alpha 0.05.
renderer = plot.circle(
    'x',
    'y',
    source=datasource,
    color='colour',
    legend_field = 'type_name',
    line_alpha=0.6,
    fill_alpha=0.6,
    size=4, 
    selection_alpha=1,
    nonselection_alpha=0.05
)

### select skids ###
# Create a TextInput widget for entering comma-separated indices
index_input = TextInput(placeholder="Enter ids (comma-separated)", width = 200)

# The typed text is split on commas and trimmed; every id found in the `skid`
# column selects the first row carrying it, and unknown ids are ignored.
# Ids are compared as strings so numeric ids still match typed text, and a
# lookup map replaces the repeated includes()/indexOf() scans.
callback = CustomJS(args=dict(source=datasource, index_input=index_input), code="""
    const wanted = index_input.value.split(",").map(x => x.trim());
    const skid_values = Array.from(source.data.skid, v => String(v));

    // id -> position of its first occurrence
    const position = new Map();
    skid_values.forEach((v, i) => {
        if (!position.has(v)) {
            position.set(v, i);
        }
    });

    const selected_indices = [];
    for (const id of wanted) {
        if (position.has(id)) {
            selected_indices.push(position.get(id));
        }
    }

    source.selected.indices = selected_indices;
    source.change.emit();
""")

# Attach the JavaScript callback function to the widget's value change event
index_input.js_on_change('value', callback)

# Define a Div object
div = Div(text="")
### lasso select ###
# After a lasso selection, write the ids of the selected rows into the Div.
lasso_callback = CustomJS(args=dict(source=datasource, div=div), code="""
    const indices = source.selected.indices;
    const original_indices = indices.map(i => source.data.skid[i]);
    const original_indices_string = original_indices.join(", ");
    // Update the text of the Div
    div.text = "Selected Indices: " + original_indices_string;
""")

plot.js_on_event('selectiongeometry', lasso_callback)

# Hover template: "@column" placeholders are filled from the hovered row.
plot.add_tools(HoverTool(tooltips="""
<div>
    <div><span style='font-size: 15px'>@type_add; @name: </span></div>
    <div><span style='font-size: @olfactory_fontsize; color: #fc0303'>Olfactory: @olfactory{0.000}</span></div>
    <div><span style='font-size: @enteric_fontsize; color: #224499'>Enteric: @enteric{0.000}</span></div>
    <div><span style='font-size: @gustatory_external_fontsize; color: #fca103'>Gustatory external: @gustatory_external{0.000}</span></div>
    <div><span style='font-size: @gustatory_pharyngeal_fontsize; color: #f8fc03'>Gustatory_pharyngeal: @gustatory_pharyngeal</span></div>
    <div><span style='font-size: @respiratory_fontsize; color: #4efc03'>Respiratory: @respiratory</span></div>
    <div><span style='font-size: @cold_fontsize; color: #2003fc'>Thermo cold: @thermo_cold</span></div>
    <div><span style='font-size: @warm_fontsize; color: #fc03db'>Thermo warm: @thermo_warm</span></div>
    <div><span style='font-size: @visual_fontsize; color: #03e8fc'>Visual: @visual{0.000}</span></div>
</div>
"""))

plot.legend.location = "top_left"

# NOTE: rebinds `layout`, the name the import cell gives to bokeh.layouts.layout;
# the save cell below writes this variable to disk.
layout = column(index_input, plot, div)

# show result
show(layout)

In [None]:
# Export the lasso + skid-query UMAP to a standalone HTML file.
# NOTE(review): this turns the notebook-wide `savefig` flag on, so every later
# `if savefig:` cell will also write its figure to disk.
savefig = True
if savefig:
    output_file(filename="/Users/yijieyin/Downloads/interactive_umap_all_input_by_skid_lasso.html")
    save(obj=layout)

# Zooming in 

## ORNs-DANs

In [None]:
# Sub-matrix of `stepsn`: rows are olfactory neurons, columns are MBINs.
orndan = stepsn.loc[[types_add[skid] =='olfactory' for skid in stepsn.index], 
                    [types[skid] =='MBIN' for skid in stepsn.columns]]
# change skid to neuron names in the columns and rows 
colnames = [types_add[skid]+'_'+sides[skid] for skid in orndan.columns]
# colnames = [types_add[skid] for skid in orndan.columns]
rownames = [names[skid] for skid in orndan.index]
orndan.columns = colnames
orndan.index = rownames
# Keep the label lists sorted as well, in case they are inspected later.
colnames.sort()
rownames.sort()
# Sort by label with sort_index rather than `.loc[rownames, colnames]`:
# label-based selection repeats rows/columns when a label occurs more than once
# (two MBINs can share the same `type_side` label), whereas sort_index only reorders.
orndan = orndan.sort_index(axis=0).sort_index(axis=1)
orndan.head()

In [None]:
# Shade the whole ORN -> MBIN table on one common colour scale, spanning the
# smallest to the largest entry of `orndan`.
vmin, vmax = orndan.min().min(), orndan.max().max()

orndan_dp = orndan.style.background_gradient(vmin=vmin, vmax=vmax, cmap='Blues')
display(orndan_dp)

In [None]:
# How much of each MBIN's input comes from ORNs?
# Summing over rows gives one total per column of `orndan`; the histogram
# shows how those per-column totals are distributed.
orndan.sum(axis = 'rows').hist()

In [None]:
# How much input does each individual ORN account for?
# `stack()` flattens the table into one long Series, so the histogram is over
# every single (ORN, MBIN) entry.
orndan.stack().plot.hist()

## ORNs-KCs

In [None]:
# Sub-matrix of `stepsn`: rows are olfactory neurons, columns are KCs
# (columns keep their skids; only the rows are renamed).
ornkc = stepsn.loc[[types_add[skid] =='olfactory' for skid in stepsn.index], 
                    [types[skid] =='KC' for skid in stepsn.columns]]
rownames = [names[skid] for skid in ornkc.index]
ornkc.index = rownames
rownames.sort()
# Sort rows by neuron name with sort_index instead of `.loc[rownames, :]`, which
# would repeat rows if a name occurred more than once (same approach as `orndan`).
ornkc = ornkc.sort_index(axis=0)
ornkc.head()

In [None]:
# How much of each KC's input comes from ORNs?
# Summing over rows gives one total per KC column of `ornkc`.
ornkc.sum(axis = 'rows').hist()

More than DANs. 

In [None]:
# Shade the whole ORN -> KC table on one common colour scale, spanning the
# smallest to the largest entry of `ornkc`.
vmin, vmax = ornkc.min().min(), ornkc.max().max()

ornkc_dp = ornkc.style.background_gradient(vmin=vmin, vmax=vmax, cmap='Blues')
display(ornkc_dp)

In [None]:
# How much input does each individual ORN account for?
# `stack()` flattens the table, so the histogram is over every (ORN, KC) entry.
ornkc.stack().plot.hist(bins = 50)

## colours

In [None]:
# Colour lookup {cell type -> hex colour}: one "icefire" colour per cell type
# found in `dans_sensein`, plus yellow for KCs.
dan_cell_types = dans_sensein.cell_type.unique()
kcdancols = sns.color_palette("icefire", n_colors=len(dan_cell_types))
kcdancols.append("yellow")  # KC is yellow
kcdan_col = dict(zip(np.append(dan_cell_types, ['KC']), kcdancols.as_hex()))

## ORN-KC vs. ORN-DAN comparison
### dimensionality reduction

In [None]:
# PCA over cells: every row of `x` is one KC or one MBIN, every column one ORN.
pca = PCA()
x = pd.concat([ornkc.T, orndan.T])
x_pca = pd.DataFrame(pca.fit_transform(x))
x_pca.head()

In [None]:
# Per-component share of the variance for the PCA fitted above
# (presumably scikit-learn's PCA -- the import is outside this section).
pca.explained_variance_ratio_

In [None]:
# Number the components from 1, so that 'PC1' really is the first principal
# component. Counting from 0 made the plots of 'PC1' vs 'PC2' (labelled
# "Principal Component 1/2") show the second and third components instead.
x_pca.columns = ['PC{}'.format(n) for n in range(1, x_pca.shape[1] + 1)]
# Restore the cell labels (KC skids / MBIN names) that fit_transform dropped.
x_pca.index = x.index

In [None]:
# Extend the colour lookup with '<type>_left' / '<type>_right' keys that share
# the colour of the un-sided type.
kcdan_col_side = kcdan_col.copy()
for dan, dan_hex in kcdan_col.items():
    for side_suffix in ('_left', '_right'):
        kcdan_col_side[dan + side_suffix] = dan_hex

In [None]:
# Cell-type label per row: MBIN rows are already named '<type>_<side>' (they
# contain '_'); all other rows are skids and are looked up in `types_add`.
x_pca['cell_type'] = [idx if '_' in idx else types_add[idx] for idx in x_pca.index]
# filter if desired
# x_pca = x_pca[['_' in atype for atype in x_pca.cell_type]]

fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('2 component PCA on ORN connections')

# One scatter call per colour key, in the key order of `kcdan_col_side`.
# NOTE(review): 'PC1'/'PC2' are whatever the column-naming cell above called
# them -- check they are the components the axis labels claim.
for target, color in kcdan_col_side.items():
    is_target = x_pca['cell_type'] == target
    ax.scatter(x_pca.loc[is_target, 'PC1'],
               x_pca.loc[is_target, 'PC2'],
               c=color,
               s=50)
# The legend labels are the keys of `kcdan_col`; they are matched to the
# scatter artists by position.
ax.legend(kcdan_col)
ax.grid()

Very little variance explained by the first two components

In [None]:
# Same PCA scatter, restricted to rows whose label contains '_' (the MBINs).
x_pca['cell_type'] = [idx if '_' in idx else types_add[idx] for idx in x_pca.index]
x_pca_f = x_pca[['_' in atype for atype in x_pca.cell_type]]

fig = plt.figure(figsize=(20, 12))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('2 component PCA')

# One scatter call per colour key, in the key order of `kcdan_col_side`.
for target, color in kcdan_col_side.items():
    is_target = x_pca_f['cell_type'] == target
    ax.scatter(x_pca_f.loc[is_target, 'PC1'],
               x_pca_f.loc[is_target, 'PC2'],
               c=color,
               s=50)
# Legend labels are the keys of `kcdan_col`, matched to the artists by position.
ax.legend(kcdan_col)
ax.grid()

Perhaps dimensionality reduction wasn't the way to go, because there are many more KCs than DANs, so dimensionality reduction would pay more attention to KCs presumably. 

### add valence of ORNs

In [None]:
# Valence assigned to each ORN, keyed by a short label that is matched as a
# SUBSTRING of the neuron name in the cells below.
# Key order matters: when several keys match one name, the first key in this
# dict wins. Names matching no key are treated as 'unknown' downstream.
ornv = {
    '22c': 'negative', 
    '47a': 'negative',
    '85c': 'neutral',
    '24a': 'positive', 
    '42a': 'positive', 
    '42b': 'positive', 
    '82a': 'positive', 
    '83a': 'unknown', 
    '35a': 'positive', 
    '45a': 'positive',
    '30a': 'neutral', 
    '59a': 'negative', 
    '63a': 'unknown', 
    '67b': 'positive', 
    '94a': 'positive', 
    '1a': 'negative', 
    '45b': 'negative', 
    '49a': 'negative', 
    '33a': 'unknown'}    

In [None]:
# Put the KC and MBIN columns side by side (rows are ORNs in both tables) and
# tag every ORN row with a valence.
ornkcdan = pd.concat([ornkc, orndan], axis=1)
vs = []
for typeside in ornkcdan.index:
    # valence from thesis: first `ornv` key found inside the row label
    vfromthesis = [valence for receptor, valence in ornv.items() if receptor in typeside]
    vs.append(vfromthesis[0] if vfromthesis else 'unknown')
ornkcdan['valence'] = vs
ornkcdan.head()

In [None]:
# Sum the ORN rows within each valence class, then transpose so that every row
# is one KC/MBIN and every column one valence.
# could sort values using the following line:
# ornkcdanv = ornkcdan.groupby('valence').sum().T.sort_values(['negative','positive'])
ornkcdanv = ornkcdan.groupby('valence').sum().T

# one common colour scale for the whole table
vmin, vmax = ornkcdanv.min().min(), ornkcdanv.max().max()

ornkcdanv_dp = ornkcdanv.style.background_gradient(vmin=vmin, vmax=vmax, cmap='Blues')
display(ornkcdanv_dp)

In [None]:
# Map every ORN skid (neurons whose name contains ' ORN ') to a valence:
# the first `ornv` key found inside the neuron's name, else 'unknown'.
ornskids = [skid for skid, name in names.items() if ' ORN ' in name]
orn_skid_v = {}
for skid in ornskids:
    orn_skid_v[skid] = next(
        (valence for receptor, valence in ornv.items() if receptor in names[skid]),
        'unknown',
    )

#### show path lengths with cumulative plot

In [None]:
# Per step: input that each DAN-j1 neuron receives from positive- and
# negative-valence ORNs. Colour encodes valence, line style encodes side.
ns = [skid for skid, name in types_add.items() if 'DAN-j1' in name]
inv = ['positive','negative']
v_colour = {'positive':'red', 'negative':'blue'}
side_linetype = {'left':'-', 'right':'--'}

fig, ax = plt.subplots(figsize = (15,10))
# x axis: 1-based step number, one per matrix in `steps_nosense`
step_numbers = [i+1 for i in range(len(steps_nosense))]
# n has to be one skid at a time, because you can't add input proportions 
for n in ns: 
    for v in inv: 
        ys = [] 
        skids = [key for key,value in orn_skid_v.items() if value==v]

        for this_step in steps_nosense: 
            # rows: ORNs of this valence; column: the single neuron n
            selected_inprop = this_step.loc[this_step.index.isin(skids), this_step.columns.isin([n])]
            # sum over rows (the selected ORNs) for the one selected column
            ys.append(selected_inprop.sum(axis = 0))
        
        # plotting: the label names both valence and side, so the four legend
        # entries can be told apart (previously they read left/left/right/right)
        plt.plot(step_numbers, 
                 ys, 
                 side_linetype[sides[n]], 
                 color = v_colour[v], 
                 label = '{} ORNs -> DAN-j1 {}'.format(v, sides[n]))
plt.xlabel('Steps from the senses')
plt.ylabel('Percentage input accounted for')
plt.legend()
plt.show()

In [None]:
# Cumulative version: running total over steps of the input each DAN-j1 neuron
# receives from positive- (red) and negative-valence (blue) ORNs.
ns = [skid for skid, name in types_add.items() if 'DAN-j1' in name]
inv = ['positive', 'negative']
v_colour = {'positive': 'red', 'negative': 'blue'}

fig, ax = plt.subplots(figsize=(15, 20))
# n has to be one skid at a time, because you can't add input proportions
for n in ns:
    for v in inv:
        skids = [key for key, value in orn_skid_v.items() if value == v]

        ys = []
        for i, this_step in enumerate(steps_nosense):
            selected_inprop = this_step.loc[this_step.index.isin(skids), this_step.columns.isin([n])]
            # total over the selected ORN rows for the single column n
            step_total = selected_inprop.sum(axis=0)
            # first step starts the running total; later steps add to it
            ys.append(step_total if i == 0 else ys[-1] + step_total)

        # plotting
        plt.plot([i + 1 for i in range(len(steps_nosense))],
                 ys,
                 color=v_colour[v])
plt.xlabel('Steps from the senses')
plt.ylabel('Percentage input accounted for')
plt.show()

In [None]:
# Positive- vs negative-valence ORN input, one point per KC/MBIN, coloured by
# (un-sided) cell type.
fig, ax = plt.subplots(figsize=(15, 10))

for ctype, col in kcdan_col_side.items():
    # sided keys ('<type>_left' / '<type>_right') are covered by their un-sided type
    if '_' in ctype:
        continue
    if ctype == 'KC':
        # KC rows are the ones without '_' in their label
        row_mask = ['_' not in cell_type for cell_type in ornkcdanv.index]
    else:
        # rows whose label contains this type name
        row_mask = [ctype in cell_type for cell_type in ornkcdanv.index]
    sel = ornkcdanv.loc[row_mask]
    ax.scatter(sel.positive, sel.negative, color=col, label=ctype)

ax.set_xlabel('Positive')
ax.set_ylabel('Negative')

ax.legend()
plt.show()

In [None]:
# Positive- vs negative-valence ORN input for DANs only, with a y = x guide.
fig, ax = plt.subplots(figsize=(15, 10))

for ctype, col in col_typeadd.items():
    # keep un-sided type names that mention 'DAN'
    if '_' in ctype or 'DAN' not in ctype:
        continue
    sel = ornkcdanv.loc[[ctype in cell_type for cell_type in ornkcdanv.index]]
    ax.scatter(sel.positive, sel.negative, color=col, label=ctype, s=50)

# Add y=x dashed line, and 'y = x' as text next to it (axes coordinates)
ax.axline((0, 0), slope=1, linestyle='--', color='gray', alpha=0.6)
ax.text(0.56, 0.8, 'y = x', fontsize=18, color='black', transform=ax.transAxes)

ax.set_xlabel('Positive')
ax.set_ylabel('Negative')

# Set x and y-axis limits to start from 0
ax.set_xlim(left=0)
ax.set_ylim(bottom=0)

ax.legend()
ax.set_title('Olfactory input valence for Dopaminergic neurons')
if savefig:
    plt.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/DAN_olf_valence.pdf', bbox_inches='tight')

Greg's comments:  
for KCs that take input from multiple PNs, do they tend to be positive?  
Davi's work: food odour related are positive  
maybe you can see valence  
reverse is not true: not the case for negative  
for DANs:  
    left/right consistent  
    positive/negative - try other senses?  


### do KCs that take input from more PNs tend to be more positive?

In [None]:
# Convert the values to booleans (non-zero -> True, zero -> False), then sum down each column:
# the result is, for every KC column, the number of ORN rows with a non-zero entry.
ornkc.astype(bool).sum(axis=0).hist()

So all KCs receive direct or indirect input from all ORNs, which makes the question a moot point.  
But try with direct connections from PNs?

## input from senses for KCs and DANs

### KCs: how much input from each senses? 

In [None]:
# One row per KC with its input from each sense (KC columns of `sensesn`,
# transposed), plus cell-type and side annotations.
is_kc_column = [types[skid] == 'KC' for skid in sensesn.columns]
kc_sensein = sensesn.loc[:, is_kc_column].T
kc_sensein['cell_type'] = [types_add[skid] for skid in kc_sensein.index]
kc_sensein['side'] = [sides[skid] for skid in kc_sensein.index]
kc_sensein.sort_values(by='cell_type').head()

In [None]:
# (number of KC rows, number of sense columns + the 2 annotation columns 'cell_type' and 'side')
kc_sensein.shape

In [None]:
# Colour scale limits come from the numeric sense columns only.
kc_sense_values = kc_sensein.drop(['cell_type', 'side'], axis=1)
vmin = kc_sense_values.min().min()
vmax = kc_sense_values.max().max()

# Show every KC, ordered by olfactory, then thermo-cold, then gustatory-external input.
kc_sensein_sorted = kc_sensein.sort_values(['olfactory', 'thermo-cold', 'gustatory-external'])
kc_sensein_dp = kc_sensein_sorted.style.background_gradient(cmap='Blues', vmin=vmin, vmax=vmax)
display(kc_sensein_dp)

### plot input from different senses for KCs and DANs

In [None]:
# Stack the KC rows on top of the DAN rows (row-wise concatenation).
kcdan_sensein = pd.concat([kc_sensein, dans_sensein], axis=0)
kcdan_sensein

In [None]:
# Long format: one row per (neuron, sense) with the value in 'inprop'.
# The neuron index is kept (ignore_index=False).
annotation_cols = ['cell_type', 'side']
cols_to_melt = [col for col in kcdan_sensein.columns if col not in annotation_cols]
kcdan_sensein_l = kcdan_sensein.melt(
    id_vars=annotation_cols,
    value_vars=cols_to_melt,
    var_name='sense',
    value_name='inprop',
    ignore_index=False,
)
kcdan_sensein_l

In [None]:
# Input per sense for every KC and DAN, coloured by cell type.
fig, ax = plt.subplots(figsize=(15, 20))
sns.scatterplot(
    data=kcdan_sensein_l,
    x='sense',
    y='inprop',
    hue='cell_type',
    palette=kcdan_col,
    ax=ax,
)
plt.show()

## enteric to DAN-j

You can get the same neurons by searching for `enteric sensory` in CATMAID.

In [None]:
# Sub-matrix of `stepsn`: rows are enteric neurons, columns are DAN-j1 neurons.
is_enteric_row = [types_add[skid] == 'enteric' for skid in stepsn.index]
is_danj1_column = [types_add[skid] == 'DAN-j1' for skid in stepsn.columns]
entdanj = stepsn.loc[is_enteric_row, is_danj1_column]
# Readable labels: '<type>_<side>_<skid>' for columns, '<name>_<side>_<skid>' for rows.
entdanj.columns = ['_'.join([types_add[skid], sides[skid], skid]) for skid in entdanj.columns]
entdanj.index = ['_'.join([names[skid], sides[skid], skid]) for skid in entdanj.index]
# Strongest inputs first (left DAN-j1, ties broken by the right one).
entdanj.sort_values(['DAN-j1_left_4414163', 'DAN-j1_right_4414184'], ascending=False)

In [None]:
# Column totals: summed enteric input for each DAN-j1 column.
entdanj.sum()

In [None]:
# Distribution, over the enteric neurons, of their input to the left DAN-j1.
entdanj['DAN-j1_left_4414163'].hist()

The ACa 01 and 02 are annotated `sugar sensory` in CATMAID - they are the only two enteric neurons that go to the protocerebrum

What's a normal amount of input?

In [None]:
# Compare the enteric -> DAN-j1 (left) values against all values in `stepsn`,
# grouped by the type of the column (postsynaptic) neuron.
data_by_type = {}
for skel_id in stepsn.columns:
    # collect each column under the type of its neuron
    data_by_type.setdefault(types[skel_id], []).append(stepsn[skel_id])

# Concatenate data within each type
data_by_type = {type_: np.concatenate(values) for type_, values in data_by_type.items()}

# Create the stacked histogram
fig, ax = plt.subplots(figsize=(10, 6))

type_labels = list(data_by_type.keys())
plt.hist(data_by_type.values(),
         label=type_labels,
         color=[typecolourdict.get(key) for key in type_labels],
         alpha=0.5,
         histtype='barstacked',
         bins=30)
ax.hist(entdanj['DAN-j1_left_4414163'], bins=30, color='red', label='DAN-j1_left')

# Label the axes
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')

# Log-scale the counts only (x stays linear)
# ax.set_xscale('log')
ax.set_yscale('log')

# Add the legend
ax.legend()

plt.show()

In [None]:
# there are actually quite a few high values here.
# Long format of `stepsn` (one row per pre/post pair), then the 100 largest weights.
long_df = (
    stepsn
    .reset_index()
    .melt(id_vars='index', var_name='post', value_name='weight')
    .rename(columns={'index': 'pre'})
)
long_df.sort_values('weight', ascending=False).head(100)

In [None]:
# DAN only: columns of `stepsn` whose `types_add` label mentions 'DAN'
dan_stepsn = stepsn[[col for col in stepsn.columns if 'DAN' in types_add[col]]]
# Prepare data according to types
data_by_type = {}
for skel_id in dan_stepsn.columns:
    this_type = types_add[skel_id]
    if this_type not in data_by_type:
        data_by_type[this_type] = []
    data_by_type[this_type].append(dan_stepsn[skel_id])

# Concatenate data within each type
data_by_type = {type_: np.concatenate(values) for type_, values in data_by_type.items()}

# Background: one filled histogram per DAN type
fig, ax = plt.subplots(figsize=(15, 10))

plt.hist(data_by_type.values(), label=list(data_by_type.keys()), 
        color=[col_typeadd.get(key) for key in data_by_type.keys()], alpha = 0.3,bins = 30, 
        histtype='stepfilled')

# Foreground: enteric -> DAN-j1. Colour and label are derived from each column
# name ('<type>_<side>_<skid>'), so they stay attached to the right column
# whatever the column order of `entdanj` is (previously a fixed
# [left, right] order was assumed).
entdanj_sides = [col.split('_')[1] for col in entdanj.columns]
entdanj_side_colour = {'left': 'red', 'right': 'blue'}
plt.hist(entdanj,
         color=[entdanj_side_colour[side] for side in entdanj_sides],
         label=['enteric->DAN-j1_' + side for side in entdanj_sides],
         bins=30)


# Label the axes
ax.set_xlabel('Input proportion for Dopaminergic neuron ("weight")')
ax.set_ylabel('Number of connections (log)')

# Log-scale the counts only (x stays linear)
# ax.set_xscale('log')
ax.set_yscale('log')

# Add the legend
ax.legend()
ax.set_title("Enteric input to DAN-j1")

if savefig: 
    plt.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/enteric_DANj1.pdf', bbox_inches='tight')
else: 
    plt.show()

which interneuron? 
what's normal amount of input? 

In [None]:
# Per step: input each DAN-j1 neuron receives from the two ACa neurons
# (names containing 'Sens-B1-ACa-01' / 'Sens-B1-ACa-02').
ns = [skid for skid, name in types_add.items() if 'DAN-j1' in name]
ins = [skid for skid, name in names.items() if ('Sens-B1-ACa-02' in name) or ('Sens-B1-ACa-01' in name)]
# # or all enteric neurons
# ins = [skid for skid, name in types_add.items() if 'enteric' in name]

fig, ax = plt.subplots(figsize=(15, 10))
# n has to be one skid at a time, because you can't add input proportions
for this_n in ns:
    ys = []
    for this_step in steps_nosense:
        selected_inprop = this_step.loc[this_step.index.isin(ins), this_step.columns.isin([this_n])]
        # total over the selected input rows for the single column this_n
        ys.append(selected_inprop.sum(axis=0))

    # plotting: one black line per DAN-j1, line style encodes its side
    plt.plot([i + 1 for i in range(len(steps_nosense))],
             ys,
             side_linetype[sides[this_n]],
             label=sides[this_n],
             color='black',
             lw=3)
plt.xlabel('Number of hops in the polysynaptic chain')
plt.ylabel('Percentage input accounted for')
plt.legend()
plt.title("DAN-j1's input from sugar-sensing neurons")
if savefig:
    plt.savefig('/Users/yijieyin/Desktop/conferences/PDN_symposium/sugarsensing_DANj1.pdf', bbox_inches='tight')
else:
    plt.show()

### igraph

In [None]:
import igraph as ig

In [None]:
# Work on a copy of the input-proportion matrix.
adj_matrix = ad_inprop.copy()

# Convert adjacency matrix to igraph.Graph object
g = ig.Graph.Weighted_Adjacency(adj_matrix.values.tolist())
# Re-assign the edge weights from the non-zero matrix entries, in the order
# returned by `nonzero()`.
# NOTE(review): this assumes igraph created the edges in that same order, and
# Weighted_Adjacency presumably sets a 'weight' attribute itself already --
# confirm that this line is needed.
g.es['weight'] = adj_matrix.values[adj_matrix.values.nonzero()]
# Vertex i corresponds to row i of the matrix; store its skid for lookups.
g.vs['skid'] = adj_matrix.index

In [None]:
# Define a function to calculate the total weight of a path
def total_weight(graph, path):
    """Return the product of the weights along `path`.

    `path` is a sequence of vertex ids; the weight of each consecutive pair
    (a, b) is read as `graph[a, b]`. A path with fewer than two vertices has
    no edges and yields 1.
    """
    product = 1
    for pre, post in zip(path, path[1:]):
        product *= graph[pre, post]
    return product

In [None]:
# Sources: the two ACa neurons; targets: the DAN-j1 neurons; paths are capped
# at `max_steps`.
source_neurons = [key for key, v in names.items() if ('Sens-B1-ACa-01' in v) or ('Sens-B1-ACa-02' in v)]
target_neurons = [key for key, v in types_add.items() if v == 'DAN-j1']
max_steps = 4

# Enumerate the simple paths for every (source, target) pair and store each as
# (path, total weight); the strongest paths come first after sorting.
all_paths = []
for source in tqdm(source_neurons):
    for target in target_neurons:
        simple_paths = g.get_all_simple_paths(g.vs.find(skid=source).index,
                                              g.vs.find(skid=target).index,
                                              cutoff=max_steps)
        all_paths.extend(
            (path, total_weight(g, path))
            for path in simple_paths
            if len(path) <= max_steps + 1  # +1 because the path includes the source neuron
        )
all_paths.sort(key=lambda path_and_weight: path_and_weight[1], reverse=True)

In [None]:
# Paths whose total weight exceeds 0.005 (`all_paths` is already sorted, strongest first).
[(path, weight) for path, weight in all_paths if weight>0.005]

In [None]:
# direct connections: paths made of just two vertices (source and target)
[(path, weight) for path, weight in all_paths if len(path)==2]

In [None]:
# One-hop paths (source -> intermediate -> target) and their middle vertex.
onehop = [(path, weight) for path, weight in all_paths if len(path)==3]
mid = [(path[1], weight) for path, weight in onehop]

# Total path weight routed through each intermediate neuron, keyed by igraph
# vertex id (`sums`) and by skid (`sums_skid`).
sums = {}
sums_skid = {}
for key, value in mid:
    skid = g.vs[key]['skid']
    # start from 0 the first time a neuron is seen, then keep adding
    sums[key] = sums.get(key, 0) + value
    sums_skid[skid] = sums_skid.get(skid, 0) + value

In [None]:
# Summed one-hop path weight per intermediate neuron, keyed by skid.
sums_skid

In [None]:
# Same totals, with each skid replaced by its neuron name.
[(names[skid],weight) for skid, weight in sums_skid.items()]

In [None]:
# Name of the neuron at igraph vertex 848.
# NOTE(review): 848 is a hard-coded vertex id, presumably read off the path
# listings above -- it is only valid for this exact matrix ordering.
names[g.vs[848]['skid']]

In [None]:
# Detailed type (`types_add`) of the neuron at igraph vertex 1524.
# NOTE(review): hard-coded vertex id, presumably read off the path listings above.
types_add[g.vs[1524]['skid']]

In [None]:
import networkx as nx

# Keep the direct (2-vertex) and one-hop (3-vertex) paths; weights are rounded.
inonehop = [(path, round(weight, 5)) for path, weight in all_paths if len(path)<=3]
# Create a directed graph
G = nx.DiGraph()

# Add every consecutive vertex pair of every path as an edge.
# NOTE(review): each edge stores the TOTAL weight of the path it came from, and
# an edge shared by several paths keeps the value of the last path added.
for path, weight in inonehop:
    for pre, post in zip(path, path[1:]):
        G.add_edge(pre, post, weight=weight)

# Assign the 'layer' attribute to the nodes:
# 0 = enteric (sources), 2 = DAN-j1 (targets), 1 = everything in between
for node in G.nodes:
    name = types_add[g.vs[node]['skid']]
    if name == 'enteric':
        G.nodes[node]["layer"] = 0
    elif name == 'DAN-j1':
        G.nodes[node]["layer"] = 2
    else:
        G.nodes[node]["layer"] = 1

# Edge thickness between 1 and 5. The widths are read from the edges themselves,
# in G.edges order, because `nx.draw` pairs `width[i]` with the i-th edge; the
# per-path list used before had a different length and order than the edges.
edge_weights = [w for _, _, w in G.edges(data='weight')]
w_min = min(edge_weights, default=0)
w_span = max(edge_weights, default=0) - w_min
# all-equal weights (span 0) get a uniform width instead of dividing by zero
scaled_weights = [(w - w_min) / w_span * 4 + 1 if w_span > 0 else 1 for w in edge_weights]

# Create a layout for the nodes
pos = nx.multipartite_layout(G, subset_key="layer")

# Adjust the vertical position of the first and last layers
vertical_spacing = 0.2
for node, coords in pos.items():
    if G.nodes[node]["layer"] == 0:
        pos[node] = (coords[0], coords[1] - vertical_spacing)
    elif G.nodes[node]["layer"] == 2:
        pos[node] = (coords[0], coords[1] + vertical_spacing)

# Draw the graph with edge thickness representing the weights
fig, ax = plt.subplots(figsize = (20,20))
nx.draw(G, pos, ax=ax, with_labels=True, node_color="lightblue", edge_color="gray", font_weight="bold", node_size=1000, width=scaled_weights)

In [None]:
# Print the coarse type (`types`) of every node of the drawn graph, in G.nodes order.
for node in G.nodes: 
    print(types[g.vs[node]['skid']])

### Hugin interneuron 
Hugin PC especially, relevant in bitter taste, also responsible for sensing infection and then stopping the larvae from eating - from [Surendran et al. 2017](https://journals.biologists.com/jeb/article/220/10/1774/17783/Pathogen-induced-food-evasion-behavior-in)

In [None]:
import pymaid
# Open a CATMAID connection; no server or credentials are passed here, so they
# are presumably picked up from the environment -- TODO confirm.
rm = pymaid.connect_catmaid()

In [None]:
# Query CATMAID for the neurons annotated 'Hugin PC' and display the result.
hugin = pymaid.find_neurons(annotations='Hugin PC')
hugin

In [None]:
# seems like these neurons are not in the ad connectome 
# One boolean per Hugin PC neuron: is its skeleton id a key of `types`?
[skid in types for skid in hugin.skeleton_id]

In [None]:
# Bare name: displays the repr of `generate_steps` (defined outside this
# section; presumably the helper that builds the step matrices -- TODO confirm).
generate_steps

meta annotation: 'papers' - all the published papers.  
Winding et al., Schlegel 2016, Miroschnikow 2017/2018 - then you should get all the neurons. 

This relates to a broader problem: how to relate any neuron to any neuron in the same way as relating sensory neurons to any neuron? I think it has to do with the initial matrix in matrix multiplications. 

TODO: 
group columns by pairs  
name the skids 

5 VPNs, they connect little to KCs   
Jiaqi chen: model of the visual lobe  
KCs & DANs: are the senses represented in a similar way?  
Cold go to KCs and warm not sure  
warm -> broad, from all ORNs, and warm sensors, olfaction depends on temperature  
cold: from a few PNs, then go straight upstairs   
https://www.science.org/doi/full/10.1126/sciadv.abg6707 


72, 73 KCs on either side  
how do you compare the input from KCs and DANs?  
some KCs specialise in thermo  
representation different from valence extraction? 