In [109]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import os
from scipy import stats
import pickle
import xml.etree.ElementTree as ET
import json

# Update these 2 variables

In [93]:
# ID of the mouse being analysed (example: G25); used to name the output folder and files
mouse_id = "test_000"

# Root directory of the cellfinder output, e.g.
# cellfinder_output_path = 'path/to/root/directory/of/cellfinder/output/data/'
# Keep the trailing '/': later cells build file paths from this value by plain string concatenation.
cellfinder_output_path = "/Users/grant/Desktop/mock_df/cellfinder_output/"


### Create new folder in your cellfinder output folder 


In [105]:
# Build the per-mouse results folder inside the cellfinder output directory.
# os.path.join tolerates a missing trailing separator on cellfinder_output_path.
new_folder_path = os.path.join(cellfinder_output_path, str(mouse_id) + "_Completed_Analysis")

# Record whether the folder was already there so the right message is printed;
# exist_ok=True keeps makedirs from raising if the folder appears in between.
folder_already_present = os.path.isdir(new_folder_path)
os.makedirs(new_folder_path, exist_ok=True)

if folder_already_present:
    print('path already exists, skipping...')
else:
    print('folder has been created.')
print(f'{new_folder_path}')



path already exists, skipping...
/Users/grant/Desktop/mock_df/cellfinder_output/test_000_Completed_Analysis


### Read in tdTomato & GFP output data from cellfinder

In [106]:
# Load the per-structure cellfinder summary table of each channel
analysis_dir = cellfinder_output_path + 'analysis/'
gfp_df = pd.read_csv(analysis_dir + 'gfp_summary.csv')
tdTomato_df = pd.read_csv(analysis_dir + 'tdTomato_summary.csv')

# Destination of the labelled-cells table that is written further down
output_path_csv = '/'.join([new_folder_path, mouse_id + '_labled_cells.csv'])

# Show the GFP summary as the cell output
gfp_df

Unnamed: 0,structure_name,left_cell_count,right_cell_count,total_cells,left_volume_mm3,right_volume_mm3,total_volume_mm3,left_cells_per_mm3,right_cells_per_mm3
0,Olfactory tubercle,6771.0,5166.0,11937.0,2.488875,2.452250,4.941125,2720.506253,2106.636762
1,Piriform area,5348.0,5904.0,11252.0,7.430750,7.541750,14.972500,719.712008,782.842179
2,"Retrosplenial area, dorsal part, layer 1",2026.0,1420.0,3446.0,0.536000,0.553500,1.089500,3779.850746,2565.492322
3,"Retrosplenial area, dorsal part, layer 2/3",1575.0,1281.0,2856.0,0.649250,0.642625,1.291875,2425.876011,1993.386501
4,"Retrosplenial area, lateral agranular part, la...",1413.0,929.0,2342.0,0.333000,0.330250,0.663250,4243.243243,2813.020439
...,...,...,...,...,...,...,...,...,...
663,"Interpeduncular nucleus, intermediate",0.0,0.0,0.0,0.025500,0.022750,0.048250,0.000000,0.000000
664,"Interpeduncular nucleus, dorsomedial",0.0,0.0,0.0,0.013125,0.011875,0.025000,0.000000,0.000000
665,"Interpeduncular nucleus, dorsolateral",0.0,0.0,0.0,0.025250,0.025625,0.050875,0.000000,0.000000
666,"Interpeduncular nucleus, rostrolateral",0.0,0.0,0.0,0.011250,0.012000,0.023250,0.000000,0.000000


# GFP Data

In [108]:
# Names of every brain structure listed in the GFP summary
all_gfp_brain_regions = gfp_df['structure_name'].tolist()
print(f"All Brain Regions: {len(all_gfp_brain_regions)}")

# Total GFP cell count of each structure, in the same row order as the names
all_gfp_brain_regions_cell_count = gfp_df['total_cells'].tolist()
print(f"total cells: {sum(all_gfp_brain_regions_cell_count)}")

# Map structure name -> cell count, then build a frame indexed by structure name
all_gfp_dictionary = {
    region: cell_count
    for region, cell_count in zip(all_gfp_brain_regions, all_gfp_brain_regions_cell_count)
}
all_gfp_df = pd.DataFrame.from_dict(all_gfp_dictionary, orient='index')

All Brain Regions: 668
total cells: 68313.0


In [97]:
# Keep only the structures in which at least one GFP cell was counted
gfp_cells_df = gfp_df.loc[gfp_df['total_cells'] >= 1]

# Names of those structures, and how many there are
gfp_brain_regions = gfp_cells_df['structure_name'].tolist()
gfp_brain_regions_sum = len(gfp_brain_regions)
print(f"GFP Brain Regions With labeled Cells: {gfp_brain_regions_sum}")

# Per-structure GFP cell counts and their whole-brain total
gfp_brain_regions_cell_count = gfp_cells_df['total_cells'].tolist()
gfp_labled_cells_sum = sum(gfp_brain_regions_cell_count)
print(f"total labeled GFP cells: {gfp_labled_cells_sum}")

# Map structure name -> cell count, then build a frame indexed by structure name
gfp_dictionary = {
    region: cell_count
    for region, cell_count in zip(gfp_brain_regions, gfp_brain_regions_cell_count)
}
gfp_df_01 = pd.DataFrame.from_dict(gfp_dictionary, orient='index')



GFP Brain Regions With labeled Cells: 308
total labeled GFP cells: 68313.0


In [120]:
# JSON file, inside the analysis folder, that receives the GFP-positive structure names
file_path = f"{new_folder_path}/gfp_brainregions_list.json"
print(file_path)

# Write the list of structure names as a JSON array
with open(file_path, 'w') as f:
    json.dump(gfp_brain_regions, f)

/Users/grant/Desktop/mock_df/cellfinder_output/test_000_Completed_Analysis/gfp_brainregions_list.json


In [121]:
# JSON file, inside the analysis folder, that receives the per-structure GFP cell counts
count_file_path = f"{new_folder_path}/gfp_brainregions_count.json"
print(count_file_path)

# Write the list of cell counts as a JSON array
with open(count_file_path, 'w') as f:
    json.dump(gfp_brain_regions_cell_count, f)

/Users/grant/Desktop/mock_df/cellfinder_output/test_000_Completed_Analysis/gfp_brainregions_count.json


# tdTomato Data

In [98]:

# Names of every brain structure listed in the tdTomato summary
all_tdTomato_brain_regions = tdTomato_df['structure_name'].tolist()
print(f"All Brain Regions: {len(all_tdTomato_brain_regions)}")

# Total tdTomato cell count of each structure, in the same row order as the names
all_tdTomato_brain_regions_cell_count = tdTomato_df['total_cells'].tolist()
print(f"total cells: {sum(all_tdTomato_brain_regions_cell_count)}")

# Map structure name -> cell count, then build a frame indexed by structure name
all_tdTomato_dictionary = {
    region: cell_count
    for region, cell_count in zip(all_tdTomato_brain_regions, all_tdTomato_brain_regions_cell_count)
}
all_tdTomato_df = pd.DataFrame.from_dict(all_tdTomato_dictionary, orient='index')


All Brain Regions: 668
total cells: 34938475


In [99]:
# Keep only the structures in which at least one tdTomato cell was counted
tdTomato_cells_df = tdTomato_df.loc[tdTomato_df['total_cells'] >= 1]

# Names of those structures, and how many there are
tdTomato_brain_regions = tdTomato_cells_df['structure_name'].tolist()
tdTomato_brain_regions_sum = len(tdTomato_brain_regions)
print(f"tdTomato Brain Regions With labeled Cells: {tdTomato_brain_regions_sum}")

# Per-structure tdTomato cell counts and their whole-brain total
tdTomato_brain_regions_cell_count = tdTomato_cells_df['total_cells'].tolist()
tdTomato_labled_cells_sum = sum(tdTomato_brain_regions_cell_count)
print(f"total labeled tdTomato cells: {tdTomato_labled_cells_sum}")

# Map structure name -> cell count, then build a frame indexed by structure name
tdTomato_dictionary = {
    region: cell_count
    for region, cell_count in zip(tdTomato_brain_regions, tdTomato_brain_regions_cell_count)
}
tdTomato_df_01 = pd.DataFrame.from_dict(tdTomato_dictionary, orient='index')


tdTomato Brain Regions With labeled Cells: 668
total labeled tdTomato cells: 34938475


## Create csv file with summary of gfp and tdTomato Cellcount and Brain Regions

In [100]:
# One row per channel: total labelled cells, number of structures that contain
# labelled cells, and the names of those structures.
data = {'total whole-brain cell count': [gfp_labled_cells_sum, tdTomato_labled_cells_sum],
        'total brain regions': [gfp_brain_regions_sum, tdTomato_brain_regions_sum],
        # Both rows use the filtered (>= 1 cell) name lists so that each list
        # matches the region count reported in the column before it.
        'brain regions list': [gfp_brain_regions, tdTomato_brain_regions]}

# Label the rows by channel at construction time instead of renaming in place
summary_df = pd.DataFrame(data, index=['GFP', 'tdTomato'])

# Save out summary df
summary_df.to_csv(new_folder_path + '/' + mouse_id + '_summary_df.csv')


summary_df

Unnamed: 0,total whole-brain cell count,total brain regions,brain regions list
GFP,68313.0,308,"[Olfactory tubercle, Piriform area, Retrosplen..."
tdTomato,34938475.0,668,"[Olfactory tubercle, Piriform area, Retrosplen..."


In [101]:
# Percentage of GFP cells relative to tdTomato cells, one value per summary row.
# NOTE(review): the two count lists are paired by position, so this assumes both
# summary CSVs list the structures in the same order — confirm against the data.
if len(all_gfp_brain_regions_cell_count) != len(all_tdTomato_brain_regions_cell_count):
    raise ValueError(
        "GFP and tdTomato summaries have a different number of rows; "
        "cannot pair cell counts by position"
    )

# A structure with no tdTomato cells has no defined percentage: store NaN
# instead of letting the division raise ZeroDivisionError.
percent_labled = [
    gfp_count / tdTomato_count * 100 if tdTomato_count else float('nan')
    for gfp_count, tdTomato_count in zip(all_gfp_brain_regions_cell_count,
                                         all_tdTomato_brain_regions_cell_count)
]
    

## DataFrame with the percentage of labeled GFP cells relative to tdTomato cells

In [102]:
# Add the tdTomato counts and the GFP/tdTomato percentage next to the GFP counts.
# Both lists are assigned by position, so they must follow all_gfp_df's row order.
all_gfp_df['tdTomato cell count'] = all_tdTomato_brain_regions_cell_count
all_gfp_df['percent labled gfp/tdTomato'] = percent_labled


# Give the GFP count column (created by from_dict with a default label) a real name.
# The label is looked up on all_gfp_df itself so this cell does not depend on the
# columns of another frame.
all_gfp_df.rename(columns={all_gfp_df.columns[0]: 'gfp cell count'}, inplace=True)


In [103]:
# Keep only the structures that contain at least one GFP cell
has_gfp_cells = all_gfp_df['gfp cell count'] >= 1
labled_cells_df = all_gfp_df.loc[has_gfp_cells]

# Write the filtered table to the per-mouse CSV
labled_cells_df.to_csv(output_path_csv)

In [117]:
# Preview the first 20 rows of the labelled-cells table
labled_cells_df.head(20)

Unnamed: 0,gfp cell count,tdTomato cell count,percent labled gfp/tdTomato
Olfactory tubercle,11937.0,50706,23.541593
Piriform area,11252.0,1188,947.138047
"Retrosplenial area, dorsal part, layer 1",3446.0,16233,21.228362
"Retrosplenial area, dorsal part, layer 2/3",2856.0,9762,29.2563
"Retrosplenial area, lateral agranular part, layer 1",2342.0,84720,2.7644
"Primary visual area, layer 2/3",2130.0,66663,3.195176
Caudoputamen,2065.0,12625,16.356436
Anterior olfactory nucleus,1533.0,87717,1.747666
Piriform-amygdalar area,1495.0,94765,1.577587
"Primary visual area, layer 1",1470.0,18005,8.164399


# There are several ways to statistically compare the values at each index location of two lists in Python. Here are a few options:

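### 1. Using the scipy library's stats.ttest_ind() function, you can perform an independent two-sample t-test that compares the means of the two lists. Note that it treats each list as a whole sample rather than comparing values index by index; for paired, index-wise data see stats.ttest_rel(). For example: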

In [614]:
# Independent two-sample t-test between the GFP counts and a second list of counts.
# NOTE(review): mock_tdTomato_cellcount is not defined in any cell visible here, so this
# raises NameError on a fresh kernel unless it is created elsewhere — confirm or define it.
t, p = stats.ttest_ind(gfp_brain_regions_cell_count, mock_tdTomato_cellcount)
print("t-statistic: ", t)
print("p-value: ", p)

t-statistic:  0.0
p-value:  1.0


### 2. Using the numpy library's corrcoef() function, you can calculate the correlation coefficient between the two lists at each index location. For example:

In [615]:

# np.corrcoef returns the 2x2 correlation matrix of the two lists; [0, 1] is the
# coefficient between them. Both lists must have the same length.
# NOTE(review): mock_tdTomato_cellcount is not defined in any cell visible here — confirm.
corr = np.corrcoef(gfp_brain_regions_cell_count, mock_tdTomato_cellcount)[0, 1]
print("correlation coefficient: ", corr)


correlation coefficient:  -0.018119120257418263


### 3.  Using the pandas library, you can create a dataframe from the two lists and use the corr() function to calculate the correlation between the two lists:

In [616]:

# Same correlation computed through pandas: put both lists in one frame and
# correlate the two columns. This rebinds `corr` from the NumPy cell.
# NOTE(review): mock_tdTomato_cellcount is not defined in any cell visible here — confirm.
df = pd.DataFrame({'list1': gfp_brain_regions_cell_count, 'list2': mock_tdTomato_cellcount})
corr = df['list1'].corr(df['list2'])
print("correlation coefficient: ", corr)


correlation coefficient:  -0.018119120257418263


# load voxel locations of cells 

In [81]:
# Load the saved NumPy array of points (per the section heading, voxel locations of cells).
# This rebinds `data`, which an earlier cell used for the summary dict.
# NOTE(review): absolute, machine-specific path — consider deriving it from a configurable root.
data = np.load("/Users/grant/Desktop/mock_df/points.npy")

In [84]:
# Number of entries along the first axis of the loaded array
print(len(data))

98726


# Load cell_classification data

In [87]:
# Parse the cell-classification XML file and keep its root element for inspection.
# NOTE(review): absolute, machine-specific path — consider deriving it from a configurable root.
tree = ET.parse("/Users/grant/Desktop/mock_df/cell_classification.xml")
root = tree.getroot()

In [91]:
# Print the tag and attribute dict of each direct child of the XML root
for child in root:
    print(child.tag, child.attrib)


Image_Properties {}
Marker_Data {}


# Load all points

In [94]:
# Load the per-point table (coordinates, structure name and hemisphere, per the displayed columns).
# NOTE(review): absolute, machine-specific path — consider deriving it from a configurable root.
all_points = pd.read_csv("/Users/grant/Desktop/mock_df/all_points.csv")

In [95]:
# Display the loaded points table as the cell output
all_points

Unnamed: 0,coordinate_raw_axis_0,coordinate_raw_axis_1,coordinate_raw_axis_2,coordinate_atlas_axis_0,coordinate_atlas_axis_1,coordinate_atlas_axis_2,structure_name,hemisphere
0,85,5087,2798,156,5,141,"Retrosplenial area, lateral agranular part, la...",left
1,85,4745,3007,167,5,134,"Retrosplenial area, dorsal part, layer 1",left
2,85,4777,2803,166,5,140,"Retrosplenial area, lateral agranular part, la...",left
3,85,5139,2761,154,5,142,"Retrosplenial area, lateral agranular part, la...",left
4,85,4628,2912,170,5,137,"Retrosplenial area, lateral agranular part, la...",left
...,...,...,...,...,...,...,...,...
68308,1943,6289,2541,92,149,150,Olfactory tubercle,left
68309,1943,6421,2530,89,148,150,Olfactory tubercle,left
68310,1943,6262,2541,93,149,150,Olfactory tubercle,left
68311,1943,6182,2561,95,149,149,Olfactory tubercle,left


# load volumes 

In [97]:
# Load the per-structure volume table (left / right / total volume in mm3, per the displayed columns).
# NOTE(review): absolute, machine-specific path — consider deriving it from a configurable root.
volumes = pd.read_csv("/Users/grant/Desktop/mock_df/volumes.csv")

In [98]:
# Display the loaded volumes table as the cell output
volumes

Unnamed: 0,structure_name,left_volume_mm3,right_volume_mm3,total_volume_mm3
0,"Tuberomammillary nucleus, ventral part",0.071750,0.074000,0.145750
1,"Primary somatosensory area, mouth, layer 6b",0.072375,0.077500,0.149875
2,internal capsule,1.341625,1.328750,2.670375
3,Principal sensory nucleus of the trigeminal,0.633875,0.659875,1.293750
4,"Primary somatosensory area, trunk, layer 6a",0.160375,0.163000,0.323375
...,...,...,...,...
663,"Interpeduncular nucleus, intermediate",0.025500,0.022750,0.048250
664,"Interpeduncular nucleus, dorsomedial",0.013125,0.011875,0.025000
665,"Interpeduncular nucleus, dorsolateral",0.025250,0.025625,0.050875
666,"Interpeduncular nucleus, rostrolateral",0.011250,0.012000,0.023250
