In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
working_dir = '/data/datasets/organoid_phenotyping/zika_vs_ctrl/'

os.listdir(working_dir)

['ctrl',
 'zikv',
 'cyto_profiles_combined.npy',
 'model_zikv.umap',
 'cyto_profiles_combined_samples.npy',
 'cyto_labels_combined.npy',
 'zikv_with_FC.xlsx',
 'combined_features.xlsx']

In [3]:
df = pd.read_excel(os.path.join(working_dir, 'combined_features.xlsx'))
df = df.set_index('feature')
df.head(n=21)

Unnamed: 0_level_0,zikv1,ctrl2,ctrl3,ctrl4,zikv5,ctrl1,zikv2,zikv3,zikv4,ctrl5,zikv6
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"DN nbrhd, sox2 count",54.0,76.0,58.0,33.0,66.0,27.0,96.0,113.0,46.0,42.0,143.0
"DN nbrhd, tbr1 count",26.0,24.0,20.0,7.0,29.0,11.0,18.0,24.0,34.0,6.0,53.0
"DN nbrhd, dn count",234403.0,172224.0,133964.0,55746.0,156817.0,51304.0,157125.0,122466.0,154621.0,26693.0,125987.0
"SOX2 nbrhd, sox2 count",234723.0,577631.0,1171108.0,339108.0,378339.0,1347580.0,130690.0,86452.0,111288.0,476390.0,21727.0
"SOX2 nbrhd, tbr1 count",696.0,1771.0,3466.0,1145.0,784.0,5187.0,1373.0,814.0,909.0,2179.0,312.0
"SOX2 nbrhd, dn count",489.0,1116.0,795.0,619.0,891.0,1025.0,195.0,122.0,192.0,540.0,94.0
"TBR1 nbrhd, sox2 count",1668.0,1724.0,1335.0,826.0,1111.0,2596.0,1939.0,2018.0,994.0,925.0,695.0
"TBR1 nbrhd, tbr1 count",75862.0,180185.0,162816.0,133075.0,59679.0,228410.0,61034.0,48238.0,27786.0,142167.0,24306.0
"TBR1 nbrhd, dn count",464.0,2032.0,935.0,1938.0,639.0,1254.0,489.0,250.0,257.0,1239.0,358.0
"DP nbrhd, sox2 count",56319.0,78757.0,76273.0,43974.0,84948.0,128643.0,92738.0,58874.0,46819.0,56913.0,44955.0


In [4]:
organoids = df.columns
nbrhd_names = ['DN', 'SOX2', 'TBR1', 'DP', 'MidTBR1', 'MidSOX2', 'MidInter']

neighborhood_counts = {}
for org in organoids:
    neighborhoods = dict(zip(nbrhd_names, list(range(len(nbrhd_names)))))
    df_org = df[org]
    for feat in df.index[:21]:
        nbrhd = feat.split()[0]
        count = df_org.loc[feat]
        neighborhoods[nbrhd] += count
    neighborhood_counts[org] = neighborhoods

neighborhood_counts

{'zikv1': {'DN': 234483.0,
  'SOX2': 235909.0,
  'TBR1': 77996.0,
  'DP': 112392.0,
  'MidTBR1': 84187.0,
  'MidSOX2': 193458.0,
  'MidInter': 48013.0},
 'ctrl2': {'DN': 172324.0,
  'SOX2': 580519.0,
  'TBR1': 183943.0,
  'DP': 156707.0,
  'MidTBR1': 146479.0,
  'MidSOX2': 212632.0,
  'MidInter': 130675.0},
 'ctrl3': {'DN': 134042.0,
  'SOX2': 1175370.0,
  'TBR1': 165088.0,
  'DP': 152012.0,
  'MidTBR1': 142730.0,
  'MidSOX2': 175955.0,
  'MidInter': 123214.0},
 'ctrl4': {'DN': 55786.0,
  'SOX2': 340873.0,
  'TBR1': 135841.0,
  'DP': 87472.0,
  'MidTBR1': 182720.0,
  'MidSOX2': 75553.0,
  'MidInter': 150189.0},
 'zikv5': {'DN': 156912.0,
  'SOX2': 380015.0,
  'TBR1': 61431.0,
  'DP': 169464.0,
  'MidTBR1': 95380.0,
  'MidSOX2': 277997.0,
  'MidInter': 83865.0},
 'ctrl1': {'DN': 51342.0,
  'SOX2': 1353793.0,
  'TBR1': 232262.0,
  'DP': 256284.0,
  'MidTBR1': 119227.0,
  'MidSOX2': 122144.0,
  'MidInter': 214456.0},
 'zikv2': {'DN': 157239.0,
  'SOX2': 132259.0,
  'TBR1': 63464.0,
  'DP'

In [5]:
neighborhood_proportions = {}
for org in organoids:
    counts = neighborhood_counts[org]
    proportions = {}
    total = sum(counts.values())
    for nbrhd in nbrhd_names:
        frac = counts[nbrhd] / total
        proportions[nbrhd + ' nbrhd frac'] = frac
    neighborhood_proportions[org] = proportions
neighborhood_proportions

{'zikv1': {'DN nbrhd frac': 0.23770677934142845,
  'SOX2 nbrhd frac': 0.23915238464049438,
  'TBR1 nbrhd frac': 0.07906832461847577,
  'DP nbrhd frac': 0.11393721653058783,
  'MidTBR1 nbrhd frac': 0.0853444413130881,
  'MidSOX2 nbrhd frac': 0.19611774891072728,
  'MidInter nbrhd frac': 0.048673104645198174},
 'ctrl2': {'DN nbrhd frac': 0.10883994545497035,
  'SOX2 nbrhd frac': 0.36665616104300003,
  'TBR1 nbrhd frac': 0.11617851307318545,
  'DP nbrhd frac': 0.09897623855302824,
  'MidTBR1 nbrhd frac': 0.09251622739896127,
  'MidSOX2 nbrhd frac': 0.13429850329600784,
  'MidInter nbrhd frac': 0.08253441118084684},
 'ctrl3': {'DN nbrhd frac': 0.06480433530860162,
  'SOX2 nbrhd frac': 0.5682477998811648,
  'TBR1 nbrhd frac': 0.07981392479541058,
  'DP nbrhd frac': 0.07349216379143217,
  'MidTBR1 nbrhd frac': 0.06900466106591001,
  'MidSOX2 nbrhd frac': 0.08506771623241223,
  'MidInter nbrhd frac': 0.059569398925068566},
 'ctrl4': {'DN nbrhd frac': 0.054243636441424534,
  'SOX2 nbrhd frac':

In [6]:
df_props = pd.DataFrame(neighborhood_proportions)
df_props = df_props.reindex(sorted(df_props.columns), axis=1)
df_props

Unnamed: 0,ctrl1,ctrl2,ctrl3,ctrl4,ctrl5,zikv1,zikv2,zikv3,zikv4,zikv5,zikv6
DN nbrhd frac,0.021852,0.10884,0.064804,0.054244,0.025391,0.237707,0.192193,0.227626,0.25705,0.128085,0.328073
DP nbrhd frac,0.10908,0.098976,0.073492,0.085054,0.107622,0.113937,0.226303,0.218417,0.155166,0.138331,0.233226
MidInter nbrhd frac,0.091277,0.082534,0.059569,0.146037,0.140238,0.048673,0.114455,0.088943,0.089156,0.068458,0.109267
MidSOX2 nbrhd frac,0.051987,0.134299,0.085068,0.073464,0.033903,0.196118,0.136024,0.143106,0.19885,0.226924,0.056947
MidTBR1 nbrhd frac,0.050746,0.092516,0.069005,0.177668,0.100874,0.085344,0.091792,0.065887,0.064781,0.077857,0.149002
SOX2 nbrhd frac,0.576203,0.366656,0.568248,0.331449,0.454924,0.239152,0.16166,0.162247,0.186746,0.3102,0.057548
TBR1 nbrhd frac,0.098856,0.116179,0.079814,0.132085,0.137047,0.079068,0.077572,0.093774,0.048251,0.050145,0.065938


In [7]:
df_props.to_excel(os.path.join(working_dir, 'zika_nbrhd_proportions.xlsx'))