# PV Ensemble Cell Counts
## Can we distinguish two populations of PV cells via cell counts? 
### In particular: PV/cFos+/Npas4- vs PV/Npas4+/cFos-
###### Jonathan Ramos 4/15/2024

In the last round of stats I ran a series of ANOVAs to test for differences in intensity between PV/cFos+/Npas4- and PV/Npas4+/cFos- labeled cells, with the aim of distinguishing two potentially distinct populations of PV cells (ensembles?). Unfortunately, there was no indication that intensities differed across reactivation by treatment between these groups, whether or not we consider the presence of PNNs.

In this notebook I will slice out and aggregate smaller datasets to address whether there are differences in cell counts between these two groups. Since none of the sets I have been using for intensity analyses require rat_n, I haven't been keeping track of which cells come from which animals. Here I need to start from the large FINAL csv and keep track of the animal each cell comes from the whole way through, so that we can compute means of means.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil

In [2]:
# load the full dataset from csv; the 'Unnamed: 0' and 'index' cols are
# dropped immediately after reading. Every later cell slices from this df.
df = pd.read_csv('KET-VR5_FINAL.csv').drop(columns=['Unnamed: 0', 'index'])
# bare expression: displays the frame as the cell output
df

Unnamed: 0,filename,image_name,roi_id,true_grouping,dummy_PV,dummy_cFos,dummy_Npas4,dummy_WFA,CoM_x,CoM_y,...,rat_n,treatment,group_name,snr,mean-background,adjusted_mean-background,true_grouping_literal,matching_len,updated_true_grouping,dummy
0,KET-10-12_PFC_3.7_A_2.tif,KET-10-12_PFC_3.7_A,0-000-00000_PV,"('0-000-00000_PV', '0-FFF-00045_Npas4', '0-FFF...",True,False,True,True,297.86,425.97,...,KET-10-12,FR1_KET,KET-10,2.247372,297.961600,382.724600,"('0-000-00000_PV', '0-FFF-00045_Npas4', '0-FFF...",True,"('0-000-00000_PV', '0-FFF-00045_Npas4', '0-FFF...","(True, False, True, True)"
1,KET-10-12_PFC_3.7_A_2.tif,KET-10-12_PFC_3.7_A,0-000-00001_PV,"('0-000-00001_PV', '0-FFF-00070_cFos', '0-FFF-...",True,True,True,False,340.47,43.89,...,KET-10-12,FR1_KET,KET-10,1.318398,76.056290,160.819290,"('0-000-00001_PV', '0-FFF-00070_cFos', '0-FFF-...",True,"('0-000-00001_PV', '0-FFF-00070_cFos', '0-FFF-...","(True, True, True, False)"
2,KET-10-12_PFC_3.7_A_2.tif,KET-10-12_PFC_3.7_A,0-000-00002_PV,"('0-000-00002_PV', '0-FFF-00044_Npas4', '0-FFF...",True,False,True,True,154.85,476.15,...,KET-10-12,FR1_KET,KET-10,1.356610,85.184100,169.947100,"('0-000-00002_PV', '0-FFF-00044_Npas4', '0-FFF...",True,"('0-000-00002_PV', '0-FFF-00044_Npas4', '0-FFF...","(True, False, True, True)"
3,KET-10-12_PFC_3.7_A_2.tif,KET-10-12_PFC_3.7_A,0-000-00003_PV,"('0-000-00003_PV', '0-FFF-00082_Npas4', '0-FFF...",True,False,True,True,310.10,308.22,...,KET-10-12,FR1_KET,KET-10,1.448608,107.159805,191.922800,"('0-000-00003_PV', '0-FFF-00082_Npas4', '0-FFF...",True,"('0-000-00003_PV', '0-FFF-00082_Npas4', '0-FFF...","(True, False, True, True)"
4,KET-10-12_PFC_3.7_A_2.tif,KET-10-12_PFC_3.7_A,0-000-00004_PV,"('0-000-00004_PV',)",True,False,False,False,44.35,323.64,...,KET-10-12,FR1_KET,KET-10,1.798510,190.741200,275.504200,"('0-000-00004_PV',)",True,"('0-000-00004_PV',)","(True, False, False, False)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18627,PE-13-9_PFC_4.0_B_5.tif,PE-13-9_PFC_4.0_B,0-FFF-00008_WFA,"('0-FFF-00008_WFA',)",False,False,False,True,385.12,235.30,...,PE-13-9,VR5_SAL,PE-13,1.087687,9.611397,26.986496,"('0-FFF-00008_WFA',)",True,"('0-FFF-00008_WFA',)","(False, False, False, True)"
18628,PE-13-9_PFC_4.0_B_5.tif,PE-13-9_PFC_4.0_B,0-FFF-00009_WFA,"('0-000-00002_PV', '0-FFF-00009_WFA')",True,False,False,True,409.79,230.66,...,PE-13-9,VR5_SAL,PE-13,1.175773,19.266396,36.641495,"('0-000-00002_PV', '0-FFF-00009_WFA')",True,"('0-000-00002_PV', '0-FFF-00009_WFA')","(True, False, False, True)"
18629,PE-13-9_PFC_4.0_B_5.tif,PE-13-9_PFC_4.0_B,0-FFF-00010_WFA,"('0-FFF-00010_WFA',)",False,False,False,True,300.24,100.38,...,PE-13-9,VR5_SAL,PE-13,1.129718,14.218300,31.593400,"('0-FFF-00010_WFA',)",True,"('0-FFF-00010_WFA',)","(False, False, False, True)"
18630,PE-13-9_PFC_4.0_B_5.tif,PE-13-9_PFC_4.0_B,0-FFF-00011_WFA,"('0-000-00005_PV', '0-FFF-00011_WFA')",True,False,False,True,414.17,44.01,...,PE-13-9,VR5_SAL,PE-13,1.099090,10.861198,28.236298,"('0-000-00005_PV', '0-FFF-00011_WFA')",True,"('0-000-00005_PV', '0-FFF-00011_WFA')","(True, False, False, True)"


In [3]:
# reusing some old code to count cells

def count_imgs(df, sid, iid):
    '''
    counts the number of distinct values in the image id col (iid) for each
    subject in the subject id col (sid)
    args:
        df: pd.core.frame.DataFrame(n, m)
            n: the number of rows,
            m: the number of features
        sid: str, denoting the name of the col containing unique subject ids
        iid: str, denoting the name of the col containing unique image ids
    return:
        df_imgn: pd.core.frame.DataFrame(n=|sid|, m=2)
            one row per distinct subject id (ordered by subject id), with
            2 cols: the sid col, and an "image_n" col holding the number of
            distinct image ids found for that subject
    raises:
        KeyError: if sid or iid is not a column of df
    '''
    # validate explicitly: a bare assert is stripped when python runs with -O,
    # and both cols (not just iid) are needed below
    missing = [col for col in (sid, iid) if col not in df.columns]
    if missing:
        raise KeyError(f'column(s) not found in df: {missing}')

    # dropna=False: a missing image id is counted as one distinct value
    # rather than being silently ignored
    df_imgn = df.groupby(sid)[iid]\
        .nunique(dropna=False)\
        .reset_index(name='image_n')

    return df_imgn

def count_cells(df, cols):
    '''
    tallies how many rows of df share each distinct combination of values in
    the cols named by the cols arg; the tally for each combination is stored
    in a new col called "cell_counts"
    args:
        df: pd.core.frame.DataFrame(N, M); N: the number of rows, M: the
            number of cols (assumed to have already been split by stain_type)
        cols: list(n), n: the number of cols over which to count distinct rows
    return:
        df_counts: pd.core.frame.DataFrame; one row per distinct combination
            found in df, holding the n cols named in cols plus "cell_counts",
            with rows ordered by cols
    '''
    # tally each distinct combination of labels
    tallies = df.value_counts(subset=cols)

    # move the labels out of the index and name the tally col
    df_counts = tallies.reset_index(name='cell_counts')

    # order rows by the grouping labels
    return df_counts.sort_values(by=cols)

def sum_cells(df, cols, iid):
    '''
    takes cell count df, groups by cols denoted in cols list (minus the image
    id col) and computes the sum of the count col for each group. The sums
    are reported in a col called "cell_count_sums". If df has a col called
    "scaled_counts" that col is summed; otherwise "cell_counts" is summed.
    args:
        df: pd.core.frame.DataFrame(N, M), N: the number of rows,
            M: the number of cols, must contain a col called "cell_counts"
            (or "scaled_counts")
        cols: list, list containing col name strings that define each group
            for group by and reduction (in this case summing)
        iid: str, denotes the name of the image id col; it is removed from
            cols before grouping so that sums span all images of a group
    return:
        df_sums: pd.core.frame.DataFrame; one row per group, holding the
            grouping cols (cols without iid) plus "cell_count_sums"
    '''
    # remove image id col (we want to sum counts across all images per rat)
    reduce_cols = [col for col in cols if col != iid]

    # prefer scaled counts when the df carries them, else use raw counts
    if 'scaled_counts' in df.columns:
        value_col = 'scaled_counts'
    else:
        value_col = 'cell_counts'

    # group by, reduce; the value col is selected directly so the sum does
    # not depend on whether it happens to be listed in cols
    df_sums = df.groupby(by=reduce_cols)[value_col]\
        .sum()\
        .reset_index(name='cell_count_sums')

    return df_sums

def average_counts(df_sums, df_ns, cols, sid, iid):
    '''
    combines per-subject cell count sums with per-subject image counts and
    divides one by the other, giving the mean number of cells per image for
    each subject.
    args:
        df_sums: pd.core.frame.DataFrame; must contain the sid col, the
            grouping cols (cols without iid) and a col "cell_count_sums".
        df_ns: pd.core.frame.DataFrame; must contain the sid col and a col
            "image_n"
        cols: list(n), n: the number of cols (contains all cols necessary to
            create every unique group combination)
        sid: str, denoting the name of the col containing unique subject ids
        iid: str, denoting the name of the col containing unique image ids
    return:
        mean_cell_ns: pd.core.frame.DataFrame; the rows of df_sums whose
            subject id also occurs in df_ns (inner join, so unmatched
            subjects are dropped), ordered by the grouping cols, with the
            sid col first and the added cols "image_n" and "mean_cell_n"
    '''
    # the image id col was removed during the reduction step, so it cannot
    # be used for ordering here
    reduce_cols = [col for col in cols if col != iid]

    # attach each subject's image count to its summed cell count
    image_lookup = df_ns.set_index(sid)
    joined = df_sums.join(image_lookup, on=sid, how='inner')
    joined = joined.sort_values(by=reduce_cols)

    # compute mean cell n
    joined['mean_cell_n'] = joined['cell_count_sums'] / joined['image_n']

    # reorder so that subject id is the first col
    remaining = [col for col in joined.columns if col != sid]
    mean_cell_ns = joined[[sid] + remaining]

    return mean_cell_ns

def mean_cell_n(df_stain, df_full, cols, sid, iid, return_counts=False):
    '''
    wrapper that chains count_imgs, count_cells, sum_cells and average_counts
    to get the mean number of cells per image for each subject;
    magnification/zoom factor is assumed to be equal across all images.
    NOTE that image totals are counted on the full cleaned dataset: not every
    image contains every stain type combination, and images with 0 cells of
    a particular stain type must still count towards the number of images.
    NOTE also that a subject with no rows at all in df_stain does not appear
    in the result (average_counts uses an inner join).
    args:
        df_stain: pd.core.frame.DataFrame; df containing data for a given stain type
        df_full: pd.core.frame.DataFrame; df containing data for full (cleaned) set
        cols: list, contains str denoting col names for grouping; the result
            must end up with a "treatment" col whose values are two labels
            joined by a single "_"
        sid: str, col name denoting col containing unique subject ids
        iid: str, col name denoting col containing unique image ids
        return_counts: bool, flag for added utility during debugging
    return:
        mean_cell_ns: pd.core.frame.DataFrame; df containing final mean cell
            ns plus "react" and "treat" cols split out of "treatment"
        if return_counts is True, a tuple (cell_counts, mean_cell_ns) is
            returned instead, where cell_counts is the df of cell counts per
            image (for debugging)
    '''
    # number of distinct images per subject, taken from the full set
    img_ns = count_imgs(df_full, sid, iid)

    # number of cells per image for each subject, within the stain subset
    cell_counts = count_cells(df_stain, cols)

    # total cells per subject across all of its images
    cell_sums = sum_cells(cell_counts, cols, iid)

    # mean cells per image for each subject
    mean_cell_ns = average_counts(cell_sums, img_ns, cols, sid, iid)

    # EDITED 4/15/2024 by JR: added split cols for reactivation and treatment conditions
    react_labels, treat_labels = zip(
        *(label.split('_') for label in mean_cell_ns.treatment)
    )
    mean_cell_ns['react'] = react_labels
    mean_cell_ns['treat'] = treat_labels

    if return_counts:
        return (cell_counts, mean_cell_ns)

    return mean_cell_ns


In [4]:
# selecting all PV cells (rows whose stain_type is "PV")
# naming convention for the slices below: a trailing "p" on a marker name
# means its dummy col is True, a trailing "m" means its dummy col is False
df_PV = df.query('stain_type == "PV"')

# selecting all PV cells with Npas4, then splitting them by cFos status
# (cFosm: without cFos, cFosp: with cFos)
# not accounting for PNNs
df_PV_Npas4 = df_PV.query('dummy_Npas4 == True')
df_PV_Npas4_cFosm = df_PV_Npas4.query('dummy_cFos == False')
df_PV_Npas4_cFosp = df_PV_Npas4.query('dummy_cFos == True')

# accounting for PNNs: PV/Npas4+ split by the WFA dummy col
df_PV_Npas4_WFAm = df_PV_Npas4.query('dummy_WFA == False')
df_PV_Npas4_WFAp = df_PV_Npas4.query('dummy_WFA == True')

# for PV/Npas4+: cFos +/- in WFA+/-
df_PV_Npas4_WFAm_cFosm = df_PV_Npas4_WFAm.query('dummy_cFos == False')
df_PV_Npas4_WFAm_cFosp = df_PV_Npas4_WFAm.query('dummy_cFos == True')
df_PV_Npas4_WFAp_cFosm = df_PV_Npas4_WFAp.query('dummy_cFos == False')
df_PV_Npas4_WFAp_cFosp = df_PV_Npas4_WFAp.query('dummy_cFos == True')

# selecting all PV cells with cFos, then splitting them by Npas4 status
# (Npas4m: without Npas4, Npas4p: with Npas4)
df_PV_cFos = df_PV.query('dummy_cFos == True')
df_PV_cFos_Npas4m = df_PV_cFos.query('dummy_Npas4 == False')
df_PV_cFos_Npas4p = df_PV_cFos.query('dummy_Npas4 == True')

# accounting for PNNs: PV/cFos+ split by the WFA dummy col
df_PV_cFos_WFAm = df_PV_cFos.query('dummy_WFA == False')
df_PV_cFos_WFAp = df_PV_cFos.query('dummy_WFA == True')

# for PV/cFos+: Npas4 +/- in WFA+/-
df_PV_cFos_WFAm_Npas4m = df_PV_cFos_WFAm.query('dummy_Npas4 == False')
df_PV_cFos_WFAm_Npas4p = df_PV_cFos_WFAm.query('dummy_Npas4 == True')
df_PV_cFos_WFAp_Npas4m = df_PV_cFos_WFAp.query('dummy_Npas4 == False')
df_PV_cFos_WFAp_Npas4p = df_PV_cFos_WFAp.query('dummy_Npas4 == True')

In [5]:
# col names handed to mean_cell_n: sid is the subject id col, iid is the
# image id col, cols are the cols over which cells are counted per image
sid = 'rat_n'
iid = 'image_name'
cols = ['treatment', 'stain_type', sid, iid]
# NOTE(review): group is not used by any of the cells shown here
group = 'KET-VR5'

# each call below computes mean cells per image per rat for one slice;
# the full df is passed as df_full so image totals come from the whole
# dataset rather than from the slice

# PV/Npas4
df_PV_Npas4_cFosm_COUNTS = mean_cell_n(df_PV_Npas4_cFosm, df, cols, sid, iid)
df_PV_Npas4_cFosp_COUNTS = mean_cell_n(df_PV_Npas4_cFosp, df, cols, sid, iid)
df_PV_Npas4_WFAm_COUNTS = mean_cell_n(df_PV_Npas4_WFAm, df, cols, sid, iid)
df_PV_Npas4_WFAp_COUNTS = mean_cell_n(df_PV_Npas4_WFAp, df, cols, sid, iid)
df_PV_Npas4_WFAm_cFosm_COUNTS = mean_cell_n(df_PV_Npas4_WFAm_cFosm, df, cols, sid, iid)
df_PV_Npas4_WFAm_cFosp_COUNTS = mean_cell_n(df_PV_Npas4_WFAm_cFosp, df, cols, sid, iid)
df_PV_Npas4_WFAp_cFosm_COUNTS = mean_cell_n(df_PV_Npas4_WFAp_cFosm, df, cols, sid, iid)
df_PV_Npas4_WFAp_cFosp_COUNTS = mean_cell_n(df_PV_Npas4_WFAp_cFosp, df, cols, sid, iid)

# PV/cFos
df_PV_cFos_Npas4m_COUNTS = mean_cell_n(df_PV_cFos_Npas4m, df, cols, sid, iid)
df_PV_cFos_Npas4p_COUNTS = mean_cell_n(df_PV_cFos_Npas4p, df, cols, sid, iid)
df_PV_cFos_WFAm_COUNTS = mean_cell_n(df_PV_cFos_WFAm, df, cols, sid, iid)
df_PV_cFos_WFAp_COUNTS = mean_cell_n(df_PV_cFos_WFAp, df, cols, sid, iid)
df_PV_cFos_WFAm_Npas4m_COUNTS = mean_cell_n(df_PV_cFos_WFAm_Npas4m, df, cols, sid, iid)
df_PV_cFos_WFAm_Npas4p_COUNTS = mean_cell_n(df_PV_cFos_WFAm_Npas4p, df, cols, sid, iid)
df_PV_cFos_WFAp_Npas4m_COUNTS = mean_cell_n(df_PV_cFos_WFAp_Npas4m, df, cols, sid, iid)
df_PV_cFos_WFAp_Npas4p_COUNTS = mean_cell_n(df_PV_cFos_WFAp_Npas4p, df, cols, sid, iid)

In [6]:
### adding respective dummy col labels for grouping during ANOVA
def get_dummies(df, dummies):
    '''
    adds one constant-valued col to df for every entry of dummies, then
    returns df. NOTE that df itself is modified (no copy is made).
    args:
        df: pd.core.frame.DataFrame; a dataframe (of mean cell counts)
        dummies: dict, maps a stain type (used as the new col name) to
            either True or False (used as the value of every row)
    return:
        df: the same dataframe object that was passed in, with the new cols
    '''
    for stain in dummies:
        df[stain] = dummies[stain]

    return df

# tag every counts table with the marker status that defines its slice, so
# the status is available as explicit cols; the dict passed for each table
# mirrors the p/m suffixes in that table's name

# PV/Npas4
df_PV_Npas4_cFosm_COUNTS = get_dummies(df_PV_Npas4_cFosm_COUNTS, {'PV':True, 'Npas4':True, 'cFos':False})
df_PV_Npas4_cFosp_COUNTS = get_dummies(df_PV_Npas4_cFosp_COUNTS, {'PV':True, 'Npas4':True, 'cFos':True})
df_PV_Npas4_WFAm_COUNTS = get_dummies(df_PV_Npas4_WFAm_COUNTS, {'PV':True, 'Npas4':True, 'WFA':False})
df_PV_Npas4_WFAp_COUNTS = get_dummies(df_PV_Npas4_WFAp_COUNTS, {'PV':True, 'Npas4':True, 'WFA':True})
df_PV_Npas4_WFAm_cFosm_COUNTS = get_dummies(df_PV_Npas4_WFAm_cFosm_COUNTS, {'PV':True, 'Npas4':True, 'WFA':False, 'cFos':False})
df_PV_Npas4_WFAm_cFosp_COUNTS = get_dummies(df_PV_Npas4_WFAm_cFosp_COUNTS, {'PV':True, 'Npas4':True, 'WFA':False, 'cFos':True})
df_PV_Npas4_WFAp_cFosm_COUNTS = get_dummies(df_PV_Npas4_WFAp_cFosm_COUNTS, {'PV':True, 'Npas4':True, 'WFA':True, 'cFos':False})
df_PV_Npas4_WFAp_cFosp_COUNTS = get_dummies(df_PV_Npas4_WFAp_cFosp_COUNTS, {'PV':True, 'Npas4':True, 'WFA':True, 'cFos':True})

# PV/cFos
df_PV_cFos_Npas4m_COUNTS = get_dummies(df_PV_cFos_Npas4m_COUNTS, {'PV':True, 'cFos':True, 'Npas4':False})
df_PV_cFos_Npas4p_COUNTS = get_dummies(df_PV_cFos_Npas4p_COUNTS, {'PV':True, 'cFos':True, 'Npas4':True})
df_PV_cFos_WFAm_COUNTS = get_dummies(df_PV_cFos_WFAm_COUNTS, {'PV':True, 'cFos':True, 'WFA':False})
df_PV_cFos_WFAp_COUNTS = get_dummies(df_PV_cFos_WFAp_COUNTS, {'PV':True, 'cFos':True, 'WFA':True})
df_PV_cFos_WFAm_Npas4m_COUNTS = get_dummies(df_PV_cFos_WFAm_Npas4m_COUNTS, {'PV':True, 'cFos':True, 'WFA':False, 'Npas4':False})
df_PV_cFos_WFAm_Npas4p_COUNTS = get_dummies(df_PV_cFos_WFAm_Npas4p_COUNTS, {'PV':True, 'cFos':True, 'WFA':False, 'Npas4':True})
df_PV_cFos_WFAp_Npas4m_COUNTS = get_dummies(df_PV_cFos_WFAp_Npas4m_COUNTS, {'PV':True, 'cFos':True, 'WFA':True, 'Npas4':False})
df_PV_cFos_WFAp_Npas4p_COUNTS = get_dummies(df_PV_cFos_WFAp_Npas4p_COUNTS, {'PV':True, 'cFos':True, 'WFA':True, 'Npas4':True})

In [7]:
# shutil is also imported in the first cell; the repeat import is harmless
import shutil

# start from an empty output dir on every run: an existing dir (and any
# files left in it by a previous run) is deleted before being recreated
if os.path.exists('Ensemble_cell_ns') and os.path.isdir('Ensemble_cell_ns'):
    shutil.rmtree('Ensemble_cell_ns')

os.mkdir('Ensemble_cell_ns')

# one csv per counts table, named after the table; to_csv is called with its
# defaults, so the dataframe index is written as an unnamed first col

# PV/Npas4
df_PV_Npas4_cFosm_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_cFosm_COUNTS.csv')
df_PV_Npas4_cFosp_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_cFosp_COUNTS.csv')
df_PV_Npas4_WFAm_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_WFAm_COUNTS.csv')
df_PV_Npas4_WFAp_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_WFAp_COUNTS.csv')
df_PV_Npas4_WFAm_cFosm_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_WFAm_cFosm_COUNTS.csv')
df_PV_Npas4_WFAm_cFosp_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_WFAm_cFosp_COUNTS.csv')
df_PV_Npas4_WFAp_cFosm_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_WFAp_cFosm_COUNTS.csv')
df_PV_Npas4_WFAp_cFosp_COUNTS.to_csv('Ensemble_cell_ns/PV_Npas4_WFAp_cFosp_COUNTS.csv')

# PV/cFos
df_PV_cFos_Npas4m_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_Npas4m_COUNTS.csv')
df_PV_cFos_Npas4p_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_Npas4p_COUNTS.csv')
df_PV_cFos_WFAm_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_WFAm_COUNTS.csv')
df_PV_cFos_WFAp_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_WFAp_COUNTS.csv')
df_PV_cFos_WFAm_Npas4m_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_WFAm_Npas4m_COUNTS.csv')
df_PV_cFos_WFAm_Npas4p_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_WFAm_Npas4p_COUNTS.csv')
df_PV_cFos_WFAp_Npas4m_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_WFAp_Npas4m_COUNTS.csv')
df_PV_cFos_WFAp_Npas4p_COUNTS.to_csv('Ensemble_cell_ns/PV_cFos_WFAp_Npas4p_COUNTS.csv')

## One more thing! Npas4/cFos+/- vs cFos/Npas4+/- (NOT considering only PVs)

In [13]:
# this cell repeats the slice -> count -> label -> save steps, but starting
# from the cFos and Npas4 rows of the full df instead of the PV rows
# (trailing "p": that marker's dummy col is True, trailing "m": it is False)

# selecting all cFos, split by Npas4 status and then by WFA status
df_cFos = df.query('stain_type == "cFos"')
df_cFos_Npas4p = df_cFos.query('dummy_Npas4 == True')
df_cFos_Npas4m = df_cFos.query('dummy_Npas4 == False')
df_cFos_Npas4p_WFAp = df_cFos_Npas4p.query('dummy_WFA == True')
df_cFos_Npas4p_WFAm = df_cFos_Npas4p.query('dummy_WFA == False')
df_cFos_Npas4m_WFAp = df_cFos_Npas4m.query('dummy_WFA == True')
df_cFos_Npas4m_WFAm = df_cFos_Npas4m.query('dummy_WFA == False')

# selecting all Npas4, split by cFos status and then by WFA status
df_Npas4 = df.query('stain_type == "Npas4"')
df_Npas4_cFosp = df_Npas4.query('dummy_cFos == True')
df_Npas4_cFosm = df_Npas4.query('dummy_cFos == False')
df_Npas4_cFosp_WFAp = df_Npas4_cFosp.query('dummy_WFA == True')
df_Npas4_cFosp_WFAm = df_Npas4_cFosp.query('dummy_WFA == False')
df_Npas4_cFosm_WFAp = df_Npas4_cFosm.query('dummy_WFA == True')
df_Npas4_cFosm_WFAm = df_Npas4_cFosm.query('dummy_WFA == False')

### get mean cell ns
# same col names as used for the PV tables; redefined here so this cell can
# be run without the earlier one
sid = 'rat_n'
iid = 'image_name'
cols = ['treatment', 'stain_type', sid, iid]
# NOTE(review): group is not used by any of the cells shown here
group = 'KET-VR5'

# cFos with or without Npas4
df_cFos_Npas4m_COUNTS = mean_cell_n(df_cFos_Npas4m, df, cols, sid, iid)
df_cFos_Npas4p_COUNTS = mean_cell_n(df_cFos_Npas4p, df, cols, sid, iid)
df_cFos_Npas4p_WFAp_COUNTS = mean_cell_n(df_cFos_Npas4p_WFAp, df, cols, sid, iid)
df_cFos_Npas4p_WFAm_COUNTS = mean_cell_n(df_cFos_Npas4p_WFAm, df, cols, sid, iid)
df_cFos_Npas4m_WFAp_COUNTS = mean_cell_n(df_cFos_Npas4m_WFAp, df, cols, sid, iid)
df_cFos_Npas4m_WFAm_COUNTS = mean_cell_n(df_cFos_Npas4m_WFAm, df, cols, sid, iid)

# Npas4 with or without cFos
df_Npas4_cFosm_COUNTS = mean_cell_n(df_Npas4_cFosm, df, cols, sid, iid)
df_Npas4_cFosp_COUNTS = mean_cell_n(df_Npas4_cFosp, df, cols, sid, iid)
df_Npas4_cFosp_WFAp_COUNTS = mean_cell_n(df_Npas4_cFosp_WFAp, df, cols, sid, iid)
df_Npas4_cFosp_WFAm_COUNTS = mean_cell_n(df_Npas4_cFosp_WFAm, df, cols, sid, iid)
df_Npas4_cFosm_WFAp_COUNTS = mean_cell_n(df_Npas4_cFosm_WFAp, df, cols, sid, iid)
df_Npas4_cFosm_WFAm_COUNTS = mean_cell_n(df_Npas4_cFosm_WFAm, df, cols, sid, iid)

### get dummies
# NOTE: the cFos_Npas4p and Npas4_cFosp tables receive identical dummy
# values (cFos True, Npas4 True); they differ only in the stain_type col
# carried over from cols
df_cFos_Npas4m_COUNTS = get_dummies(df_cFos_Npas4m_COUNTS, {'cFos':True, 'Npas4':False})
df_cFos_Npas4p_COUNTS = get_dummies(df_cFos_Npas4p_COUNTS, {'cFos':True, 'Npas4':True})
df_Npas4_cFosm_COUNTS = get_dummies(df_Npas4_cFosm_COUNTS, {'Npas4':True, 'cFos':False})
df_Npas4_cFosp_COUNTS = get_dummies(df_Npas4_cFosp_COUNTS, {'Npas4':True, 'cFos':True})
df_cFos_Npas4m_WFAm_COUNTS = get_dummies(df_cFos_Npas4m_WFAm_COUNTS, {'cFos':True, 'Npas4':False, 'WFA':False})
df_cFos_Npas4m_WFAp_COUNTS = get_dummies(df_cFos_Npas4m_WFAp_COUNTS, {'cFos':True, 'Npas4':False, 'WFA':True})
df_cFos_Npas4p_WFAm_COUNTS = get_dummies(df_cFos_Npas4p_WFAm_COUNTS, {'cFos':True, 'Npas4':True, 'WFA':False})
df_cFos_Npas4p_WFAp_COUNTS = get_dummies(df_cFos_Npas4p_WFAp_COUNTS, {'cFos':True, 'Npas4':True, 'WFA':True})
df_Npas4_cFosm_WFAm_COUNTS = get_dummies(df_Npas4_cFosm_WFAm_COUNTS, {'cFos':False, 'Npas4':True, 'WFA':False})
df_Npas4_cFosm_WFAp_COUNTS = get_dummies(df_Npas4_cFosm_WFAp_COUNTS, {'cFos':False, 'Npas4':True, 'WFA':True})
df_Npas4_cFosp_WFAm_COUNTS = get_dummies(df_Npas4_cFosp_WFAm_COUNTS, {'cFos':True, 'Npas4':True, 'WFA':False})
df_Npas4_cFosp_WFAp_COUNTS = get_dummies(df_Npas4_cFosp_WFAp_COUNTS, {'cFos':True, 'Npas4':True, 'WFA':True})

# start from an empty output dir on every run: an existing dir (and any
# files left in it by a previous run) is deleted before being recreated
if os.path.exists('Ensemble_cell_ns_cFos_Npas4') and os.path.isdir('Ensemble_cell_ns_cFos_Npas4'):
    shutil.rmtree('Ensemble_cell_ns_cFos_Npas4')
os.mkdir('Ensemble_cell_ns_cFos_Npas4')

# one csv per counts table, named after the table; to_csv is called with its
# defaults, so the dataframe index is written as an unnamed first col
df_cFos_Npas4m_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/cFos_Npas4m_COUNTS.csv')
df_cFos_Npas4p_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/cFos_Npas4p_COUNTS.csv')
df_Npas4_cFosm_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/Npas4_cFosm_COUNTS.csv')
df_Npas4_cFosp_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/Npas4_cFosp_COUNTS.csv')
df_cFos_Npas4m_WFAm_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/cFos_Npas4m_WFAm_COUNTS.csv')
df_cFos_Npas4m_WFAp_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/cFos_Npas4m_WFAp_COUNTS.csv')
df_cFos_Npas4p_WFAm_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/cFos_Npas4p_WFAm_COUNTS.csv')
df_cFos_Npas4p_WFAp_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/cFos_Npas4p_WFAp_COUNTS.csv')
df_Npas4_cFosm_WFAm_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/Npas4_cFosm_WFAm_COUNTS.csv')
df_Npas4_cFosm_WFAp_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/Npas4_cFosm_WFAp_COUNTS.csv')
df_Npas4_cFosp_WFAm_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/Npas4_cFosp_WFAm_COUNTS.csv')
df_Npas4_cFosp_WFAp_COUNTS.to_csv('Ensemble_cell_ns_cFos_Npas4/Npas4_cFosp_WFAp_COUNTS.csv')

In [15]:
# bare expression: displays one of the finished counts tables as the cell
# output, as a spot check
df_cFos_Npas4m_COUNTS

Unnamed: 0,rat_n,treatment,stain_type,cell_count_sums,image_n,mean_cell_n,react,treat,cFos,Npas4
0,KET-10-12,FR1_KET,cFos,291,5,58.2,FR1,KET,True,False
1,KET-9-1,FR1_KET,cFos,98,4,24.5,FR1,KET,True,False
2,PE-12-1,FR1_KET,cFos,290,5,58.0,FR1,KET,True,False
3,PE-12-2,FR1_KET,cFos,264,5,52.8,FR1,KET,True,False
4,PE-12-7,FR1_KET,cFos,184,5,36.8,FR1,KET,True,False
5,KET-10-1,FR1_SAL,cFos,135,5,27.0,FR1,SAL,True,False
6,KET-10-5,FR1_SAL,cFos,231,5,46.2,FR1,SAL,True,False
7,KET-8-2,FR1_SAL,cFos,135,5,27.0,FR1,SAL,True,False
8,KET-9-2,FR1_SAL,cFos,91,5,18.2,FR1,SAL,True,False
9,KET-9-4,FR1_SAL,cFos,146,5,29.2,FR1,SAL,True,False
