In [1]:
import os
import glob
import pandas as pd

In [2]:
# Show the current working directory; os.path.abspath('') in the loading cell resolves against it.
# os.getcwd() is plain Python, so it does not depend on IPython automagic being enabled.
os.getcwd()

'/Users/michaelpiacentino/Drive/git/data/nSMase2/Cad6B_and_Snai2_protein'

In [5]:
# Add experiment date here to apply to dataframe
analysis_date = '20190722'

# Folder holding the per-image count CSV files, resolved against the working directory
path = os.path.abspath('')+'/Counts/'
# Metadata fields encoded in each file name, in order, separated by '_'
name_fields = ['ExptDate', 'Treatment', 'Stains', 'Embryo', 'Somites', 'Section']
list_ = []

# Bring in files and concatenate them into a single dataframe.
# glob returns matches in arbitrary order, so sort them to keep the row order reproducible.
for file_ in sorted(glob.glob(os.path.join(path, '*.csv'))):
    df = pd.read_csv(file_)
    # Determine Image name from file name (base name without the extension)
    image_name = os.path.splitext(os.path.basename(file_))[0]
    df['Image'] = image_name
    # Every row of a file shares the same Image name, so split it once per file
    name_parts = image_name.split('_')
    if len(name_parts) != len(name_fields):
        raise ValueError(
            "File name '{}' has {} '_'-separated fields, expected {}: {}".format(
                image_name, len(name_parts), len(name_fields), '_'.join(name_fields)))
    for field, value in zip(name_fields, name_parts):
        df[field] = value
    list_.append(df)

# Fail with an explicit message instead of pd.concat's generic error on an empty list
if not list_:
    raise FileNotFoundError('No CSV files found in ' + path)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Slice,Count,Total Area,Average Size,%Area,Mean,IntDen,Image,ExptDate,Treatment,Stains,Embryo,Somites,Section
0,CntlSide,28,671.371,23.978,22.375,255,6114.275,"20190322_SMPD3MO0,8mM_Cad6BSnai2GFPNcad_Emb5_6...",20190322,"SMPD3MO0,8mM",Cad6BSnai2GFPNcad,Emb5,6ss,sec1
1,ExptSide,27,485.712,17.989,20.096,255,4587.283,"20190322_SMPD3MO0,8mM_Cad6BSnai2GFPNcad_Emb5_6...",20190322,"SMPD3MO0,8mM",Cad6BSnai2GFPNcad,Emb5,6ss,sec1
0,CntlSide,27,463.709,17.174,18.413,255,4379.478,"20190322_SMPD3MO0,8mM_Cad6BSnai2GFPNcad_Emb5_6...",20190322,"SMPD3MO0,8mM",Cad6BSnai2GFPNcad,Emb5,6ss,sec2
1,ExptSide,28,511.528,18.269,24.706,255,4658.562,"20190322_SMPD3MO0,8mM_Cad6BSnai2GFPNcad_Emb5_6...",20190322,"SMPD3MO0,8mM",Cad6BSnai2GFPNcad,Emb5,6ss,sec2
0,CntlSide,30,673.226,22.441,25.209,255,5722.424,"20190322_SMPD3MO0,8mM_Cad6BSnai2GFPNcad_Emb5_6...",20190322,"SMPD3MO0,8mM",Cad6BSnai2GFPNcad,Emb5,6ss,sec3


In [6]:
# Unique treatment labels present in the combined data
treatment_list = full_df['Treatment'].unique().tolist()

# Average the counts over sections for each Treatment / Embryo / Slice / ExptDate group
mean_sections = (
    full_df.groupby(['Treatment', 'Embryo', 'Slice', 'ExptDate'])['Count']
    .mean()
    .to_frame()
)

# Loop through treatments, running the analysis and exporting one CSV file per treatment
for treatment in treatment_list:
    # Keep only the rows belonging to the current treatment
    df_treatment = mean_sections.xs(treatment)

    # Mean counts for the control side and the experimental side
    counts_cntl = df_treatment.xs('CntlSide', level='Slice')['Count']
    counts_expt = df_treatment.xs('ExptSide', level='Slice')['Count']

    # Ratio of experimental to control counts
    counts_ratios = counts_expt / counts_cntl

    # Normalize both sides to the mean of the control counts
    cntl_mean = float(counts_cntl.mean())
    norm_cntl = counts_cntl / cntl_mean
    norm_expt = counts_expt / cntl_mean

    # Assemble the named columns side by side, then turn the index levels into columns
    results = pd.concat(
        [
            counts_cntl.rename('Cntl Counts'),
            counts_expt.rename('Expt Counts'),
            counts_ratios.rename('Expt/Cntl Counts'),
            norm_cntl.rename('Norm Cntl Counts'),
            norm_expt.rename('Norm Expt Counts'),
        ],
        axis=1,
        sort=True,
    ).reset_index()
    # ID is the experiment date followed by the embryo label
    results['ID'] = results['ExptDate'].str.cat(results['Embryo'])

    # Save the results as a CSV file named after the analysis date and treatment
    out_name = analysis_date + '_' + treatment + '_Snai2CountResults.csv'
    results.to_csv(out_name)