In [1]:
import os
import glob
import pandas as pd

In [2]:
# Show the working directory; the CSV inputs are read from the 'CSVs' folder beneath it.
# os.getcwd() is used instead of the bare `pwd` automagic so the cell still works
# when automagic is disabled or the notebook is exported to a plain .py script.
os.getcwd()

'/Users/michaelpiacentino/Drive/git/data/smpd3/Snai2Counts'

In [3]:
# Add experiment date here to apply to dataframe
analysis_date = '20181202'

# Folder holding one CSV per image, resolved relative to the working directory.
# The trailing empty component keeps the trailing separator on `path`.
path = os.path.join(os.path.abspath(''), 'CSVs', '')

# Metadata fields encoded in each file name, in order, separated by '_'
name_fields = ['ExptDate', 'Treatment', 'Dose', 'Stains', 'Embryo',
               'Somites', 'ImageMag', 'Section']

list_ = []

# Bring in the files one by one and collect them for a single concatenation.
# glob returns files in arbitrary order, so sort to make row order reproducible.
for file_ in sorted(glob.glob(os.path.join(path, '*.csv'))):
    df = pd.read_csv(file_)

    # Determine Image name from file name (base name without extension)
    image_name = os.path.splitext(os.path.basename(file_))[0]
    name_parts = image_name.split('_')

    # Fail with the offending file named, rather than an opaque unpacking error
    if len(name_parts) != len(name_fields):
        raise ValueError(
            "Expected %d '_'-separated fields (%s) in file name, got %d: %s"
            % (len(name_fields), ', '.join(name_fields), len(name_parts), file_)
        )

    # Every row of a file shares the same metadata, so assign the values as scalars.
    # Unlike unpacking a per-row zip, this also works for a CSV with no data rows.
    df['Image'] = image_name
    for field, value in zip(name_fields, name_parts):
        df[field] = value

    list_.append(df)

# pd.concat raises an uninformative error on an empty list; say what is actually wrong
if not list_:
    raise ValueError('No CSV files found in ' + path)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Slice,Count,Total Area,Average Size,%Area,Mean,IntDen,Image,ExptDate,Treatment,Dose,Stains,Embryo,Somites,ImageMag,Section
0,CntlSide,34,1177.798,34.641,22.929,255,8833.488,"20181119_SMPD3MO_1,0mM_Pax7Snai2pbetacat_Emb5_...",20181119,SMPD3MO,"1,0mM",Pax7Snai2pbetacat,Emb5,6ss,20x,sec2
1,ExptSide,20,551.876,27.594,15.383,255,7036.414,"20181119_SMPD3MO_1,0mM_Pax7Snai2pbetacat_Emb5_...",20181119,SMPD3MO,"1,0mM",Pax7Snai2pbetacat,Emb5,6ss,20x,sec2
0,CntlSide,22,881.558,40.071,24.729,255,10218.06,"20181119_SMPD3MO_1,0mM_Pax7Snai2pbetacat_Emb5_...",20181119,SMPD3MO,"1,0mM",Pax7Snai2pbetacat,Emb5,6ss,20x,sec3
1,ExptSide,17,366.165,21.539,12.889,255,5492.476,"20181119_SMPD3MO_1,0mM_Pax7Snai2pbetacat_Emb5_...",20181119,SMPD3MO,"1,0mM",Pax7Snai2pbetacat,Emb5,6ss,20x,sec3
0,CntlSide,32,592.635,18.52,17.365,255,4722.56,"20181119_SMPD3MO_1,0mM_Pax7Snai2pbetacat_Emb5_...",20181119,SMPD3MO,"1,0mM",Pax7Snai2pbetacat,Emb5,6ss,20x,sec1


In [4]:
# Get a list of treatments
treatment_list = full_df.Treatment.unique().tolist()

# Mean counts across sections: one row per (Treatment, Embryo, Slice, ExptDate)
mean_sections = (
    full_df.groupby(['Treatment', 'Embryo', 'Slice', 'ExptDate'])['Count']
    .mean()
    .to_frame()
)

# Loop through treatments, performing each analysis and exporting a CSV file for each treatment
for treatment in treatment_list:
    # Slice dataframe to process only embryos with the given treatment
    # (selects on the first index level, 'Treatment', and drops it)
    df_treatment = mean_sections.xs(treatment)

    # Extract mean counts for the Cntl and Expt sides, indexed by (Embryo, ExptDate)
    counts_cntl = df_treatment.xs('CntlSide', level='Slice')['Count']
    counts_expt = df_treatment.xs('ExptSide', level='Slice')['Count']

    # Mean of the control side for this treatment; used to normalize both sides
    cntl_mean = float(counts_cntl.mean())

    # Generate ratios as Expt/Cntl (aligned on the shared index)
    counts_ratios = (counts_expt / counts_cntl).to_frame('Expt/Cntl Counts')

    # Normalize individual values to mean of control group
    norm_cntl = (counts_cntl / cntl_mean).to_frame('Norm Cntl Counts')
    norm_expt = (counts_expt / cntl_mean).to_frame('Norm Expt Counts')

    # Name the raw count columns last, since the raw Series are needed above
    counts_cntl = counts_cntl.to_frame('Cntl Counts')
    counts_expt = counts_expt.to_frame('Expt Counts')

    # Combine processed values into a single dataframe
    results = pd.concat(
        [counts_cntl, counts_expt, counts_ratios, norm_cntl, norm_expt],
        axis=1, sort=True,
    ).reset_index()

    # Embryo identifier: experiment date string followed by embryo label
    results['ID'] = results.ExptDate.str.cat(results.Embryo)

    # Save results as a CSV file, named by analysis date and treatment
    results.to_csv(analysis_date + '_' + treatment + '_Snai2CountResults.csv')