In [1]:
import os
import glob
import pandas as pd

In [2]:
pwd

'/Users/michaelpiacentino/Drive/git/data/smpd3/Pax7Counts'

In [3]:
# Add experiment date here to apply to dataframe
analysis_date = '20181107'

path = os.path.abspath('')+'/CSVs/'
full_df = pd.DataFrame()
list_ = []

# For loop to bring in files and concatenate them into a single dataframe
for file_ in glob.glob(path + "/*.csv"):
    df = pd.read_csv(file_)
    # Determine Image name from file name
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]
    # Split values in Image name column
    (df['ExptDate'], df['Treatment'], df['Stains'], df['Embryo'], 
        df['Somites'], df['ImageMag'], df['Section']) = zip(*df['Image'].map(lambda x: x.split('_')))
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Slice,Count,Total Area,Average Size,%Area,Mean,IntDen,Image,ExptDate,Treatment,Stains,Embryo,Somites,ImageMag,Section
0,CntlSide,19,1816.81,95.622,22.363,255,24383.5,20180418_SMPD3gRNA1DNA_Pax7NCadLaminin_Emb6_8s...,20180418,SMPD3gRNA1DNA,Pax7NCadLaminin,Emb6,8ss,20x,sec5
1,ExptSide,12,1648.774,137.398,25.406,255,35036.44,20180418_SMPD3gRNA1DNA_Pax7NCadLaminin_Emb6_8s...,20180418,SMPD3gRNA1DNA,Pax7NCadLaminin,Emb6,8ss,20x,sec5
0,CntlSide,29,1285.597,44.331,10.478,255,11304.389,20171016_SMPD3gRNA1DNA_Pax7Sox9_Emb7_8ss_20x_sec6,20171016,SMPD3gRNA1DNA,Pax7Sox9,Emb7,8ss,20x,sec6
1,ExptSide,24,1443.946,60.164,11.572,255,15341.925,20171016_SMPD3gRNA1DNA_Pax7Sox9_Emb7_8ss_20x_sec6,20171016,SMPD3gRNA1DNA,Pax7Sox9,Emb7,8ss,20x,sec6
0,CntlSide,12,4890.824,407.569,49.641,255,103930.014,20180616_SMPD3gRNA1DNA_Pax7LamininTCFLef_Emb2_...,20180616,SMPD3gRNA1DNA,Pax7LamininTCFLef,Emb2,8ss,20x,sec1


In [4]:
# Get a list of treatments
treatment_list = full_df.Treatment.unique()
treatment_list = treatment_list.tolist()

# Mean counts across sections
mean_sections = pd.DataFrame((full_df.groupby(['Treatment', 'Embryo', 'Slice', 'ExptDate'])['Count']).mean())

# Loop trough treatments, performing each analysis and exporting CSV file for each treatment
for i in treatment_list:
    # Slice dataframe to process only embryos with given treatment
    treatment = i
    df_treatment = pd.DataFrame(mean_sections.xs(treatment))

    # Extract means for Cntl and Expt counts
    pax7_cntl = df_treatment.xs('CntlSide', level='Slice')['Count'] 
    pax7_expt = df_treatment.xs('ExptSide', level='Slice')['Count']

    # Generate ratios as Expt/Cntl
    pax7_ratios = pd.DataFrame(pax7_expt / pax7_cntl)
    pax7_ratios.columns = ['Expt/Cntl Pax7']

    # Normalize individual values to mean of control group
    norm_cntl = pax7_cntl/(float(pax7_cntl.mean()))
    norm_expt = pax7_expt/(float(pax7_cntl.mean()))

    # Combine processed values into single dataframe and output as csv file
    pax7_cntl = pd.DataFrame(pax7_cntl)
    pax7_cntl.columns = ['Cntl Pax7']
    pax7_expt = pd.DataFrame(pax7_expt)
    pax7_expt.columns = ['Expt Pax7']
    pax7_ratios = pd.DataFrame(pax7_ratios)
    pax7_ratios.columns = ['Expt/Cntl Pax7']
    norm_cntl = pd.DataFrame(norm_cntl)
    norm_cntl.columns = ['Norm Cntl Pax7']
    norm_expt = pd.DataFrame(norm_expt)
    norm_expt.columns = ['Norm Expt Pax7']
    results = (pd.concat([pax7_cntl, pax7_expt, pax7_ratios, norm_cntl, norm_expt], axis=1, sort=True)).reset_index()
    results['ID'] = results.ExptDate.str.cat(results.Embryo)
    results.to_csv(analysis_date + '_Pax7CountResults.csv')