In [1]:
import os
import glob
import pandas as pd

In [2]:
pwd

'/Users/michaelpiacentino/Drive/git/data/smpd3/smpd3_dataset/20171016_gRNA1_Pax7_Sox9/sec/sox9counts'

In [3]:
# Experiment metadata: expt_date is stamped onto every row below, and all three
# values are used to build the output file names in later cells
expt_date = '20171016'
treatment = 'SMPD3gRNA1'
result_type = 'Sox9Counts'

# Folder holding one CSV per image, resolved relative to the working directory
path = os.path.abspath('')+'/sox9csv/'
csv_files = glob.glob(os.path.join(path, '*.csv'))
if not csv_files:
    # Fail with an actionable message rather than pd.concat's
    # "No objects to concatenate"
    raise ValueError('No .csv files found in ' + path)

# Read each file, tag its rows with metadata, and collect the frames
list_ = []
for file_ in csv_files:
    df = pd.read_csv(file_)

    # The image name is the file name without directory or extension
    image = os.path.splitext(os.path.basename(file_))[0]
    # Image names must look like <Embryo>_<Somites>_<Section>; parse once per
    # file (not once per row) so a CSV with zero data rows still loads
    try:
        embryo, somites, section = image.split('_')
    except ValueError:
        raise ValueError(
            "Image name '" + image + "' does not have the form "
            "Embryo_Somites_Section (file: " + file_ + ")"
        )

    df['Date'] = expt_date
    df['Image'] = image
    df['Embryo'] = embryo
    df['Somites'] = somites
    df['Section'] = section

    list_.append(df)

# Stack all per-image frames; each file's original row index is kept
full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Slice,Count,Total Area,Average Size,%Area,Mean,IntDen,Date,Image,Embryo,Somites,Section
0,CntlSide,14,408.213,29.158,3.327,255.0,7435.305,20171016,Emb7_8ss_sec6,Emb7,8ss,sec6
1,ExptSide,10,379.923,37.992,3.045,255.0,9688.046,20171016,Emb7_8ss_sec6,Emb7,8ss,sec6
0,CntlSide,0,0.0,,0.0,,,20171016,Emb9_9ss_sec4,Emb9,9ss,sec4
1,ExptSide,6,229.356,38.226,3.743,255.0,9747.614,20171016,Emb9_9ss_sec4,Emb9,9ss,sec4
0,CntlSide,28,1117.407,39.907,5.579,255.0,10176.381,20171016,Emb7_8ss_sec5,Emb7,8ss,sec5


In [4]:
# Average the counts over all sections, giving one value per embryo and side
mean_sections = full_df.groupby(['Embryo', 'Slice'])['Count'].mean().to_frame()

# Split the per-embryo means into control-side and experimental-side series
sox9_cntl = mean_sections['Count'].xs('CntlSide', level='Slice')
sox9_expt = mean_sections['Count'].xs('ExptSide', level='Slice')

# Per-embryo ratio of experimental side to control side
sox9_ratios = sox9_expt.div(sox9_cntl).to_frame(name='Expt/Cntl Sox9')

In [5]:
# Assemble the control means, experimental means and ratios into one table
# (one row per embryo) and write it out as a csv file
sox9_cntl = pd.DataFrame(sox9_cntl).rename(columns=lambda old: 'Cntl Sox9')
sox9_expt = pd.DataFrame(sox9_expt).rename(columns=lambda old: 'Expt Sox9')

frames = [sox9_cntl, sox9_expt, sox9_ratios]
results = pd.concat(frames, axis=1, sort=True)

# Unique sample ID: experiment date joined to the embryo label from the index
results['ID'] = expt_date + '_' + results.index

results_csv = '_'.join([expt_date, treatment, result_type]) + '_results.csv'
results.to_csv(results_csv)

In [6]:
# Normalize every embryo's mean count to the mean of the control group
# Per-embryo, per-side mean count across sections
mean_embs = pd.DataFrame((full_df.groupby(['Embryo', 'Slice'])['Count']).mean())
# Mean of those per-embryo means for each side (indexed by Slice)
mean_cntl = pd.DataFrame((mean_embs.groupby(['Slice'])).mean())
# Pick the control row by label rather than by position, so the result does not
# depend on 'CntlSide' happening to sort first; a missing control group now
# raises KeyError instead of silently normalizing to another row
norm_values = (mean_embs / mean_cntl.loc['CntlSide']).reset_index()
# Reshape to one row per embryo with one column per side
norm_values_pivot = norm_values.pivot(index='Embryo', columns='Slice', values='Count')
# Unique sample ID: experiment date joined to the embryo label from the index
norm_values_pivot['ID'] = expt_date + '_' + norm_values_pivot.index
norm_values_pivot.to_csv(expt_date + '_' + treatment + '_' + result_type + '_norm_values.csv')
