In [102]:
import os
import glob
import pandas as pd

In [103]:
pwd

'/Users/michaelpiacentino/Drive/git/data/smpd3/TCFLef'

In [104]:
# Add experiment date here to apply to dataframe
analysis_date = '20181108'

# Folder holding the measurement CSVs, resolved against the current working directory
path = os.path.abspath('')+'/CSVs/'

# Metadata fields encoded in each CSV file name, in order, separated by underscores
image_fields = ['ExptDate', 'Treatment', 'Stains', 'Embryo', 'Somites', 'ImageMag', 'Section']

# Legacy background ROI names that are collapsed into the single label 'background'
    # NOTE: I have updated the Fiji macro ('FluorIntensity_2Channel.ijm') to name all background ROIs as 'background',
    # so this step will be unnecessary with freshly collected data
legacy_background_rois = ['back1a', 'back1b', 'back1c', 'back2a', 'back2b', 'back2c']

# glob returns matches in arbitrary order; sort so the concatenated rows are reproducible between runs
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not csv_files:
    # pd.concat([]) would raise the same exception type with a far less helpful message
    raise ValueError('No CSV files found in ' + path)

# Bring in each file, annotate it with the metadata parsed from its name, and collect the pieces
list_ = []
for file_ in csv_files:
    df = pd.read_csv(file_)

    # Determine Image name from file name (base name without the extension)
    image_name = os.path.splitext(os.path.basename(file_))[0]
    df['Image'] = image_name

    # Split values in ROI label ('<Fluor>:<ROI>')
    df['Fluor'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))

    # Split values in Image name; the name is constant for the file, so parse it once
    image_values = image_name.split('_')
    if len(image_values) != len(image_fields):
        raise ValueError("Cannot parse file name '%s': expected %d underscore-separated fields (%s), found %d"
                         % (image_name, len(image_fields), '_'.join(image_fields), len(image_values)))
    for field, value in zip(image_fields, image_values):
        df[field] = value

    # Replace background ROI names, touching only the ROI column so no other column can be rewritten
    df['ROI'] = df['ROI'].replace(legacy_background_rois, 'background')
    list_.append(df)

# Concatenate all files into a single dataframe and preview it
full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,Fluor,ROI,ExptDate,Treatment,Stains,Embryo,Somites,ImageMag,Section
0,1,TCFLef:back1a,73.48,14.734,1082.676,21011.0,20180616_SMPD3gRNA1DNA_Pax7LamininTCFLefRFP_Em...,TCFLef,background,20180616,SMPD3gRNA1DNA,Pax7LamininTCFLefRFP,Emb4,9ss,20x,sec3
1,2,TCFLef:back1b,32.618,14.624,477.004,9257.0,20180616_SMPD3gRNA1DNA_Pax7LamininTCFLefRFP_Em...,TCFLef,background,20180616,SMPD3gRNA1DNA,Pax7LamininTCFLefRFP,Emb4,9ss,20x,sec3
2,3,TCFLef:back1c,42.511,14.715,625.562,12140.0,20180616_SMPD3gRNA1DNA_Pax7LamininTCFLefRFP_Em...,TCFLef,background,20180616,SMPD3gRNA1DNA,Pax7LamininTCFLefRFP,Emb4,9ss,20x,sec3
3,4,TCFLef:Cntl,2919.066,187.387,546994.145,10615268.0,20180616_SMPD3gRNA1DNA_Pax7LamininTCFLefRFP_Em...,TCFLef,Cntl,20180616,SMPD3gRNA1DNA,Pax7LamininTCFLefRFP,Emb4,9ss,20x,sec3
4,5,TCFLef:Expt,4412.531,116.435,513775.254,9970604.0,20180616_SMPD3gRNA1DNA_Pax7LamininTCFLefRFP_Em...,TCFLef,Expt,20180616,SMPD3gRNA1DNA,Pax7LamininTCFLefRFP,Emb4,9ss,20x,sec3


In [105]:
# Get a list of treatments
treatment_list = full_df.Treatment.unique()
treatment_list = treatment_list.tolist()

# Mean background values and group by Treatment, Embryo, Fluor, ROI and Section
# (rows sharing the same ROI label within a group, e.g. several 'background' ROIs, are averaged together).
# The columns are selected with a list: selecting with a bare tuple of names is rejected by current pandas.
mean_sections = (full_df.groupby(['Treatment', 'Embryo', 'Fluor', 'ROI', 'Section', 'ExptDate'])
                 [['Area', 'Mean', 'IntDen']].mean())

# Loop through treatments, performing each analysis and exporting CSV file for each treatment
for treatment in treatment_list:
    # Slice dataframe to process only embryos with given treatment
    # (Treatment is the outermost index level, so xs drops it from the result)
    df_treatment = mean_sections.xs(treatment)

    # Determine CTCF values = ROI IntDen - (background mean * ROI area)
    # Calculate background (background mean * ROI area); the products align on the remaining index levels
    background_mean = df_treatment.xs('background', level='ROI')['Mean']
    background_corr_cntl = background_mean * df_treatment.xs('Cntl', level='ROI')['Area']
    background_corr_expt = background_mean * df_treatment.xs('Expt', level='ROI')['Area']

    # Slice out only Cntl or Expt values in IntDen
    intdens_cntl = df_treatment.xs('Cntl', level='ROI')['IntDen']
    intdens_expt = df_treatment.xs('Expt', level='ROI')['IntDen']

    # Subtract background from IntDens to determine CTCF and concatenate into single dataframe;
    # the concat keys become a new outermost index level that marks the side
    sub_cntl = pd.DataFrame(intdens_cntl - background_corr_cntl)
    sub_expt = pd.DataFrame(intdens_expt - background_corr_expt)
    full_ctcf = pd.concat([sub_cntl, sub_expt], keys=['Cntl', 'Expt'])
    full_ctcf.columns = ['CTCF']

    # Pull out TCFLef and pCIG values
    ctcf_tcflef = full_ctcf.xs('TCFLef', level='Fluor')['CTCF']
    ctcf_pcig = full_ctcf.xs('pCIG', level='Fluor')['CTCF']

    # Normalize for electroporation efficiency by determining TCFLef/pCIG
    electroporation_norm = pd.DataFrame(ctcf_tcflef / ctcf_pcig)
    electroporation_norm.columns = ['CTCF']
    # Name the unnamed level created by the concat keys above as 'Side'
    electroporation_norm.index.names = ['Side', 'Embryo', 'Section', 'ExptDate']

    # Average sections grouped by embryos before generating Expt/Cntl ratio
    averaged_sections = electroporation_norm.groupby(['Side', 'Embryo', 'ExptDate']).mean()

    # Pull out Cntl and Expt CTCFs
    ctcf_cntl = averaged_sections.xs('Cntl', level='Side')['CTCF']
    ctcf_expt = averaged_sections.xs('Expt', level='Side')['CTCF']

    # Generate ratios as Expt/Cntl
    ratios_sections = pd.DataFrame(ctcf_expt / ctcf_cntl)
    ratios_sections.columns = ['Expt/Cntl CTCF']

    # Normalize individual values to mean of control group (mean computed once and reused for both sides)
    cntl_mean = float(ctcf_cntl.mean())
    norm_cntl = pd.DataFrame(ctcf_cntl / cntl_mean)
    norm_cntl.columns = ['Norm Cntl CTCF']
    norm_expt = pd.DataFrame(ctcf_expt / cntl_mean)
    norm_expt.columns = ['Norm Expt CTCF']

    # Combine processed values into single dataframe and output as csv file
    # '<analysis_date>_<treatment>_CTCFResults.csv' in the current working directory
    ctcf_cntl = pd.DataFrame(ctcf_cntl)
    ctcf_cntl.columns = ['Cntl CTCF']
    ctcf_expt = pd.DataFrame(ctcf_expt)
    # Lower-case 'expt' is kept as-is so the exported column name does not change
    ctcf_expt.columns = ['expt CTCF']
    results = (pd.concat([ctcf_cntl, ctcf_expt, ratios_sections, norm_cntl, norm_expt], axis=1, sort=True)).reset_index()
    # Embryo identifier: experiment date immediately followed by the embryo label
    results['ID'] = results.ExptDate.str.cat(results.Embryo)
    results.to_csv(analysis_date + '_' + treatment + '_CTCFResults.csv')