In [2]:
import os
import glob
import pandas as pd

In [3]:
# Directory holding one Fiji results CSV per image. File names are expected to
# follow '<Target>_<Method>_<Embryo>_<Section>.csv' (see the split below).
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
path = r'/Users/michaelpiacentino/Drive/git/data/smpd3/20180616_CRISPR_TCFLef_sections/csvs/'

# Per-image dataframes; concatenated once after the loop
list_ = []

# Older recordings name each background ROI individually. The Fiji macro
# ('FluorIntensity_2Channel.ijm') has since been updated to name them all
# 'background', so this mapping is a no-op on freshly collected data.
legacy_background_names = ['back1a', 'back1b', 'back1c', 'back2a', 'back2b', 'back2c']

# os.path.join copes with the trailing slash in `path`; sorted() makes the row
# order of full_df independent of the file system's listing order.
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not csv_files:
    # Fail with an actionable message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError('No .csv files found in ' + path)

# Read every CSV, annotate it with image/ROI metadata, and collect it
for file_ in csv_files:
    df = pd.read_csv(file_)

    # Image name = file name without directory and extension
    image_name = os.path.splitext(os.path.basename(file_))[0]
    df['Image'] = image_name

    # Split the ROI label '<Fluor>:<ROI>' into its two parts
    label_parts = df['Label'].str.split(':', expand=True)
    if label_parts.shape[1] != 2:
        raise ValueError("Expected 'Label' values of the form '<Fluor>:<ROI>' in " + file_)
    df['Fluor'] = label_parts[0]
    df['ROI'] = label_parts[1]

    # Split the image name once per file (it is identical for every row)
    name_parts = image_name.split('_')
    if len(name_parts) != 4:
        raise ValueError("Expected a file name of the form "
                         "'<Target>_<Method>_<Embryo>_<Section>.csv', got " + file_)
    df['Target'], df['Method'], df['Embryo'], df['Section'] = name_parts

    # Normalise legacy background ROI names (whole-cell matches only)
    df = df.replace(to_replace=legacy_background_names, value='background')
    list_.append(df)

full_df = pd.concat(list_)

In [4]:
# Average Area, Mean and IntDen over all rows that share the same
# Embryo / Fluor / ROI / Section; this collapses the several 'background'
# ROIs of a section into a single mean row.
# The columns are selected with a *list*: tuple-style selection
# (['Area', 'Mean', 'IntDen'] without the inner brackets) was deprecated in
# pandas 1.0 and raises a ValueError from pandas 2.0 onwards.
grouped_means = (full_df.groupby(['Embryo', 'Fluor', 'ROI', 'Section'])[['Area', 'Mean', 'IntDen']]).mean()

In [46]:
# CTCF per ROI, computed here as:
#     CTCF = ROI IntDen - (background Area * background Mean)
# NOTE(review): the background term uses the background ROI's own Area, which
# equals scaling by the measured ROI's area only if both areas match -- confirm
# this is the intended definition.

# Background term, one value per (Embryo, Fluor, Section)
background_rows = grouped_means.xs('background', level='ROI')
backgrounds = background_rows['Area'] * background_rows['Mean']

# Integrated densities of the control and experimental ROIs
intdens_cntl = grouped_means.xs('Cntl', level='ROI')['IntDen']
intdens_expt = grouped_means.xs('Expt', level='ROI')['IntDen']

# Background-subtract each ROI type; pandas aligns the operands on the shared
# (Embryo, Fluor, Section) index
sub_cntl = intdens_cntl.sub(backgrounds).to_frame()
sub_expt = intdens_expt.sub(backgrounds).to_frame()

# Stack both results; the new outermost index level records the ROI type
background_sub = pd.concat([sub_cntl, sub_expt], keys=['Cntl', 'Expt'])
background_sub.columns = ['CTCF']

In [49]:
# Separate the CTCF values by fluorophore (TCFLef vs. pCIG)
ctcf_values = background_sub['CTCF']
ctcf_tcflef = ctcf_values.xs('TCFLef', level='Fluor')
ctcf_pcig = ctcf_values.xs('pCIG', level='Fluor')

# Normalise for electroporation efficiency: TCFLef divided by pCIG, matched on
# the remaining index levels
electroporation_norm = ctcf_tcflef.div(ctcf_pcig).to_frame(name='CTCF')

# Name the three remaining index levels.
# NOTE(review): the last level holds the values of the former 'Section' level,
# so 'Selection' is probably a typo for 'Section' -- kept unchanged in case
# other cells refer to it by this name.
electroporation_norm.index.names = ['Treatment', 'Embryo', 'Selection']

In [48]:
# Collapse the sections of each embryo to one mean value per treatment, so the
# Expt/Cntl ratio below is formed per embryo rather than per section
averaged_sections = electroporation_norm.groupby(level=['Treatment', 'Embryo']).mean()

# Per-embryo CTCF for the control and experimental sides
averaged_ctcf = averaged_sections['CTCF']
ctcf_cntl = averaged_ctcf.xs('Cntl', level='Treatment')
ctcf_expt = averaged_ctcf.xs('Expt', level='Treatment')

# Ratio of experimental to control CTCF, one row per embryo
ratios_sections = ctcf_expt.div(ctcf_cntl).to_frame(name='Expt/Cntl CTCF')

In [52]:
def _embryo_mean_frame(values, column_name):
    """Wrap `values` in a one-column frame named `column_name`, averaged per 'Embryo'."""
    frame = pd.DataFrame(values)
    frame.columns = [column_name]
    return frame.groupby('Embryo').mean()


# Turn the per-embryo control and experimental CTCFs into labelled frames
ctcf_cntl = _embryo_mean_frame(ctcf_cntl, 'Cntl CTCF')
ctcf_expt = _embryo_mean_frame(ctcf_expt, 'Expt CTCF')

# Place Cntl, Expt and the Expt/Cntl ratio side by side (aligned on Embryo)
# and write the combined table to 'Results.csv' in the working directory
results = pd.concat([ctcf_cntl, ctcf_expt, ratios_sections], axis=1, sort=True)
results.to_csv('Results.csv')