In [1]:
import os
import glob
import pandas as pd

In [2]:
# To do still:
#    Calculate CTCF
#        CTCF = Integrated Density – (Area of selected ROI X Mean fluorescence of background readings)
#    Plot, analyze CTCF values

In [3]:
path =r'/Users/michaelpiacentino/Drive/git/data/smpd3/20180616_CRISPR_TCFLef_sections/csvs/'
full_df = pd.DataFrame()
list_ = []

# For loop to bring in files and concatenate them into a single dataframe
for file_ in glob.glob(path + "/*.csv"):
    df = pd.read_csv(file_)
    # Determine Image name from file name
    df['Image'] = os.path.splitext(os.path.basename(file_))[0]
    # Split values in ROI label and Image name columns
    df['Fluor'], df['ROI'] = zip(*df['Label'].map(lambda x: x.split(':')))
    df['Target'], df['Method'], df['Embryo'], df['Section'] = zip(*df['Image'].map(lambda x: x.split('_')))
    
    # Replace background ROI names
        # NOTE: I have updated the Fiji macro ('FluorIntensity_2Channel.ijm') to name all background ROIs as 'background',
        # so this step will be unnecessary with freshly collected data
    df.replace(to_replace=['back1a', 'back1b', 'back1c', 'back2a', 'back2b', 'back2c'],
                            value=['background', 'background', 'background', 'background', 'background', 'background'], 
                            inplace=True)
    list_.append(df)

full_df = pd.concat(list_)
full_df.head()

Unnamed: 0,Unnamed: 1,Label,Area,Mean,IntDen,RawIntDen,Image,Fluor,ROI,Target,Method,Embryo,Section
0,1,TCFLef:back1a,34.937,13.963,487.825,9467.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,background,SMPD3,CRISPR,Emb1,sec1
1,2,TCFLef:back1b,37.565,13.726,515.599,10006.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,background,SMPD3,CRISPR,Emb1,sec1
2,3,TCFLef:back1c,42.202,13.664,576.661,11191.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,background,SMPD3,CRISPR,Emb1,sec1
3,4,TCFLef:Cntl,7227.2,130.882,945909.299,18356834.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,Cntl,SMPD3,CRISPR,Emb1,sec1
4,5,TCFLef:Expt,7117.289,126.978,903740.336,17538480.0,SMPD3_CRISPR_Emb1_sec1,TCFLef,Expt,SMPD3,CRISPR,Emb1,sec1


In [4]:
# Mean background values and group
grouped_means = (full_df.groupby(['Embryo', 'Fluor', 'ROI', 'Section'])['Area', 'Mean', 'IntDen']).mean()
grouped_means.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Area,Mean,IntDen
Embryo,Fluor,ROI,Section,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Emb1,TCFLef,Cntl,sec1,7227.2,130.882,945909.3
Emb1,TCFLef,Cntl,sec2,6666.101,123.361,822335.5
Emb1,TCFLef,Cntl,sec3,3883.174,283.885,1102374.0
Emb1,TCFLef,Expt,sec1,7117.289,126.978,903740.3
Emb1,TCFLef,Expt,sec2,7322.013,103.656,758971.7
Emb1,TCFLef,Expt,sec3,3481.093,147.838,514636.3
Emb1,TCFLef,background,sec1,38.234667,13.784333,526.695
Emb1,TCFLef,background,sec2,44.469667,14.191,631.5737
Emb1,TCFLef,background,sec3,55.135667,13.598,749.5923
Emb1,pCIG,Cntl,sec1,7227.2,156.637,1132045.0


In [5]:
# Calculate background to subtract from IntDen values
backgrounds = (grouped_means.xs('background', level='ROI')['Area'] 
                * grouped_means.xs('background', level='ROI')['Mean'])

# Slice out only Cntl or Expt values in IntDen
IntDens_Cntl = grouped_means.xs('Cntl', level='ROI')['IntDen'] 
IntDens_Expt = grouped_means.xs('Expt', level='ROI')['IntDen'] 

# Subtract background from IntDens and concatenate into single dataframe
sub_Cntl = pd.DataFrame(IntDens_Cntl - backgrounds)
sub_Expt = pd.DataFrame(IntDens_Expt - backgrounds)
df2 = pd.concat([sub_Cntl, sub_Expt], keys = ['Cntl', 'Expt'])
df2.columns = ['Intensity']

In [6]:
# Pull out TCFLef and pCIG values
IntDens_TCFLef = df2.xs('TCFLef', level='Fluor')['Intensity'] 
IntDens_pCIG = df2.xs('pCIG', level='Fluor')['Intensity'] 

# Generate ratios as TCFLef/pCIG for electroporation norm
electroporation_norm = pd.DataFrame(IntDens_TCFLef / IntDens_pCIG)
electroporation_norm.columns = ['Intensity']
electroporation_norm.index.names = ['Treatment', 'Embryo', 'Selection']
electroporation_norm.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Intensity
Treatment,Embryo,Selection,Unnamed: 3_level_1
Cntl,Emb1,sec1,0.835868
Cntl,Emb1,sec2,0.800914
Cntl,Emb1,sec3,1.504802
Cntl,Emb2,sec1,1.603303
Cntl,Emb2,sec2,2.509863


In [12]:
# Pull out Cntl and Expt ratios
ratios_Cntl = electroporation_norm.xs('Cntl', level='Treatment')['Intensity'] 
ratios_Expt = electroporation_norm.xs('Expt', level='Treatment')['Intensity'] 

# Generate ratios as TCFLef/pCIG
ratios_sections = pd.DataFrame(ratios_Expt / ratios_Cntl)
ratios_sections.columns = ['Intensity']
averaged_sections = ratios_sections.groupby('Embryo').mean()

In [16]:
# Print out csvs with final results
averaged_sections.to_csv('Normalized_Intensity_Ratios.csv')
ratios_Cntl.to_csv('Electro_Norm_Intensities_Cntl.csv')
ratios_Expt.to_csv('Electro_Norm_Intensities_Expt.csv')