In [1]:
### Calculation of gene-level profiles from the HeLa DMEM guide-level profiles for the PERISCOPE manuscript. ###
### Script by Meraj Ramezani(mramezan@broadinstitute.org) ###
# Import relevant libraries
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pycytominer import normalize, feature_select ,aggregate,consensus
from pycytominer.cyto_utils import output,infer_cp_features
from pycytominer.cyto_utils.util import (
    get_pairwise_correlation,
    check_correlation_method,
    infer_cp_features,
)

In [2]:
# Read the guide-level profile table for the CP257 DMEM arm.
# NOTE: replace the "(input the file address)" placeholder with the real directory before running.
filename = '(input the file address)/20210422_6W_CP257_guide_normalized_feature_select_median_merged_ALLBATCHES___DMEM___ALLWELLS.csv.gz'
dmem_median_df = pd.read_csv(filepath_or_buffer=filename)
# Bare last expression so the notebook renders a preview of the loaded frame.
dmem_median_df

Unnamed: 0,Metadata_Foci_Barcode_MatchedTo_GeneCode,Metadata_Foci_Barcode_MatchedTo_Barcode,Cells_AreaShape_CentralMoment_0_3,Cells_AreaShape_CentralMoment_1_0,Cells_AreaShape_CentralMoment_1_2,Cells_AreaShape_CentralMoment_2_1,Cells_AreaShape_CentralMoment_2_3,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_Extent,...,Nuclei_Texture_SumVariance_DAPI_Painting_10_02_256,Nuclei_Texture_SumVariance_DAPI_Painting_10_03_256,Nuclei_Texture_SumVariance_Mito_10_03_256,Nuclei_Texture_SumVariance_Mito_5_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_03_256,Nuclei_Texture_SumVariance_Phalloidin_5_01_256,Nuclei_Texture_SumVariance_WGA_10_01_256,Nuclei_Texture_SumVariance_WGA_10_03_256,Nuclei_Texture_SumVariance_WGA_5_01_256
0,A1BG,CAAGAGAAAGACCACGAGCA,0.016441,-0.000100,-0.138460,0.003857,0.001038,0.980080,0.788680,-0.392810,...,-0.68377,-0.48894,-0.048677,-0.028407,-0.105700,-0.240800,-0.181300,-0.28261,-0.219630,-0.428860
1,A1BG,CATCTTCTTTCACCTGAACG,0.008520,0.171140,0.000794,0.006531,0.000064,0.293300,0.521390,0.188500,...,0.31212,0.35947,-0.154080,-0.017084,-0.076653,-0.022525,-0.009099,-0.29626,-0.163430,-0.488400
2,A1BG,CTCCGGGGAGAACTCCGGCG,-0.002752,-0.267570,-0.000020,-0.001949,-0.009853,0.188720,0.085372,0.260530,...,-0.11342,-0.32553,-0.008285,-0.032883,-0.006471,-0.175300,0.008365,-0.38693,-0.139000,-0.308780
3,A1BG,TGGAAGTCCACTCCACTCAG,-0.002541,-0.010418,-0.000956,0.011877,0.000071,-0.128240,-0.334610,0.072320,...,-0.44236,-0.22552,-0.048856,-0.050878,-0.022580,-0.050792,-0.038298,-0.14036,0.252750,0.008285
4,A1CF,AGTTATGTTAGGTATACCCG,0.004307,0.037445,-0.032165,-0.002427,0.000261,-0.362320,0.478180,-0.123290,...,-0.22953,-0.43204,-0.003427,-0.064101,0.136110,0.061535,-0.134130,-0.26887,0.023397,-0.261620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84267,nontargeting,TTTATGCATTTAATACGCCG,0.001273,0.018101,0.000890,0.001313,-0.000374,-0.055722,0.099351,0.069365,...,-0.23705,-0.20256,-0.031417,-0.037462,-0.114560,-0.132490,-0.107730,-0.17507,-0.217230,-0.070118
84268,nontargeting,TTTCTAGTTACTACTGGACG,0.000851,0.007969,-0.002324,0.001188,-0.000311,-0.069128,-0.055467,0.052259,...,-0.22518,-0.24198,-0.070424,-0.033117,-0.123770,-0.146650,-0.123340,-0.19341,-0.172650,-0.156660
84269,nontargeting,TTTGGCAGTACCTTTTATTA,-0.002488,-0.020554,-0.002961,0.001481,-0.000375,-0.145710,0.007123,0.134060,...,-0.24169,-0.25947,-0.097440,-0.045132,-0.129210,-0.142610,-0.131210,-0.13088,-0.176260,-0.191660
84270,nontargeting,TTTTACCTTGTTCACATGGA,-0.001427,0.018769,-0.001960,-0.001710,-0.000271,-0.124700,0.070634,0.011164,...,-0.16827,-0.20667,-0.107190,-0.053795,-0.105610,-0.130490,-0.109310,-0.24267,-0.220700,-0.218470


In [3]:
# Filter out irrelevant guides: keep only barcodes whose guide-list entry has
# `dialout` equal to 1 or 3 (the subset the variable names call "cp257").
filename = '(input the file address)/wgs_guide_list.csv'
guide_df = pd.read_csv(filename)

# Same selection as the query string 'dialout == 1 | dialout ==3', written as a boolean mask.
dialout_is_1_or_3 = (guide_df['dialout'] == 1) | (guide_df['dialout'] == 3)
cp257_guide_df = guide_df[dialout_is_1_or_3]
cp257_guide_list = cp257_guide_df['sgRNA'].tolist()

# Restrict the profile table to rows whose barcode appears in the selected guide list.
barcode_is_selected = dmem_median_df["Metadata_Foci_Barcode_MatchedTo_Barcode"].isin(cp257_guide_list)
dmem_median_df = dmem_median_df[barcode_is_selected]
dmem_median_df

Unnamed: 0,Metadata_Foci_Barcode_MatchedTo_GeneCode,Metadata_Foci_Barcode_MatchedTo_Barcode,Cells_AreaShape_CentralMoment_0_3,Cells_AreaShape_CentralMoment_1_0,Cells_AreaShape_CentralMoment_1_2,Cells_AreaShape_CentralMoment_2_1,Cells_AreaShape_CentralMoment_2_3,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_Extent,...,Nuclei_Texture_SumVariance_DAPI_Painting_10_02_256,Nuclei_Texture_SumVariance_DAPI_Painting_10_03_256,Nuclei_Texture_SumVariance_Mito_10_03_256,Nuclei_Texture_SumVariance_Mito_5_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_03_256,Nuclei_Texture_SumVariance_Phalloidin_5_01_256,Nuclei_Texture_SumVariance_WGA_10_01_256,Nuclei_Texture_SumVariance_WGA_10_03_256,Nuclei_Texture_SumVariance_WGA_5_01_256
0,A1BG,CAAGAGAAAGACCACGAGCA,0.016441,-0.000100,-0.138460,0.003857,0.001038,0.980080,0.788680,-0.392810,...,-0.68377,-0.48894,-0.048677,-0.028407,-0.105700,-0.240800,-0.181300,-0.28261,-0.219630,-0.428860
1,A1BG,CATCTTCTTTCACCTGAACG,0.008520,0.171140,0.000794,0.006531,0.000064,0.293300,0.521390,0.188500,...,0.31212,0.35947,-0.154080,-0.017084,-0.076653,-0.022525,-0.009099,-0.29626,-0.163430,-0.488400
2,A1BG,CTCCGGGGAGAACTCCGGCG,-0.002752,-0.267570,-0.000020,-0.001949,-0.009853,0.188720,0.085372,0.260530,...,-0.11342,-0.32553,-0.008285,-0.032883,-0.006471,-0.175300,0.008365,-0.38693,-0.139000,-0.308780
3,A1BG,TGGAAGTCCACTCCACTCAG,-0.002541,-0.010418,-0.000956,0.011877,0.000071,-0.128240,-0.334610,0.072320,...,-0.44236,-0.22552,-0.048856,-0.050878,-0.022580,-0.050792,-0.038298,-0.14036,0.252750,0.008285
4,A1CF,AGTTATGTTAGGTATACCCG,0.004307,0.037445,-0.032165,-0.002427,0.000261,-0.362320,0.478180,-0.123290,...,-0.22953,-0.43204,-0.003427,-0.064101,0.136110,0.061535,-0.134130,-0.26887,0.023397,-0.261620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84267,nontargeting,TTTATGCATTTAATACGCCG,0.001273,0.018101,0.000890,0.001313,-0.000374,-0.055722,0.099351,0.069365,...,-0.23705,-0.20256,-0.031417,-0.037462,-0.114560,-0.132490,-0.107730,-0.17507,-0.217230,-0.070118
84268,nontargeting,TTTCTAGTTACTACTGGACG,0.000851,0.007969,-0.002324,0.001188,-0.000311,-0.069128,-0.055467,0.052259,...,-0.22518,-0.24198,-0.070424,-0.033117,-0.123770,-0.146650,-0.123340,-0.19341,-0.172650,-0.156660
84269,nontargeting,TTTGGCAGTACCTTTTATTA,-0.002488,-0.020554,-0.002961,0.001481,-0.000375,-0.145710,0.007123,0.134060,...,-0.24169,-0.25947,-0.097440,-0.045132,-0.129210,-0.142610,-0.131210,-0.13088,-0.176260,-0.191660
84270,nontargeting,TTTTACCTTGTTCACATGGA,-0.001427,0.018769,-0.001960,-0.001710,-0.000271,-0.124700,0.070634,0.011164,...,-0.16827,-0.20667,-0.107190,-0.053795,-0.105610,-0.130490,-0.109310,-0.24267,-0.220700,-0.218470


In [5]:
# Collapse guide-level rows into one profile per gene code using pycytominer's `aggregate`.
aggregate_columns = ['Metadata_Foci_Barcode_MatchedTo_GeneCode']
aggregate_features = 'infer'
aggregate_operation = 'median'

# Gather the call arguments in one place so the aggregation settings are easy to inspect.
aggregate_kwargs = {
    'population_df': dmem_median_df,
    'strata': aggregate_columns,
    'features': aggregate_features,
    'operation': aggregate_operation,
}
dmem_median_gene_df = aggregate(**aggregate_kwargs)
dmem_median_gene_df

Unnamed: 0,Metadata_Foci_Barcode_MatchedTo_GeneCode,Cells_AreaShape_CentralMoment_0_3,Cells_AreaShape_CentralMoment_1_0,Cells_AreaShape_CentralMoment_1_2,Cells_AreaShape_CentralMoment_2_1,Cells_AreaShape_CentralMoment_2_3,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,...,Nuclei_Texture_SumVariance_DAPI_Painting_10_02_256,Nuclei_Texture_SumVariance_DAPI_Painting_10_03_256,Nuclei_Texture_SumVariance_Mito_10_03_256,Nuclei_Texture_SumVariance_Mito_5_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_03_256,Nuclei_Texture_SumVariance_Phalloidin_5_01_256,Nuclei_Texture_SumVariance_WGA_10_01_256,Nuclei_Texture_SumVariance_WGA_10_03_256,Nuclei_Texture_SumVariance_WGA_5_01_256
0,A1BG,0.002990,-0.005259,-0.000488,0.005194,0.000068,0.241010,0.303381,0.130410,-0.408025,...,-0.277890,-0.275525,-0.048766,-0.030645,-0.049617,-0.113046,-0.023698,-0.289435,-0.151215,-0.368820
1,A1CF,-0.001565,0.042890,-0.013942,-0.001427,-0.000015,-0.005530,0.327585,-0.026081,-0.118382,...,-0.009005,-0.086070,-0.012707,-0.028892,-0.021251,-0.052420,-0.097425,-0.092376,-0.053896,-0.137581
2,A2M,-0.003262,0.006896,0.000398,0.007666,0.000616,0.078667,0.227274,-0.133490,-0.203220,...,-0.097145,-0.037404,-0.061559,-0.020459,-0.075545,-0.116700,-0.133095,-0.075015,-0.146159,-0.182705
3,A2ML1,-0.003586,0.064563,0.001254,-0.002624,0.001518,-0.217685,0.088072,0.278430,0.141955,...,-0.171836,-0.218509,-0.139665,-0.054083,-0.119030,-0.063259,-0.127949,-0.146157,-0.224660,-0.130919
4,A3GALT2,-0.003544,-0.073971,-0.007251,0.038333,0.005520,-0.024553,0.346715,-0.180595,-0.071338,...,-0.531650,-0.580755,-0.094995,-0.060371,-0.198040,-0.181545,-0.227165,-0.245581,-0.433260,-0.346015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20389,ZYG11B,-0.004997,0.025764,0.010048,0.000689,0.000578,0.063311,0.120570,-0.296945,-0.211495,...,-0.284210,-0.274920,-0.057896,-0.017460,-0.127452,-0.160505,-0.139515,-0.100329,-0.099580,-0.044556
20390,ZYX,-0.001728,0.127841,-0.003141,-0.000141,0.000653,-0.057800,-0.026700,0.050321,-0.057195,...,0.126379,-0.020406,-0.070986,-0.017315,-0.124905,-0.129980,-0.075756,0.019228,-0.261445,0.002777
20391,ZZEF1,-0.002265,-0.025710,-0.010416,-0.026102,0.000046,-0.036503,0.055400,0.115180,-0.083702,...,-0.068458,-0.202371,-0.158160,-0.057481,-0.109966,-0.057180,-0.089847,-0.457715,-0.409825,-0.516860
20392,ZZZ3,-0.007898,-0.085499,0.012159,0.019002,0.001708,-0.233295,-0.294195,0.156406,0.151420,...,-0.136883,0.021650,0.008554,-0.011801,-0.105246,0.003894,-0.045031,-0.049809,0.046664,-0.085787


In [6]:
# Persist the gene-level profiles to the working directory, without the row index.
gene_profile_csv = '20210422_6W_CP257_guide_normalized_feature_select_median_merged_ALLBATCHES___DMEM___ALLWELLS_plate_level_gene_aggregated.csv.gz'
dmem_median_gene_df.to_csv(gene_profile_csv, index=False)

In [7]:
# Use the gene code as the row index, then discard every row that has at least one missing value.
dmem_median_gene_index_df = (
    dmem_median_gene_df
    .set_index('Metadata_Foci_Barcode_MatchedTo_GeneCode')
    .dropna(axis=0, how='any')
)
dmem_median_gene_index_df

Unnamed: 0_level_0,Cells_AreaShape_CentralMoment_0_3,Cells_AreaShape_CentralMoment_1_0,Cells_AreaShape_CentralMoment_1_2,Cells_AreaShape_CentralMoment_2_1,Cells_AreaShape_CentralMoment_2_3,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_HuMoment_1,...,Nuclei_Texture_SumVariance_DAPI_Painting_10_02_256,Nuclei_Texture_SumVariance_DAPI_Painting_10_03_256,Nuclei_Texture_SumVariance_Mito_10_03_256,Nuclei_Texture_SumVariance_Mito_5_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_01_256,Nuclei_Texture_SumVariance_Phalloidin_10_03_256,Nuclei_Texture_SumVariance_Phalloidin_5_01_256,Nuclei_Texture_SumVariance_WGA_10_01_256,Nuclei_Texture_SumVariance_WGA_10_03_256,Nuclei_Texture_SumVariance_WGA_5_01_256
Metadata_Foci_Barcode_MatchedTo_GeneCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.002990,-0.005259,-0.000488,0.005194,0.000068,0.241010,0.303381,0.130410,-0.408025,-0.072026,...,-0.277890,-0.275525,-0.048766,-0.030645,-0.049617,-0.113046,-0.023698,-0.289435,-0.151215,-0.368820
A1CF,-0.001565,0.042890,-0.013942,-0.001427,-0.000015,-0.005530,0.327585,-0.026081,-0.118382,-0.036208,...,-0.009005,-0.086070,-0.012707,-0.028892,-0.021251,-0.052420,-0.097425,-0.092376,-0.053896,-0.137581
A2M,-0.003262,0.006896,0.000398,0.007666,0.000616,0.078667,0.227274,-0.133490,-0.203220,-0.089592,...,-0.097145,-0.037404,-0.061559,-0.020459,-0.075545,-0.116700,-0.133095,-0.075015,-0.146159,-0.182705
A2ML1,-0.003586,0.064563,0.001254,-0.002624,0.001518,-0.217685,0.088072,0.278430,0.141955,-0.034680,...,-0.171836,-0.218509,-0.139665,-0.054083,-0.119030,-0.063259,-0.127949,-0.146157,-0.224660,-0.130919
A3GALT2,-0.003544,-0.073971,-0.007251,0.038333,0.005520,-0.024553,0.346715,-0.180595,-0.071338,0.033340,...,-0.531650,-0.580755,-0.094995,-0.060371,-0.198040,-0.181545,-0.227165,-0.245581,-0.433260,-0.346015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYG11B,-0.004997,0.025764,0.010048,0.000689,0.000578,0.063311,0.120570,-0.296945,-0.211495,-0.056800,...,-0.284210,-0.274920,-0.057896,-0.017460,-0.127452,-0.160505,-0.139515,-0.100329,-0.099580,-0.044556
ZYX,-0.001728,0.127841,-0.003141,-0.000141,0.000653,-0.057800,-0.026700,0.050321,-0.057195,-0.178050,...,0.126379,-0.020406,-0.070986,-0.017315,-0.124905,-0.129980,-0.075756,0.019228,-0.261445,0.002777
ZZEF1,-0.002265,-0.025710,-0.010416,-0.026102,0.000046,-0.036503,0.055400,0.115180,-0.083702,-0.045825,...,-0.068458,-0.202371,-0.158160,-0.057481,-0.109966,-0.057180,-0.089847,-0.457715,-0.409825,-0.516860
ZZZ3,-0.007898,-0.085499,0.012159,0.019002,0.001708,-0.233295,-0.294195,0.156406,0.151420,-0.226055,...,-0.136883,0.021650,0.008554,-0.011801,-0.105246,0.003894,-0.045031,-0.049809,0.046664,-0.085787


In [29]:
def heat_map(data, title, file_name, save=False, figsize=(30, 30)):
    """Draw a heatmap of the pairwise column correlations of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose columns are correlated against each other via ``data.corr()``.
    title : str
        Title placed above the heatmap.
    file_name : str or path-like
        Destination of the image. Only used when ``save`` is true.
    save : bool, default False
        When true, write the figure to ``file_name`` at 300 dpi. The default
        keeps the previous behaviour, where the figure was drawn but not saved.
    figsize : tuple of (float, float), default (30, 30)
        Figure size in inches.

    Returns
    -------
    None
        The figure is left open so the notebook can display it.

    Notes
    -----
    The function draws on a figure it creates itself. It previously called
    ``plt.clf()``, which cleared whatever figure happened to be current (for
    example a clustermap drawn just before in the same cell), and it set the
    figure size only after that figure already existed.
    """
    corr = data.corr()

    # Keep the global seaborn theme call so later plots see the same settings as before.
    sns.set(rc={'figure.figsize': figsize})

    # Dedicated figure with an explicit size; no other open figure is touched.
    fig, ax = plt.subplots(figsize=figsize)

    sns.heatmap(
        corr,
        vmin=-1, vmax=1, center=0,
        cmap=sns.diverging_palette(10, 220, n=200),
        linewidth=0.2,
        linecolor='white',
        square=True,
        cbar_kws={"shrink": .76},
        ax=ax,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=90,
        horizontalalignment='right'
    )

    ax.set_title(title, fontsize=30)

    fig.tight_layout()

    if save:
        output_file = pathlib.Path(f"{file_name}")
        fig.savefig(output_file, dpi=300)

def clustered_heat_map(data, title, file_name):
    """Draw a clustered heatmap of the column correlations of ``data`` and save it.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose columns are correlated against each other via ``data.corr()``.
    title : str
        Text used as the figure-level title.
    file_name : str or path-like
        Destination of the saved image (written at 300 dpi).

    Returns
    -------
    None

    Notes
    -----
    Calls ``sns.set(font_scale=0.8)``, which changes seaborn's global settings
    for every plot drawn afterwards.
    """
    correlation_matrix = data.corr()
    sns.set(font_scale=0.8)

    palette = sns.diverging_palette(10, 220, n=200)
    colorbar_options = {"shrink": .6}

    # `sns.clustermap` returns a grid object (not an Axes) that owns the figure.
    cluster_grid = sns.clustermap(
        correlation_matrix,
        method='ward',
        figsize=(30, 30),
        dendrogram_ratio=0.1,
        cmap=palette,
        vmin=-1,
        vmax=1,
        center=0,
        linewidth=0.2,
        linecolor='white',
        square=True,
        cbar_kws=colorbar_options,
    )

    cluster_grid.fig.suptitle(title, fontsize=30)

    destination = pathlib.Path(f"{file_name}")
    cluster_grid.savefig(destination, dpi=300)


In [30]:
# Transpose once so that genes become the columns that get correlated with each other.
gene_profiles_by_feature = dmem_median_gene_index_df.T

clustered_heat_map(
    gene_profiles_by_feature,
    'CP257 DMEM_arm Median Aggregated Clustered Profile Heatmap - gene level hits(From well_level)',
    'cp257_dmem_median_profile_heatmap_clustering_gene_hits_well_level.png',
)
heat_map(
    gene_profiles_by_feature,
    'CP257 DMEM_arm Median Aggregated Profile Heatmap - gene level hits(From well_level)',
    'cp257_dmem_median_profile_heatmap_gene_hits_well_level',
)