## Check operating system and dependencies

In [5]:
% Get operating system information
if ispc
    os_info = system('ver');
elseif isunix
    os_info = system('uname -a');
elseif ismac
    os_info = system('sw_vers');
else
    os_info = 'Unknown OS';
end
disp(os_info);

Linux uger-d024.broadinstitute.org 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue May 14 11:55:25 EDT 2024 x86_64 x86_64 x86_64 GNU/Linux
     0



In [2]:
% List all installed toolboxes and their versions
toolboxes = ver;
disp(toolboxes);

  1x92 struct array with fields:

    Name
    Version
    Release
    Date



In [6]:
% Get MATLAB version and other session information
session_info = struct();
session_info.MATLABVersion = version;
session_info.Toolboxes = ver;
session_info.Date = datetime('now');
disp(session_info);

    MATLABVersion: '9.8.0.1323502 (R2020a)'
        Toolboxes: [1x92 struct]
             Date: 29-Aug-2024 21:05:24



In [4]:
% List installed add-ons and dependencies
addons = matlab.addons.installedAddons();
disp(addons);

                      Name                       Version     Enabled    Identifier
    _________________________________________    ________    _______    __________

    "Vehicle Dynamics Blockset"                  "1.4"        true         "VE"   
    "Simulink Coder"                             "9.3"        true         "RT"   
    "Fixed-Point Designer"                       "7.0"        true         "PO"   
    "Optimization Toolbox"                       "8.5"        true         "OP"   
    "Wireless HDL Toolbox"                       "2.0"        true         "LH"   
    "Simulink Report Generator"                  "5.8"        true         "SR"   
    "Control System Toolbox"                     "10.8"       true         "CT"   
    "Datafeed Toolbox"                           "5.9.1"      true         "DF"   
    "SimEvents"                                  "5.8"        true         "SE"   
    "Reinforcement Learning Toolbox"             "1.2"        true         "RL"   
   

# Start

In [1]:
datadir = '/idi/cgtb/morzech/idmp/combine_screens_456';
wkdir = '/idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final';
%wkdir = fullfile(datadir,'pcls_remove_gsk_no_broad_sar_pearson');

mk_cd_dir(wkdir, true);
imatlab_export_fig('print-png')

kabx_annot_path = '/idi/cgtb/abond/annotation/KABX_for_manuscript_20240413.xlsx'


kabx_annot_path =

    '/idi/cgtb/abond/annotation/KABX_for_manuscript_20240413.xlsx'



In [3]:
annot = xls2table(kabx_annot_path,1,true);

1 pert_id
2 pert_id_count
3 pert_id_alt
4 pert_id_pubchem_cid
5 pubchem_link
6 pert_iname
7 pert_class
8 pert_reference
9 target_process
10 target_pathway
11 target_description
12 target_description_long
13 target_description_multi_detailed
14 protein_target
15 gene_target
16 hypomorph_target
17 hypomorph_interest
18 ProdrugActivation_
19 notes
20 evidence_type
21 computational
22 biochemical
23 genetic
24 high_confidence
25 canonical_smiles
26 inchi_key
27 inchi_key_prefix
28 inchi_key_prefix_count


In [4]:
size(annot)


ans =

   437    28



In [5]:
annot.pert_id


ans =

  437x1 cell array

    {'BRD-K00093274' }
    {'BRD-K00093272' }
    {'BRD-K00093273' }
    {'BRD-K99844592' }
    {'BRD-K52025859' }
    {'BRD-K32795028' }
    {'BRD-K45179240' }
    {'BRD-K60731427' }
    {'BRD-K00092685' }
    {'BRD-K41413117' }
    {'BRD-K80267133' }
    {'BRD-K74016539' }
    {'BRD-K65278948' }
    {'BRD-K58069855' }
    {'BRD-K07208025' }
    {'BRD-K88429204' }
    {'BRD-A86736466' }
    {'BRD-K08151102' }
    {'BRD-K32842773' }
    {'BRD-K59456551' }
    {'BRD-A74914197' }
    {'BRD-K25799961' }
    {'BRD-K10671814' }
    {'BRD-K14116214' }
    {'BRD-K02594908' }
    {'BRD-K50859149' }
    {'BRD-K32273377' }
    {'BRD-K31682896' }
    {'BRD-K21520694' }
    {'BRD-K17205817' }
    {'BRD-K28494619' }
    {'BRD-K55250441' }
    {'BRD-K93524252' }
    {'BRD-K11640013' }
    {'BRD-K14705039' }
    {'BRD-K71125014' }
    {'BRD-K87492696' }
    {'BRD-K19742012' }
    {'BRD-K76845197' }
    {'BRD-K52416806' }
    {'BRD-K62363391' }
    {'BRD-K05512067' }
    {'

## Load gcts

In [6]:
gr = parse_gctx(fullfile(datadir,'gr_screens_456_n79873x340.gctx'));

Reading /idi/cgtb/morzech/idmp/combine_screens_456/gr_screens_456_n79873x340.gctx [340x79873]
Done [22.57 s].


In [7]:
grzs = parse_gctx(fullfile(datadir,'grzs_screens_456_n79873x340.gctx'));

Reading /idi/cgtb/morzech/idmp/combine_screens_456/grzs_screens_456_n79873x340.gctx [340x79873]
Done [20.99 s].


In [8]:
grzs_czs = parse_gctx(fullfile(datadir,'grzs_czs_screens_456_n79873x340.gctx'));

Reading /idi/cgtb/morzech/idmp/combine_screens_456/grzs_czs_screens_456_n79873x340.gctx [340x79873]
Done [21.01 s].


## Generate col_meta

In [9]:
[~,col_meta] = gct2meta(grzs);

gct2meta> Creating row and column metadata tables



In [10]:
headt(col_meta)


ans =

  33x3 table

    idx               field                              value               
    ___    ___________________________    ___________________________________

     1     {'cid'                    }    {'tbda1:0013X 55UXB:0.781250uM'   }
     2     {'broad_id'               }    {'0013X 55UXB'                    }
     3     {'canonical_smiles'       }    {'-666'                           }
     4     {'cid_org'                }    {'tbda1:0013X 55UXB:0.781250uM'   }
     5     {'dose_series_id'         }    {'tbda_100.0'                     }
     6     {'n_replicates'           }    {[                              2]}
     7     {'pert_dose'              }    {[                         0.7812]}
     8     {'pert_dose_act'          }    {[                         0.7812]}
     9     {'pert_dose_unit'         }    {'uM'                             }
    10     {'pert_id'                }    {'0013X 55UXB'                    }
    11     {'pert_idose'             }   

## Combine kabx with Broad SAR

In [11]:
annot.pcl_desc = annot.target_description;

In [12]:
fields = {'pert_id','pcl_desc'};
%pcl_annot = [annot(:,fields);broad_sar(:,fields)];
pcl_annot = [annot(:,fields)]; % LEAVE OUT Broad SAR

In [13]:
size(col_meta)
col_meta = outerjoin(col_meta,pcl_annot,'Keys','pert_id','RightVariables','pcl_desc','Type','left','MergeKeys',true);
size(col_meta)


ans =

       79873          33


ans =

       79873          34



In [14]:
wtable(col_meta,fullfile(wkdir,'col_meta.txt'))

In [15]:
col_meta = sortrows(col_meta,{'pcl_desc','broad_id','pert_dose'});

## Exclude pool_reproducibility

In [16]:
tabulate(col_meta.x_pert_collection)

                 Value    Count   Percent
                  tbda     8094     10.13%
             screen5cp    16488     20.64%
                  -666     8812     11.03%
                   gsk      256      0.32%
                  WuXi    10078     12.62%
                 Otava     3470      4.34%
             broad_sar     1368      1.71%
               Edelris     1917      2.40%
                  s4wk     6653      8.33%
                    bi     9910     12.41%
                  kabx    11904     14.90%
                fadD32      152      0.19%
  broad_sar_literature      168      0.21%
  pool_reproducibility       80      0.10%
            jg_QcrB-W2       24      0.03%
            ghddi_efpA      104      0.13%
                 WAC40        8      0.01%
                 empty        1      0.00%
        kabx_extension      136      0.17%
                poscon      200      0.25%
                   dxr        8      0.01%
       fadD32 coumarin       32      0.04%
        inpl

## Keep only entries that are annotated with detailed_target_decription

In [17]:
tabulate(sort(col_meta.pcl_desc))

                                               Value    Count   Percent
                                        30S ribosome      417      4.42%
                                   30S ribosome|InfB       40      0.42%
                         30S ribosome|tRNA synthesis       20      0.21%
                                        50S ribosome      740      7.85%
                                  50S ribosome|FusA1       40      0.42%
                                        ATP synthase       38      0.40%
                                               AccD6       60      0.64%
                                                 Alr      320      3.39%
                                            Alr|DdlA       28      0.30%
                                               CoaBC       16      0.17%
                                               CydAB       36      0.38%
                                              Cyp121       60      0.64%
                                                CysH

In [18]:
idx = cellfun(@isempty, col_meta.pcl_desc);
sum(idx)
col_meta(idx,:) = [];


ans =

       70446



In [19]:
wtable(col_meta, fullfile(wkdir,'col_meta_kabx.txt'))

## Unwrap detailed target description

In [20]:
size(col_meta)
col_meta_for_pcls = struct2table(unwrap_table(table2struct(col_meta),'pcl_desc','|'));
size(col_meta_for_pcls)


ans =

        9427          34


ans =

       11081          34



In [21]:
wtable(col_meta_for_pcls, fullfile(wkdir,'col_meta_kabx_for_pcls.txt'))

In [22]:
col_meta_for_pcls.pcl_desc = any2str(col_meta_for_pcls.pcl_desc);

## Slice gcts

In [23]:
gr.cid(1:10)
grzs.cid(1:10)


ans =

  10x1 cell array

    {'tbda1:0013X 55UXB:0.781250uM'  }
    {'tbda1:0013X 55UXB:1.562500uM'  }
    {'tbda1:0013X 55UXB:100.000000uM'}
    {'tbda1:0013X 55UXB:12.500000uM' }
    {'tbda1:0013X 55UXB:25.000000uM' }
    {'tbda1:0013X 55UXB:3.125000uM'  }
    {'tbda1:0013X 55UXB:50.000000uM' }
    {'tbda1:0013X 55UXB:6.250000uM'  }
    {'tbda1:0037U 73XXX:0.390625uM'  }
    {'tbda1:0037U 73XXX:0.781250uM'  }


ans =

  10x1 cell array

    {'tbda1:0013X 55UXB:0.781250uM'  }
    {'tbda1:0013X 55UXB:1.562500uM'  }
    {'tbda1:0013X 55UXB:100.000000uM'}
    {'tbda1:0013X 55UXB:12.500000uM' }
    {'tbda1:0013X 55UXB:25.000000uM' }
    {'tbda1:0013X 55UXB:3.125000uM'  }
    {'tbda1:0013X 55UXB:50.000000uM' }
    {'tbda1:0013X 55UXB:6.250000uM'  }
    {'tbda1:0037U 73XXX:0.390625uM'  }
    {'tbda1:0037U 73XXX:0.781250uM'  }



In [24]:
gr = ds_slice(gr,'cid',unique(col_meta_for_pcls.cid,'stable'));
grzs = ds_slice(grzs,'cid',unique(col_meta_for_pcls.cid,'stable'));
grzs_czs = ds_slice(grzs_czs,'cid',unique(col_meta_for_pcls.cid,'stable'));

In [25]:
mkgctx(fullfile(wkdir,'gr_pcls_screens456.gctx'),gr)
mkgctx(fullfile(wkdir,'grzs_pcls_screens456.gctx'),grzs)
mkgctx(fullfile(wkdir,'grzs_czs_pcls_screens456.gctx'),grzs_czs)

Saving HDF5 dataset to: /idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/gr_pcls_screens456_n9427x340.gctx...
Disabling compression.
Setting chunk size to: 340x771
done [1.85s].

ans =

    '/idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/gr_pcls_screens456_n9427x340.gctx'

Saving HDF5 dataset to: /idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_pcls_screens456_n9427x340.gctx...
Disabling compression.
Setting chunk size to: 340x771
done [1.50s].

ans =

    '/idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_pcls_screens456_n9427x340.gctx'

Saving HDF5 dataset to: /idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_czs_pcls_screens456_n9427x340.gctx...
Disabling compression.
Setting chunk size to: 340x771
done [1.43s].

ans =

    '/idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_czs_pcls_screens456_n9427x340.gctx'



## Create gmt

In [26]:
pcls = tbl2gmt(table2struct(col_meta_for_pcls),'group_field','pcl_desc','desc_field','pcl_desc','member_field','cid')


pcls = 

  71x1 struct array with fields:

    head
    desc
    entry
    len



## Remove ambiguous annotation

In [27]:
idx = ismember({pcls.head},{'NaN','unknown','whole cell only'});
sum(idx)
pcls(idx) = []


ans =

     0


pcls = 

  71x1 struct array with fields:

    head
    desc
    entry
    len



In [28]:
mkgmt(fullfile(wkdir,'new_pcls.gmt'), pcls)

## Calculate correlation and rank of correlation using grzs

In [29]:
grzs_corr = ds_corr(grzs, 'type', 'pearson');

In [30]:
grzs_corr_rank = grzs_corr;
mat = rankorder(grzs_corr.mat,'dim','row','direc','descend');
grzs_corr_rank.mat = (mat+mat')/2;

In [31]:
mkgctx(fullfile(wkdir,'grzs_for_pcls_screens456_pearson_corr.gctx'),grzs_corr)
mkgctx(fullfile(wkdir,'grzs_for_pcls_screens456_pearson_corr_rank.gctx'),grzs_corr_rank)

Saving HDF5 dataset to: /idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_for_pcls_screens456_pearson_corr_n9427x9427.gctx...
Disabling compression.
Setting chunk size to: 1000x262
done [6.60s].

ans =

    '/idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_for_pcls_screens456_pearson_corr_n9427x9427.gctx'

Saving HDF5 dataset to: /idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_for_pcls_screens456_pearson_corr_rank_n9427x9427.gctx...
Disabling compression.
Setting chunk size to: 1000x262
done [6.28s].

ans =

    '/idi/cgtb/abond/combine_screens_456/pcls_for_manuscript_final/grzs_for_pcls_screens456_pearson_corr_rank_n9427x9427.gctx'

