### Modules import

In [2]:
import pandas as pd
from copairs.map import run_pipeline
import numpy as np

import logging
logging.basicConfig(format='%(levelname)s:%(asctime)s:%(name)s:%(message)s')
logging.getLogger("copairs").setLevel(logging.INFO)

### Reading the dataframe 
Batch 1 consists of plate with standard CP dye and other plate stained with Tocris Mitobrilliant dye

In [3]:
batch1_negcon_df = pd.read_csv('gct\\2023_05_15_Batch1\\2023_05_15_Batch1_normalized_feature_select_negcon_batch.csv.gz')

In [4]:
batch2_negcon_df = pd.read_csv('gct\\2023_05_17_Batch2\\2023_05_17_Batch2_normalized_feature_select_negcon_batch.csv.gz')

### Analysis - Plate wise with respect to control DMSO wells
#### Defining parameters to compute map 

In [5]:
pert_col = 'Metadata_broad_sample'
control_col = 'Metadata_control_type'

In [16]:
pos_sameby = [pert_col]
pos_diffby = []

neg_sameby = []
neg_diffby = [control_col]
null_size =10000

### Batch 1
Since Batch1 consists of both the plates - standard CP dyes and the mitobrilliant dye, the dataframe is split

In [7]:
standard_negcon_df  = batch1_negcon_df.loc[batch1_negcon_df['Metadata_Plate'] == 'BR00122250']

In [8]:
mito_negcon_df = batch1_negcon_df.loc[batch1_negcon_df['Metadata_Plate'] == 'BR00122246']

In [17]:
metadata_names_std = [c for c in standard_negcon_df.columns if c.startswith('Metadata')]
feature_names_std = [c for c in standard_negcon_df.columns if not c.startswith('Metadata')]
feats_std = standard_negcon_df[feature_names_std].values
dframe_std = standard_negcon_df[metadata_names_std]

In [21]:
dframe_std[control_col].fillna('trt', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dframe_std[control_col].fillna('trt', inplace=True)


In [22]:
result_std = run_pipeline(dframe_std, feats_std, 
                          pos_sameby, pos_diffby,
                          neg_sameby, neg_diffby,
                          null_size)

INFO:2023-07-14 12:11:08,434:copairs:Indexing metadata...
INFO:2023-07-14 12:11:08,445:copairs:Finding positive pairs...
INFO:2023-07-14 12:11:08,448:copairs:dropping dups...
INFO:2023-07-14 12:11:08,452:copairs:Finding negative pairs...
INFO:2023-07-14 12:11:08,527:copairs:dropping dups...
INFO:2023-07-14 12:11:08,536:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 12:11:09,407:copairs:Computing negative similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 12:11:10,373:copairs:Building rank lists...
INFO:2023-07-14 12:11:10,414:copairs:Computing average precision...
INFO:2023-07-14 12:11:10,425:copairs:Computing null distributions...
INFO:2023-07-14 12:11:11,446:copairs:Computing P-values...
INFO:2023-07-14 12:11:11,453:copairs:Creating result DataFrame...
INFO:2023-07-14 12:11:11,456:copairs:Finished.


In [24]:
result_std.to_csv('copairs_csv\\Result_Negcon_wrt_Controls_StandardCP.csv')

In [25]:
from copairs.map import aggregate

In [26]:
agg_result_std = aggregate(result_std, sameby=pos_sameby, threshold=0.05)
agg_result_std.to_csv('copairs_csv\\Aggregate_result_Negcon_wrt_Controls_StandardCP.csv')

#### Mito data 

In [27]:
metadata_names_mito = [ c for c in mito_negcon_df.columns if c.startswith('Metadata')]
feature_names_mito = [c for c in mito_negcon_df.columns if not c.startswith('Metadata')]
feats_mito = mito_negcon_df[feature_names_mito].values
dframe_mito = mito_negcon_df[metadata_names_mito]

In [28]:
dframe_mito[control_col].fillna('trt', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dframe_mito[control_col].fillna('trt', inplace=True)


In [29]:
result_mito = run_pipeline(dframe_mito, feats_mito,
                           pos_sameby, pos_diffby,
                           neg_sameby, neg_diffby,
                           null_size)

INFO:2023-07-14 12:17:36,763:copairs:Indexing metadata...
INFO:2023-07-14 12:17:36,776:copairs:Finding positive pairs...
INFO:2023-07-14 12:17:36,777:copairs:dropping dups...
INFO:2023-07-14 12:17:36,781:copairs:Finding negative pairs...
INFO:2023-07-14 12:17:36,804:copairs:dropping dups...
INFO:2023-07-14 12:17:36,815:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 12:17:37,633:copairs:Computing negative similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 12:17:38,557:copairs:Building rank lists...
INFO:2023-07-14 12:17:38,589:copairs:Computing average precision...
INFO:2023-07-14 12:17:38,597:copairs:Computing null distributions...
INFO:2023-07-14 12:17:39,725:copairs:Computing P-values...
INFO:2023-07-14 12:17:39,733:copairs:Creating result DataFrame...
INFO:2023-07-14 12:17:39,733:copairs:Finished.


In [30]:
result_mito.to_csv('copairs_csv\\Result_Negcon_wrt_Controls_Tocris_mitobrilliant.csv')

In [31]:
agg_result_mito = aggregate(result_mito, sameby=pos_sameby, threshold=0.05)
agg_result_mito.to_csv('copairs_csv\\Aggregate_result_Negcon_wrt_Controls_Tocris_Mitobrilliant.csv')

#### Batch2 - Long stoke shifted actin 

In [32]:
metadata_names_act = [c for c in batch2_negcon_df.columns if c.startswith('Metadata')]
feature_names_act = [ c for c in batch2_negcon_df.columns if not c.startswith('Metadata')]
feats_act = batch2_negcon_df[feature_names_act].values
dframe_act = batch2_negcon_df[metadata_names_act]

In [33]:
dframe_act[control_col].fillna('trt', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dframe_act[control_col].fillna('trt', inplace=True)


In [34]:
result_act = run_pipeline(dframe_act, feats_act,
                          pos_sameby, pos_diffby, 
                          neg_sameby, neg_diffby,
                          null_size)

INFO:2023-07-14 12:20:25,241:copairs:Indexing metadata...
INFO:2023-07-14 12:20:25,254:copairs:Finding positive pairs...
INFO:2023-07-14 12:20:25,256:copairs:dropping dups...
INFO:2023-07-14 12:20:25,261:copairs:Finding negative pairs...
INFO:2023-07-14 12:20:25,284:copairs:dropping dups...
INFO:2023-07-14 12:20:25,297:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 12:20:26,145:copairs:Computing negative similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 12:20:27,088:copairs:Building rank lists...
INFO:2023-07-14 12:20:27,120:copairs:Computing average precision...
INFO:2023-07-14 12:20:27,127:copairs:Computing null distributions...
INFO:2023-07-14 12:20:28,182:copairs:Computing P-values...
INFO:2023-07-14 12:20:28,191:copairs:Creating result DataFrame...
INFO:2023-07-14 12:20:28,191:copairs:Finished.


In [35]:
result_act.to_csv('copairs_csv\\Result_Negcon_wrt_Controls_Phalloidin400LS.csv')

In [36]:
agg_result_act = aggregate(result_act, sameby=pos_sameby, threshold=0.05)
agg_result_act.to_csv('copairs_csv\\Aggregate_result_Negcon_wrt_Controls_Phalloidin400LS.csv')

#### Combining all the aggregated results for easier plotting 

In [37]:
agg_result_std = agg_result_std.rename(columns = {'average_precision': 'average_precision_std'})
agg_result_mito = agg_result_mito.rename(columns = {'average_precision':'average_precision_mito'})
agg_result_act = agg_result_act.rename(columns = {'average_precision':'average_precision_act'})

In [38]:
agg_result_std_subset = agg_result_std[['Metadata_broad_sample', 'average_precision_std']]
agg_result_mito_subset = agg_result_mito[['Metadata_broad_sample', 'average_precision_mito']]
agg_result_act_subset = agg_result_act[['Metadata_broad_sample', 'average_precision_act']]

In [39]:
combined_df = pd.merge(agg_result_std_subset, agg_result_mito_subset, on='Metadata_broad_sample')
combined_df  =pd.merge(combined_df,agg_result_act_subset, on='Metadata_broad_sample')

#### Adding metadata information to the combined_df 


In [40]:
moa_metadata = pd.read_csv('copairs_csv\\LC00009948_MoA_Common_Names.csv')
moa_metadata = moa_metadata.rename(columns = {'BRD with batch':'Metadata_broad_sample'})

##### Extracting BRD ID from BROAD sample name 

In [41]:
def BRD_ID(i):
    if type(i) != float:
        ID = i.split('-')
        return ID[1]

In [42]:
combined_df['BRD ID'] = combined_df['Metadata_broad_sample'].map(BRD_ID)
combined_moa_df = pd.merge(combined_df,moa_metadata, on = 'BRD ID')

### Generating columns for difference in mAP

In [43]:
 
combined_moa_df['std_vs_act'] = combined_moa_df['average_precision_std'] - combined_moa_df['average_precision_act']
combined_moa_df['std_vs_mito'] = combined_moa_df['average_precision_std'] - combined_moa_df['average_precision_mito']


In [44]:
combined_moa_df.to_csv('copairs_csv\\PrecisionValues_with_MoA_allplates_Negcon_wrt_Controls.csv')