### Modules import

In [2]:
import logging

import numpy as np
import pandas as pd
from copairs.map import aggregate
from copairs.map import run_pipeline

# Configure the root log format, then raise the "copairs" logger to INFO so its
# progress messages show up in the cell outputs.
LOG_FORMAT = '%(levelname)s:%(asctime)s:%(name)s:%(message)s'
logging.basicConfig(format=LOG_FORMAT)

copairs_logger = logging.getLogger("copairs")
copairs_logger.setLevel(logging.INFO)

### Reading the dataframe 
Batch 1 consists of two plates: one stained with the standard CP dyes and the other stained with Tocris Mitobrilliant dye.

In [3]:
# Load the Batch 1 profiles. Forward slashes keep this relative path valid on
# Windows, macOS and Linux (backslash separators only resolve on Windows).
batch1_df = pd.read_csv('gct/2023_05_15_Batch1/2023_05_15_Batch1_normalized_feature_select_batch.csv.gz')

##### Batch 2 consists of a plate stained with Phalloidin 400LS

In [4]:
# Load the Batch 2 profiles. Forward slashes keep this relative path valid on
# Windows, macOS and Linux (backslash separators only resolve on Windows).
batch2_df = pd.read_csv('gct/2023_05_17_Batch2/2023_05_17_Batch2_normalized_feature_select_batch.csv.gz')

##### Defining parameters to compute mAP

In [5]:
# Names of the metadata columns identifying plate, well and perturbation.
plate_col, well_col, pert_col = (
    'Metadata_Plate',
    'Metadata_Well',
    'Metadata_broad_sample',
)

#### Analysis - Plate-wise

In [6]:
# Pair definitions handed to run_pipeline in the cells below.
# NOTE(review): presumably "sameby" columns must match and "diffby" columns must
# differ within a pair — confirm against the copairs documentation.
pos_sameby, pos_diffby = [pert_col], []
neg_sameby, neg_diffby = [], [pert_col]

# Passed as the last positional argument of run_pipeline.
null_size = 10_000

#### Batch1 

#### Splitting the dataframe into the standard CP dye plate and the mito dye plate

In [7]:
# Shape of the full Batch 1 table before it is split by plate.
batch1_df.shape

(767, 722)

In [8]:
# Keep only the rows of plate BR00122250; these are analysed as std_data.
on_std_plate = batch1_df['Metadata_Plate'] == 'BR00122250'
std_data = batch1_df.loc[on_std_plate]

In [9]:
# Row and column count of the BR00122250 subset.
std_data.shape

(384, 722)

In [10]:
# Keep only the rows of plate BR00122246; these are analysed as mito_data.
on_mito_plate = batch1_df['Metadata_Plate'] == 'BR00122246'
mito_data = batch1_df.loc[on_mito_plate]

In [11]:
# Row and column count of the BR00122246 subset.
mito_data.shape

(383, 722)

In [12]:
# Shape of the Batch 2 table, for comparison with the two Batch 1 subsets.
batch2_df.shape

(384, 720)

In [13]:
# Split the std_data columns into metadata (names starting with 'Metadata')
# and feature columns (everything else).
is_metadata_std = std_data.columns.str.startswith('Metadata')
metadata_names_std = std_data.columns[is_metadata_std].tolist()
feature_names_std = std_data.columns[~is_metadata_std].tolist()

dframe_std = std_data[metadata_names_std]
feats_std = std_data[feature_names_std].values

# Run the copairs pipeline on this plate with the shared pair definitions.
result_std = run_pipeline(
    dframe_std, feats_std,
    pos_sameby, pos_diffby,
    neg_sameby, neg_diffby,
    null_size,
)

INFO:2023-07-12 10:29:00,473:copairs:Indexing metadata...
INFO:2023-07-12 10:29:00,478:copairs:Finding positive pairs...
INFO:2023-07-12 10:29:00,479:copairs:dropping dups...
INFO:2023-07-12 10:29:00,483:copairs:Finding negative pairs...
INFO:2023-07-12 10:29:00,627:copairs:dropping dups...
INFO:2023-07-12 10:29:00,697:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-12 10:29:01,347:copairs:Computing negative similarities...


  0%|          | 0/4 [00:00<?, ?it/s]

INFO:2023-07-12 10:29:02,562:copairs:Building rank lists...
INFO:2023-07-12 10:29:02,656:copairs:Computing average precision...
INFO:2023-07-12 10:29:02,665:copairs:Computing null distributions...
INFO:2023-07-12 10:29:03,487:copairs:Computing P-values...
INFO:2023-07-12 10:29:03,494:copairs:Creating result DataFrame...
INFO:2023-07-12 10:29:03,496:copairs:Finished.


In [14]:
# Save the pipeline output for std_data. Forward slashes keep the relative path
# portable (with backslashes, non-Windows systems would write a file whose
# name contains a literal backslash instead of writing into copairs_csv/).
result_std.to_csv('copairs_csv/Result_StandardCP.csv')

In [15]:
# Aggregate the std_data results over the pos_sameby columns and save them.
# NOTE(review): threshold=0.05 is presumably a significance cutoff — confirm
# against the copairs documentation.
agg_result_std = aggregate(result_std, sameby=pos_sameby, threshold=0.05)

# Forward slashes keep the relative path portable across operating systems.
# The file name spelling is left unchanged so existing consumers still find it.
agg_result_std.to_csv('copairs_csv/Aggrerate_result_StandardCP.csv')

##### Mito data 

In [16]:
# Split the mito_data columns into metadata (names starting with 'Metadata')
# and feature columns (everything else).
is_metadata_mito = mito_data.columns.str.startswith('Metadata')
metadata_names_mito = mito_data.columns[is_metadata_mito].tolist()
feature_names_mito = mito_data.columns[~is_metadata_mito].tolist()

dframe_mito = mito_data[metadata_names_mito]
feats_mito = mito_data[feature_names_mito].values

# Run the copairs pipeline on this plate with the shared pair definitions.
result_mito = run_pipeline(
    dframe_mito, feats_mito,
    pos_sameby, pos_diffby,
    neg_sameby, neg_diffby,
    null_size,
)

INFO:2023-07-12 10:29:03,654:copairs:Indexing metadata...
INFO:2023-07-12 10:29:03,660:copairs:Finding positive pairs...
INFO:2023-07-12 10:29:03,661:copairs:dropping dups...
INFO:2023-07-12 10:29:03,664:copairs:Finding negative pairs...


INFO:2023-07-12 10:29:03,765:copairs:dropping dups...
INFO:2023-07-12 10:29:03,812:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-12 10:29:04,489:copairs:Computing negative similarities...


  0%|          | 0/4 [00:00<?, ?it/s]

INFO:2023-07-12 10:29:05,691:copairs:Building rank lists...
INFO:2023-07-12 10:29:05,800:copairs:Computing average precision...
INFO:2023-07-12 10:29:05,810:copairs:Computing null distributions...
INFO:2023-07-12 10:29:06,759:copairs:Computing P-values...
INFO:2023-07-12 10:29:06,766:copairs:Creating result DataFrame...
INFO:2023-07-12 10:29:06,767:copairs:Finished.


In [17]:
# Save the pipeline output for mito_data. Forward slashes keep the relative
# path portable (backslash separators only resolve on Windows).
result_mito.to_csv('copairs_csv/Result_Tocris_mitobrilliant.csv')

In [18]:
# Aggregate the mito_data results over the pos_sameby columns and save them.
# NOTE(review): threshold=0.05 is presumably a significance cutoff — confirm
# against the copairs documentation.
agg_result_mito = aggregate(result_mito, sameby=pos_sameby, threshold=0.05)

# Forward slashes keep the relative path portable across operating systems.
# The file name spelling is left unchanged so existing consumers still find it.
agg_result_mito.to_csv('copairs_csv/Aggrerate_result_Tocris_mitobrilliant.csv')

##### Batch 2 - Long Stokes-shifted actin

In [19]:
# Split the batch2_df columns into metadata (names starting with 'Metadata')
# and feature columns (everything else).
is_metadata_2 = batch2_df.columns.str.startswith('Metadata')
metadata_names_2 = batch2_df.columns[is_metadata_2].tolist()
feature_names_2 = batch2_df.columns[~is_metadata_2].tolist()

dframe_2 = batch2_df[metadata_names_2]
feats_2 = batch2_df[feature_names_2].values

# Run the copairs pipeline on this plate with the shared pair definitions.
result_batch2 = run_pipeline(
    dframe_2, feats_2,
    pos_sameby, pos_diffby,
    neg_sameby, neg_diffby,
    null_size,
)

INFO:2023-07-12 10:29:06,916:copairs:Indexing metadata...
INFO:2023-07-12 10:29:06,922:copairs:Finding positive pairs...
INFO:2023-07-12 10:29:06,923:copairs:dropping dups...
INFO:2023-07-12 10:29:06,926:copairs:Finding negative pairs...
INFO:2023-07-12 10:29:07,029:copairs:dropping dups...
INFO:2023-07-12 10:29:07,072:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-12 10:29:07,658:copairs:Computing negative similarities...


  0%|          | 0/4 [00:00<?, ?it/s]

INFO:2023-07-12 10:29:08,746:copairs:Building rank lists...
INFO:2023-07-12 10:29:08,828:copairs:Computing average precision...
INFO:2023-07-12 10:29:08,838:copairs:Computing null distributions...
INFO:2023-07-12 10:29:09,612:copairs:Computing P-values...
INFO:2023-07-12 10:29:09,620:copairs:Creating result DataFrame...
INFO:2023-07-12 10:29:09,622:copairs:Finished.


In [20]:
# Save the pipeline output for batch2_df. Forward slashes keep the relative
# path portable (backslash separators only resolve on Windows).
result_batch2.to_csv('copairs_csv/Result_Phalloidin400LS.csv')

In [21]:
# Aggregate the Batch 2 results over the pos_sameby columns and save them.
# NOTE(review): threshold=0.05 is presumably a significance cutoff — confirm
# against the copairs documentation.
agg_result_act = aggregate(result_batch2, sameby=pos_sameby, threshold=0.05)

# Forward slashes keep the relative path portable across operating systems.
# The file name spelling is left unchanged so existing consumers still find it.
agg_result_act.to_csv('copairs_csv/Aggrerate_result_Phalloidin400LS.csv')

#### Renaming columns

In [22]:
# Give each table's average-precision column its own suffixed name so the
# three columns stay distinguishable once the tables are merged.
agg_result_std = agg_result_std.rename(
    {'average_precision': 'average_precision_std'}, axis='columns'
)
agg_result_mito = agg_result_mito.rename(
    {'average_precision': 'average_precision_mito'}, axis='columns'
)
agg_result_act = agg_result_act.rename(
    {'average_precision': 'average_precision_act'}, axis='columns'
)

#### Combining the average precision values into a single dataframe

In [23]:
# Keep only the sample identifier and the average-precision column of each table.
agg_result_std_subset = agg_result_std.loc[
    :, ['Metadata_broad_sample', 'average_precision_std']
]
agg_result_mito_subset = agg_result_mito.loc[
    :, ['Metadata_broad_sample', 'average_precision_mito']
]
# Unlike the two tables above, agg_result_act is rebound to its own subset
# rather than stored under a separate *_subset name.
agg_result_act = agg_result_act.loc[
    :, ['Metadata_broad_sample', 'average_precision_act']
]

In [24]:
# Join the three average-precision tables on the sample identifier.
combined_df = (
    agg_result_std_subset
    .merge(agg_result_mito_subset, on='Metadata_broad_sample')
    .merge(agg_result_act, on='Metadata_broad_sample')
)

In [25]:
# Show the merged average-precision table.
combined_df

Unnamed: 0,Metadata_broad_sample,average_precision_std,average_precision_mito,average_precision_act
0,BRD-A12994259-001-11-9,0.109555,0.041064,0.118931
1,BRD-A22769835-300-05-7,0.011390,0.029497,0.023822
2,BRD-A53576514-048-02-8,0.046175,0.183302,0.059661
3,BRD-A87435144-001-01-6,0.106790,0.078665,0.133939
4,BRD-K00818915-001-02-9,0.189290,0.189756,0.071897
...,...,...,...,...
85,BRD-K98251413-001-07-9,0.034381,0.207946,0.218591
86,BRD-K99092662-001-02-9,0.181962,0.038368,0.111170
87,BRD-K99113996-001-02-0,1.000000,1.000000,1.000000
88,BRD-K99433989-001-02-9,0.162272,0.698810,0.354452


#### Adding metadata information to the combined_df 

In [26]:
# Load the MoA / common-name annotations. Forward slashes keep the relative
# path portable (backslash separators only resolve on Windows).
moa_metadata = pd.read_csv('copairs_csv/LC00009948_MoA_Common_Names.csv')

# After this rename the column shares its name with one in combined_df, so the
# later merge on 'BRD ID' emits them as Metadata_broad_sample_x / _y.
moa_metadata = moa_metadata.rename(columns={'BRD with batch': 'Metadata_broad_sample'})

##### Extracting the BRD ID from the Broad sample name

In [27]:
def BRD_ID(i):
    """Return the second dash-separated field of a Broad sample name.

    For example ``'BRD-A12994259-001-11-9'`` yields ``'A12994259'``.

    Parameters
    ----------
    i : object
        A Broad sample name. Anything that is not a string (NaN, None, ...)
        is treated as a missing value.

    Returns
    -------
    str or None
        The extracted ID, or ``None`` when ``i`` is not a string or contains
        no ``'-'`` separator.
    """
    # isinstance(str) covers every kind of missing value (float NaN,
    # numpy.float64 NaN, None, pd.NA), not just the built-in float type.
    if not isinstance(i, str):
        return None
    parts = i.split('-')
    # Names without a dash have no second field; report them as missing
    # instead of raising IndexError in the middle of a Series.map call.
    return parts[1] if len(parts) > 1 else None



In [28]:
# Derive the join key from the sample name, then attach the MoA annotations.
# Note that the 'BRD ID' column is added to combined_df in place.
brd_ids = combined_df['Metadata_broad_sample'].map(BRD_ID)
combined_df['BRD ID'] = brd_ids
combined_moa_df = combined_df.merge(moa_metadata, on='BRD ID')

In [30]:
# Preview the first 30 rows of the table merged with the MoA annotations.
combined_moa_df.head(30)

Unnamed: 0,Metadata_broad_sample_x,average_precision_std,average_precision_mito,average_precision_act,BRD ID,Metadata_broad_sample_y,Common Name,MoA
0,BRD-A12994259-001-11-9,0.109555,0.041064,0.118931,A12994259,BRD-A12994259-001-02-1,pomalidomide,tumor necrosis factor production inhibitor
1,BRD-A22769835-300-05-7,0.01139,0.029497,0.023822,A22769835,BRD-A22769835-300-05-7,homochlorcyclizine,antihistamine
2,BRD-A53576514-048-02-8,0.046175,0.183302,0.059661,A53576514,BRD-A53576514-048-14-3,orphenadrine,acetylcholine receptor antagonist
3,BRD-A87435144-001-01-6,0.10679,0.078665,0.133939,A87435144,BRD-A87435144-001-01-6,AZD7545,pyruvate dehydrogenase kinase inhibitor
4,BRD-K00818915-001-02-9,0.18929,0.189756,0.071897,K00818915,BRD-K00818915-003-01-8,SHP099,protein tyrosine kinase inhibitor
5,BRD-K01826510-001-09-9,0.037976,0.074683,0.018357,K01826510,BRD-K01826510-300-06-9,hydroxyzine,antihistamine
6,BRD-K02965346-001-01-8,0.916667,0.566919,0.858631,K02965346,BRD-K02965346-001-07-5,SU-11274,hepatocyte growth factor receptor inhibitor
7,BRD-K03063480-001-08-9,0.958333,0.580847,0.740972,K03063480,BRD-K03063480-001-06-6,PF-477736,CHK inhibitor
8,BRD-K04923131-001-17-9,1.0,1.0,0.586369,K04923131,BRD-K04923131-001-15-4,GSK-3-inhibitor-IX,glycogen synthase kinase inhibitor
9,BRD-K06182768-001-02-3,0.010157,0.06298,0.026257,K06182768,BRD-K06182768-001-06-4,A-366,histone lysine methyltransferase inhibitor


### Generating columns for difference in mAP 

In [33]:
# Difference in average precision: standard plate minus each of the other two.
# A positive value means the standard plate scored higher for that row.
ap_std = combined_moa_df['average_precision_std']
combined_moa_df['std_vs_act'] = ap_std - combined_moa_df['average_precision_act']
combined_moa_df['std_vs_mito'] = ap_std - combined_moa_df['average_precision_mito']


In [34]:
# Save the annotated table with the difference columns. Forward slashes keep
# the relative path portable (backslash separators only resolve on Windows).
combined_moa_df.to_csv('copairs_csv/PrecisionValues_with_MoA_allplates.csv')

In [35]:
# Display the final table, including the std_vs_act and std_vs_mito columns.
combined_moa_df

Unnamed: 0,Metadata_broad_sample_x,average_precision_std,average_precision_mito,average_precision_act,BRD ID,Metadata_broad_sample_y,Common Name,MoA,std_vs_act,std_vs_mito
0,BRD-A12994259-001-11-9,0.109555,0.041064,0.118931,A12994259,BRD-A12994259-001-02-1,pomalidomide,tumor necrosis factor production inhibitor,-0.009376,0.068491
1,BRD-A22769835-300-05-7,0.011390,0.029497,0.023822,A22769835,BRD-A22769835-300-05-7,homochlorcyclizine,antihistamine,-0.012432,-0.018106
2,BRD-A53576514-048-02-8,0.046175,0.183302,0.059661,A53576514,BRD-A53576514-048-14-3,orphenadrine,acetylcholine receptor antagonist,-0.013486,-0.137127
3,BRD-A87435144-001-01-6,0.106790,0.078665,0.133939,A87435144,BRD-A87435144-001-01-6,AZD7545,pyruvate dehydrogenase kinase inhibitor,-0.027149,0.028125
4,BRD-K00818915-001-02-9,0.189290,0.189756,0.071897,K00818915,BRD-K00818915-003-01-8,SHP099,protein tyrosine kinase inhibitor,0.117393,-0.000466
...,...,...,...,...,...,...,...,...,...,...
85,BRD-K98251413-001-07-9,0.034381,0.207946,0.218591,K98251413,BRD-K98251413-001-04-0,IOX2,hypoxia inducible factor inhibitor,-0.184210,-0.173566
86,BRD-K99092662-001-02-9,0.181962,0.038368,0.111170,K99092662,BRD-K99092662-001-01-1,PS178990,androgen receptor modulator,0.070791,0.143594
87,BRD-K99113996-001-02-0,1.000000,1.000000,1.000000,K99113996,BRD-K99113996-001-02-0,AZD2014,mTOR inhibitor,0.000000,0.000000
88,BRD-K99433989-001-02-9,0.162272,0.698810,0.354452,K99433989,BRD-K99433989-001-02-9,SAG,smoothened receptor agonist,-0.192180,-0.536537
