In [3]:
### Module imports

In [2]:
import pandas as pd
from copairs.map import run_pipeline
import numpy as np

import logging
# Show copairs progress messages (INFO and above) using a
# level:timestamp:logger:message layout.
LOG_FORMAT = '%(levelname)s:%(asctime)s:%(name)s:%(message)s'
logging.basicConfig(format=LOG_FORMAT)
copairs_logger = logging.getLogger("copairs")
copairs_logger.setLevel(logging.INFO)

### Reading the dataframe 
Batch 1 consists of one plate stained with the standard CP dyes and a second plate stained with the Tocris Mitobrilliant dye.

In [4]:
# Load the Batch 1 profile table (gzip-compressed CSV).
# Forward slashes are used as separators so the relative path resolves on
# Windows, Linux and macOS alike; a literal backslash is only a separator on Windows.
batch1_negcon_df = pd.read_csv('gct/2023_05_15_Batch1/2023_05_15_Batch1_normalized_feature_select_negcon_batch.csv.gz')

In [5]:
# Load the Batch 2 profile table (gzip-compressed CSV).
# Forward slashes are used as separators so the relative path resolves on
# Windows, Linux and macOS alike; a literal backslash is only a separator on Windows.
batch2_negcon_df = pd.read_csv('gct/2023_05_17_Batch2/2023_05_17_Batch2_normalized_feature_select_negcon_batch.csv.gz')

### Analysis - plate-wise, with respect to other treatments

#### Defining parameters to compute mAP

In [6]:
# Metadata column used below to define positive/negative pairs and to group
# the aggregated results (one value per perturbation, judging by its use).
pert_col = 'Metadata_broad_sample'

In [7]:
# Pair definitions handed to copairs' run_pipeline.
# Positive pairs: profiles that share the same pert_col value (as the *_sameby name suggests).
pos_sameby, pos_diffby = [pert_col], []
# Negative pairs: profiles whose pert_col values differ.
neg_sameby, neg_diffby = [], [pert_col]
# Passed to run_pipeline as null_size; presumably the number of null samples; confirm in the copairs docs.
null_size = 10000

### Batch 1
Since Batch 1 contains both plates (standard CP dyes and the Mitobrilliant dye), the dataframe is split by plate.

In [8]:
# (rows, columns) of the full Batch 1 table, before it is split by plate.
batch1_negcon_df.shape

(767, 795)

##### TODO: check why there is a difference in the number of columns, and try to understand what the columns are

In [9]:
# Keep only the rows of Batch 1 that belong to plate BR00122250
# (the plate the rest of this notebook treats as the standard CP plate).
is_standard_plate = batch1_negcon_df['Metadata_Plate'] == 'BR00122250'
standard_negcon_df = batch1_negcon_df.loc[is_standard_plate]

In [10]:
# (rows, columns) of the standard CP plate subset.
standard_negcon_df.shape

(384, 795)

In [11]:
# Keep only the rows of Batch 1 that belong to plate BR00122246
# (the plate the rest of this notebook treats as the Mitobrilliant plate).
is_mito_plate = batch1_negcon_df['Metadata_Plate'] == 'BR00122246'
mito_negcon_df = batch1_negcon_df.loc[is_mito_plate]

In [12]:
# (rows, columns) of the Mitobrilliant plate subset.
mito_negcon_df.shape

(383, 795)

In [13]:
# (rows, columns) of the Batch 2 table, for comparison with the Batch 1 plates.
batch2_negcon_df.shape

(384, 721)

#### Open question: why are there no additional columns in the negcon-normalized data of Batch 2 (721 columns vs. 795 in Batch 1)?

In [15]:
# Partition the standard-plate columns in a single pass: names starting with
# 'Metadata' describe the well, everything else is treated as a feature.
metadata_names_std, feature_names_std = [], []
for column in standard_negcon_df.columns:
    if column.startswith('Metadata'):
        metadata_names_std.append(column)
    else:
        feature_names_std.append(column)

# Metadata frame and raw feature array, in the form passed to run_pipeline.
dframe_std = standard_negcon_df[metadata_names_std]
feats_std = standard_negcon_df[feature_names_std].values

In [16]:
# Run the copairs pipeline on the standard CP plate.
# Arguments, in order: metadata frame, feature array, the positive-pair rules
# (sameby/diffby), the negative-pair rules (sameby/diffby) and null_size.
# The returned frame is saved and then aggregated in the following cells.
result_std = run_pipeline(dframe_std, feats_std, 
                          pos_sameby, pos_diffby,
                          neg_sameby, neg_diffby,
                          null_size)

INFO:2023-07-14 11:12:22,791:copairs:Indexing metadata...
INFO:2023-07-14 11:12:22,797:copairs:Finding positive pairs...
INFO:2023-07-14 11:12:22,800:copairs:dropping dups...
INFO:2023-07-14 11:12:22,806:copairs:Finding negative pairs...
INFO:2023-07-14 11:12:22,959:copairs:dropping dups...
INFO:2023-07-14 11:12:23,027:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 11:12:23,643:copairs:Computing negative similarities...


  0%|          | 0/4 [00:00<?, ?it/s]

INFO:2023-07-14 11:12:24,791:copairs:Building rank lists...
INFO:2023-07-14 11:12:24,878:copairs:Computing average precision...
INFO:2023-07-14 11:12:24,886:copairs:Computing null distributions...
INFO:2023-07-14 11:12:25,688:copairs:Computing P-values...
INFO:2023-07-14 11:12:25,696:copairs:Creating result DataFrame...
INFO:2023-07-14 11:12:25,698:copairs:Finished.


In [17]:
# Save the standard CP plate results.
# A forward slash is used so the file lands inside copairs_csv/ on every OS;
# with a backslash, POSIX systems would create a file literally named
# "copairs_csv\Result_..." in the working directory.
result_std.to_csv('copairs_csv/Result_Negcon_Trmts_StandardCP.csv')

In [18]:
from copairs.map import aggregate

In [19]:
# Aggregate the standard-plate results, grouping by the columns in pos_sameby.
# threshold=0.05 is presumably the significance cutoff; confirm in the copairs docs.
agg_result_std = aggregate(result_std, sameby=pos_sameby, threshold=0.05)
# Forward slash keeps the output path valid on Windows, Linux and macOS.
agg_result_std.to_csv('copairs_csv/Aggregate_result_Negcon_Trmts_StandardCP.csv')

###  Mito data 

In [20]:
# Partition the Mitobrilliant-plate columns in a single pass: names starting
# with 'Metadata' describe the well, everything else is treated as a feature.
metadata_names_mito, feature_names_mito = [], []
for column in mito_negcon_df.columns:
    if column.startswith('Metadata'):
        metadata_names_mito.append(column)
    else:
        feature_names_mito.append(column)

# Metadata frame and raw feature array, in the form passed to run_pipeline.
dframe_mito = mito_negcon_df[metadata_names_mito]
feats_mito = mito_negcon_df[feature_names_mito].values

In [22]:
# Run the copairs pipeline on the Mitobrilliant plate, with the same pair
# rules and null_size as the other plates so the results are comparable.
# Arguments, in order: metadata frame, feature array, positive-pair rules,
# negative-pair rules, null_size.
result_mito = run_pipeline(dframe_mito, feats_mito,
                           pos_sameby, pos_diffby,
                           neg_sameby, neg_diffby,
                           null_size)

INFO:2023-07-14 11:22:02,904:copairs:Indexing metadata...
INFO:2023-07-14 11:22:02,911:copairs:Finding positive pairs...
INFO:2023-07-14 11:22:02,913:copairs:dropping dups...
INFO:2023-07-14 11:22:02,915:copairs:Finding negative pairs...
INFO:2023-07-14 11:22:03,023:copairs:dropping dups...
INFO:2023-07-14 11:22:03,072:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 11:22:03,684:copairs:Computing negative similarities...


  0%|          | 0/4 [00:00<?, ?it/s]

INFO:2023-07-14 11:22:04,819:copairs:Building rank lists...
INFO:2023-07-14 11:22:04,915:copairs:Computing average precision...
INFO:2023-07-14 11:22:04,923:copairs:Computing null distributions...
INFO:2023-07-14 11:22:05,806:copairs:Computing P-values...
INFO:2023-07-14 11:22:05,812:copairs:Creating result DataFrame...
INFO:2023-07-14 11:22:05,815:copairs:Finished.


In [23]:
# Save the Mitobrilliant plate results.
# A forward slash is used so the file lands inside copairs_csv/ on every OS;
# with a backslash, POSIX systems would create a file literally named
# "copairs_csv\Result_..." in the working directory.
result_mito.to_csv('copairs_csv/Result_Negcon_Trmts_Tocris_mitobrilliant.csv')

In [24]:
# Aggregate the Mitobrilliant-plate results, grouping by the columns in pos_sameby.
# threshold=0.05 is presumably the significance cutoff; confirm in the copairs docs.
agg_result_mito = aggregate(result_mito, sameby=pos_sameby, threshold=0.05)

In [25]:
# Save the aggregated Mitobrilliant results.
# Forward slash keeps the output path valid on Windows, Linux and macOS.
agg_result_mito.to_csv('copairs_csv/Aggregate_result_Negcon_Trmts_Tocris_Mitobrilliant.csv')

#### Batch 2 - Long Stokes-shifted actin

In [26]:
# Partition the Batch 2 columns in a single pass: names starting with
# 'Metadata' describe the well, everything else is treated as a feature.
metadata_names_act, feature_names_act = [], []
for column in batch2_negcon_df.columns:
    if column.startswith('Metadata'):
        metadata_names_act.append(column)
    else:
        feature_names_act.append(column)

# Metadata frame and raw feature array, in the form passed to run_pipeline.
dframe_act = batch2_negcon_df[metadata_names_act]
feats_act = batch2_negcon_df[feature_names_act].values

In [27]:
# Run the copairs pipeline on the Batch 2 plate, with the same pair rules and
# null_size as the Batch 1 plates so the results are comparable.
# Arguments, in order: metadata frame, feature array, positive-pair rules,
# negative-pair rules, null_size.
result_act = run_pipeline(dframe_act, feats_act,
                          pos_sameby, pos_diffby, 
                          neg_sameby, neg_diffby,
                          null_size)

INFO:2023-07-14 11:30:00,653:copairs:Indexing metadata...
INFO:2023-07-14 11:30:00,662:copairs:Finding positive pairs...
INFO:2023-07-14 11:30:00,664:copairs:dropping dups...
INFO:2023-07-14 11:30:00,670:copairs:Finding negative pairs...
INFO:2023-07-14 11:30:00,820:copairs:dropping dups...
INFO:2023-07-14 11:30:00,892:copairs:Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

INFO:2023-07-14 11:30:01,641:copairs:Computing negative similarities...


  0%|          | 0/4 [00:00<?, ?it/s]

INFO:2023-07-14 11:30:03,001:copairs:Building rank lists...
INFO:2023-07-14 11:30:03,144:copairs:Computing average precision...
INFO:2023-07-14 11:30:03,153:copairs:Computing null distributions...
INFO:2023-07-14 11:30:04,131:copairs:Computing P-values...
INFO:2023-07-14 11:30:04,139:copairs:Creating result DataFrame...
INFO:2023-07-14 11:30:04,140:copairs:Finished.


In [28]:
# Save the Batch 2 plate results.
# A forward slash is used so the file lands inside copairs_csv/ on every OS;
# with a backslash, POSIX systems would create a file literally named
# "copairs_csv\Result_..." in the working directory.
result_act.to_csv('copairs_csv/Result_Negcon_Trmts_Phalloidin400LS.csv')

In [29]:
# Aggregate the Batch 2 results, grouping by the columns in pos_sameby.
# threshold=0.05 is presumably the significance cutoff; confirm in the copairs docs.
agg_result_act = aggregate(result_act, sameby=pos_sameby, threshold=0.05)
# Forward slash keeps the output path valid on Windows, Linux and macOS.
agg_result_act.to_csv('copairs_csv/Aggregate_result_Negcon_Trmts_Phalloidin400LS.csv')

#### Combining all the aggregated results for easier plotting 

In [30]:
# Give each plate's average-precision column a plate-specific name so the
# three tables can be merged side by side without column clashes.
std_rename = {'average_precision': 'average_precision_std'}
mito_rename = {'average_precision': 'average_precision_mito'}
act_rename = {'average_precision': 'average_precision_act'}

agg_result_std = agg_result_std.rename(columns=std_rename)
agg_result_mito = agg_result_mito.rename(columns=mito_rename)
agg_result_act = agg_result_act.rename(columns=act_rename)

In [31]:
# Reduce each aggregated table to the sample identifier plus its own
# average-precision column; nothing else is needed for the combined table.
sample_col = 'Metadata_broad_sample'
agg_result_std_subset = agg_result_std.loc[:, [sample_col, 'average_precision_std']]
agg_result_mito_subset = agg_result_mito.loc[:, [sample_col, 'average_precision_mito']]
agg_result_act_subset = agg_result_act.loc[:, [sample_col, 'average_precision_act']]

In [32]:
# Join the three per-plate tables on the sample identifier.
# merge defaults to an inner join, so only samples present in all three
# tables end up in combined_df.
combined_df = (
    agg_result_std_subset
    .merge(agg_result_mito_subset, on='Metadata_broad_sample')
    .merge(agg_result_act_subset, on='Metadata_broad_sample')
)

#### Adding metadata information to the combined_df 


In [34]:
# Load the MoA / common-name annotations and rename the 'BRD with batch'
# column to 'Metadata_broad_sample'.
# Forward slash keeps the relative path valid on Windows, Linux and macOS.
moa_metadata = (
    pd.read_csv('copairs_csv/LC00009948_MoA_Common_Names.csv')
    .rename(columns={'BRD with batch': 'Metadata_broad_sample'})
)

##### Extracting the BRD ID from the Broad sample name

In [36]:
def BRD_ID(i):
    """Return the second hyphen-separated field of a Broad sample name.

    Parameters
    ----------
    i : object
        A sample name such as ``'BRD-K12345678-001-01-1'``. Missing values
        (``None``, ``NaN``, ``pd.NA``, any non-string) are accepted.

    Returns
    -------
    str or None
        The text between the first and second hyphen, or ``None`` when ``i``
        is not a string or contains no hyphen.
    """
    # isinstance(str) instead of `type(i) != float`: the exact-type check let
    # None, numpy.float64 NaN and pd.NA through to .split(), which raised.
    if not isinstance(i, str):
        return None
    parts = i.split('-')
    # A name without a hyphen has no ID field; report it as missing rather
    # than failing with IndexError.
    if len(parts) < 2:
        return None
    return parts[1]

In [37]:
# Derive the BRD ID for every sample, store it as a new column, then attach
# the MoA annotations by joining on that ID (inner join, merge's default).
brd_ids = combined_df['Metadata_broad_sample'].map(BRD_ID)
combined_df['BRD ID'] = brd_ids
combined_moa_df = combined_df.merge(moa_metadata, on='BRD ID')

### Generating columns for difference in mAP

In [40]:
 
# Difference in average precision relative to the standard CP plate:
# positive values mean the standard plate scored higher.
std_ap = combined_moa_df['average_precision_std']
combined_moa_df['std_vs_act'] = std_ap - combined_moa_df['average_precision_act']
combined_moa_df['std_vs_mito'] = std_ap - combined_moa_df['average_precision_mito']


In [41]:
# Save the combined table (per-plate average precision, MoA annotations and
# the difference columns).
# Forward slash keeps the output path valid on Windows, Linux and macOS.
combined_moa_df.to_csv('copairs_csv/PrecisionValues_with_MoA_allplates_Negcon_Trmts.csv')