## Copairs
* **Details of the analysis in this notebook:**
* **Data from:** CDoT
* **Plates compared:**
    * BR00122248 -
    * BR00122249 -
* **Objective:** To understand the mAP of the plates stained with the new set of dyes.
* **Normalization:** Negcon normalization
* **mAP calculation:** mAP is calculated with respect to the controls (replicates of each perturbation are retrieved against the control wells).

import logging
from pathlib import Path

import numpy as np

In [None]:
import pandas as pd
from copairs.map import run_pipeline

# Let INFO-and-above records from the "copairs" logger through.
logging.getLogger("copairs").setLevel(logging.INFO)
# Prefix every log record with its level, timestamp and logger name.
logging.basicConfig(
    format="%(levelname)s:%(asctime)s:%(name)s:%(message)s",
)

In [None]:
### Reading the dataframe

### Load batches

In [None]:

# Short batch label -> name of the batch folder under "gct/".
names_batches = {"batch3": "2023_05_17_Batch3", "batch5": "2023_08_02_Batch5"}

# Read one profile table per batch, keyed by the short label.
batches = {}
for label, folder in names_batches.items():
    profile_path = (
        Path("gct") / folder / f"{folder}_normalized_feature_select_negcon_batch.csv.gz"
    )
    batches[label] = pd.read_csv(profile_path)

### Analysis - Plate wise with respect to control DMSO wells
#### Defining parameters to compute mAP

In [None]:
# Metadata column holding the control type of a well; used below both to
# label non-control wells and to define negative pairs.
control_col = "Metadata_control_type"
# Metadata column identifying the perturbation; used below to define
# positive pairs and as the key when merging per-batch results.
pert_col = "Metadata_broad_sample"

In [None]:
# Positive pairs: profiles that share the same perturbation, with no
# additional "must differ" constraint.
pos_sameby, pos_diffby = [pert_col], []

# Negative pairs: no "must match" constraint; the two profiles must differ
# in their control type.
neg_sameby, neg_diffby = [], [control_col]

# Passed to run_pipeline as its null_size argument.
null_size = 10_000

### Batch 3 and 5


In [None]:
# Imported once at the top of the cell instead of being re-executed on every
# loop iteration.
from copairs.map import aggregate

# Folder that receives the per-batch result tables.
copairs_dir = Path("copairs_csv")
# Batch label -> aggregated result table for that batch.
aggregated = {}
for name, batch in batches.items():
    # Split the profile table into metadata columns and feature columns.
    metadata_names = [c for c in batch.columns if c.startswith("Metadata")]
    feature_names = [c for c in batch.columns if not c.startswith("Metadata")]
    feats = batch[feature_names].values

    # Wells with a missing control type are labelled "trt". The column is
    # replaced through assign() on a fresh frame: calling
    # fillna(inplace=True) on dframe[control_col] is a chained assignment
    # that emits a warning and, with pandas Copy-on-Write, leaves dframe
    # unchanged so the NaNs would silently reach run_pipeline.
    dframe = batch[metadata_names].assign(
        **{control_col: batch[control_col].fillna("trt")}
    )

    result = run_pipeline(
        dframe, feats, pos_sameby, pos_diffby, neg_sameby, neg_diffby, null_size
    )
    result.to_csv(copairs_dir / f"Result_Negcon_wrt_Controls_{name}.csv")

    # Aggregate the pipeline output over the positive-pair grouping columns
    # and keep the aggregated table for the cross-batch merge.
    aggregated[name] = aggregate(result, sameby=pos_sameby, threshold=0.05)
    aggregated[name].to_csv(
        copairs_dir / f"Aggregate_result_Negcon_wrt_Controls_{name}.csv"
    )

#### Merge all results

In [None]:

# One column suffix per batch label (e.g. "_batch3") so that the per-batch
# columns stay distinguishable after the merge.
merge_suffixes = [f"_{label}" for label in aggregated]
# pd.merge takes exactly two frames, so `aggregated` is expected to hold
# exactly two batches here.
combined_df = pd.merge(
    *aggregated.values(),
    on="Metadata_broad_sample",
    suffixes=merge_suffixes,
)

In [None]:
# Load the MoA annotation table and expose its "BRD with batch" column under
# the name "Metadata_broad_sample".
moa_metadata = pd.read_csv(copairs_dir / "LC00009948_MoA_Common_Names.csv").rename(
    columns={"BRD with batch": "Metadata_broad_sample"}
)

##### Extracting the BRD ID from the Broad sample name

In [None]:
def BRD_ID(i):
    """Return the second "-"-separated field of a sample name.

    Parameters
    ----------
    i : object
        Sample name, e.g. a string shaped like "BRD-K12345678-001-01-1".
        Missing values (float NaN, None) and any other non-string input are
        accepted and yield ``None``.

    Returns
    -------
    str or None
        The text between the first and second "-" of ``i``, or ``None`` when
        ``i`` is not a string or contains no "-".
    """
    # Only strings can be split; NaN placeholders arrive as floats.
    if not isinstance(i, str):
        return None
    parts = i.split("-")
    # Names without a "-" (for example a bare control label) have no ID part.
    if len(parts) < 2:
        return None
    return parts[1]

In [None]:
# Derive the "BRD ID" column from each row's sample name.
combined_df = combined_df.assign(
    **{"BRD ID": combined_df["Metadata_broad_sample"].map(BRD_ID)}
)
# Attach the MoA annotations by joining the two tables on "BRD ID".
combined_moa_df = combined_df.merge(moa_metadata, on="BRD ID")

### Generating columns for difference in mAP

In [None]:

# Row-wise mAP difference: batch3 minus batch5.
combined_moa_df["batch3_vs_batch5"] = combined_moa_df[
    "mean_average_precision_batch3"
].sub(combined_moa_df["mean_average_precision_batch5"])

In [None]:
# Write the merged mAP/MoA table into the same folder as the per-batch results.
output_csv = copairs_dir / "PrecisionValues_with_MoA_allplates_Negcon_wrt_Controls.csv"
combined_moa_df.to_csv(output_csv)