# Examples of using the PairwiseCompareManager
This notebook shows examples on the NF1 dataset of how to compare instances of different groups.
Each instance is a unique well with Schwann Cells.

In [1]:
import pathlib
import sys

import pandas as pd

# Custom Imports

In [2]:
from comparators.PearsonsCorrelation import PearsonsCorrelation
from comparison_tools.PairwiseCompareManager import PairwiseCompareManager

# Inputs

In [3]:
# Directory holding the NF1 Plate 4 data. strict=True makes resolve() raise
# FileNotFoundError right away if the directory does not exist.
data_dir = pathlib.Path("../data").resolve(strict=True)

# Load the Plate 4 profiles (per the file name: bulk, feature-selected).
plate4df = pd.read_parquet(data_dir / "Plate_4_bulk_feature_selected.parquet")

# Process Plate 4
Each instance (row) represents a unique well.

In [4]:
# Wells with a missing siRNA value are labelled explicitly as "No siRNA".
sirna_labels = plate4df["Metadata_siRNA"].fillna("No siRNA")
plate4df["Metadata_siRNA"] = sirna_labels

# Every column whose name does not contain "Metadata" is treated as a feature.
feat_cols = [column for column in plate4df.columns if "Metadata" not in column]

plate4df.head()

Unnamed: 0,Metadata_WellRow,Metadata_WellCol,Metadata_gene_name,Metadata_genotype,Metadata_seed_density,Metadata_siRNA,Metadata_RNAiMax,Metadata_Concentration,Metadata_Plate,Metadata_Well,...,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_00_256,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_01_256,Nuclei_Texture_InverseDifferenceMoment_DAPI_3_02_256,Nuclei_Texture_InverseDifferenceMoment_GFP_3_02_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_00_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_01_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_02_256,Nuclei_Texture_InverseDifferenceMoment_RFP_3_03_256,Nuclei_Texture_SumEntropy_DAPI_3_03_256,Nuclei_Texture_SumVariance_DAPI_3_01_256
0,B,2,NF1,WT,1000,No siRNA,0,0.0,Plate_4,B2,...,-0.165689,-0.446846,-0.312296,-0.401454,-2.032668,-2.252355,-2.122525,-2.177496,-1.448158,0.031227
1,B,3,NF1,WT,1000,Scramble,1,0.05,Plate_4,B3,...,-0.960711,-1.438404,-0.707251,0.738356,0.1884,-0.844323,-0.846457,-0.34218,1.306905,0.344522
2,B,4,NF1,WT,1000,Scramble,1,0.005,Plate_4,B4,...,1.91017,1.887803,2.496062,-1.858108,0.149212,-1.231724,-1.045144,-0.34036,-1.806753,0.169433
3,B,5,NF1,WT,1000,No siRNA,0,0.0,Plate_4,B5,...,2.573592,2.273321,2.65251,0.884645,0.187687,-0.199618,-0.262197,-0.310826,-1.312691,-0.182742
4,B,6,NF1,WT,1000,Scramble,1,0.005,Plate_4,B6,...,0.929752,1.058821,-0.080929,-0.245173,0.298632,-0.254777,-1.60827,-0.99669,0.15123,2.010409


In [5]:
# Report the (rows, columns) shape of the input dataframe.
print(f"\nInput Dataframe shape: {plate4df.shape}\n")


Input Dataframe shape: (60, 1182)



# siRNA Comparisons
In this dataset, each instance (row) corresponds to a well.
When using the `PairwiseCompareManager`, the `_same_columns` and `_different_columns` parameters must satisfy three conditions:
1. `_same_columns` must include at least one list element if `_different_columns` has fewer than two list elements.
2. `_different_columns` must contain one or more list elements.
3. `_same_columns` and `_different_columns` should not contain any of the same columns.

## Compare between wells with different concentrations

In [6]:
# Pearson's correlation is the similarity measure applied to each compared pair.
pearson_metric = PearsonsCorrelation()

# Columns passed as _different_columns: concentration and well.
# No _same_columns are given for this comparison.
differing_cols = ["Metadata_Concentration", "Metadata_Well"]

well_comparer = PairwiseCompareManager(
    _df=plate4df.copy(),
    _comparator=pearson_metric,
    _different_columns=differing_cols,
    _feat_cols=feat_cols,
)

# Calling the manager runs the comparisons and returns the results dataframe.
micdf = well_comparer()

In [7]:
# Preview the first rows of the comparison results.
micdf.head()

Unnamed: 0,pearsons_correlation,Metadata_Concentration__antehoc_group0,Metadata_Concentration__antehoc_group1,Metadata_Well__posthoc_group0,Metadata_Well__posthoc_group1
0,-0.025999,0.0,0.001,B11,E10
1,-0.035167,0.0,0.001,B11,E4
2,0.148778,0.0,0.001,B11,E7
3,0.030356,0.0,0.001,B11,F10
4,-0.163734,0.0,0.001,B11,F4


In [8]:
# Report the (rows, columns) shape of the comparison results.
print(f"\nOutput Dataframe shape: {micdf.shape}\n")


Output Dataframe shape: (1485, 5)



In [9]:
# List the result columns: the comparator score plus the group0/group1 metadata columns.
print(f"Output Dataframe Columns:\n{micdf.columns.tolist()}")

Output Dataframe Columns:
['pearsons_correlation', 'Metadata_Concentration__antehoc_group0', 'Metadata_Concentration__antehoc_group1', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1']


## Compare between different siRNAs and wells at the same concentration

In [10]:
# Pearson's correlation is the similarity measure applied to each compared pair.
pearson_metric = PearsonsCorrelation()

# Concentration is passed as a _same_columns entry, while siRNA and well are
# passed as _different_columns entries.
matching_cols = ["Metadata_Concentration"]
differing_cols = ["Metadata_siRNA", "Metadata_Well"]

sirna_comparer = PairwiseCompareManager(
    _df=plate4df.copy(),
    _comparator=pearson_metric,
    _same_columns=matching_cols,
    _different_columns=differing_cols,
    _feat_cols=feat_cols,
)

# Calling the manager runs the comparisons and returns the results dataframe.
micdf = sirna_comparer()

In [11]:
# Preview the first rows of the comparison results.
micdf.head()

Unnamed: 0,pearsons_correlation,Metadata_Concentration__antehoc_group0,Metadata_Concentration__antehoc_group1,Metadata_siRNA__posthoc_group0,Metadata_siRNA__posthoc_group1,Metadata_Well__posthoc_group0,Metadata_Well__posthoc_group1
0,0.128001,"(0.001,)","(0.001,)",NF1 Target 1,NF1 Target 2,F10,G10
1,0.19029,"(0.001,)","(0.001,)",NF1 Target 1,NF1 Target 2,F10,G4
2,0.173737,"(0.001,)","(0.001,)",NF1 Target 1,NF1 Target 2,F10,G7
3,-0.258292,"(0.001,)","(0.001,)",NF1 Target 1,Scramble,F10,E10
4,0.089742,"(0.001,)","(0.001,)",NF1 Target 1,Scramble,F10,E4


In [12]:
# Report the (rows, columns) shape of the comparison results.
print(f"\nOutput Dataframe shape: {micdf.shape}\n")


Output Dataframe shape: (135, 7)



In [13]:
# List the result columns: the comparator score plus the group0/group1 metadata columns.
print(f"Output Dataframe Columns:\n{micdf.columns.tolist()}")

Output Dataframe Columns:
['pearsons_correlation', 'Metadata_Concentration__antehoc_group0', 'Metadata_Concentration__antehoc_group1', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1', 'Metadata_Well__posthoc_group0', 'Metadata_Well__posthoc_group1']


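## Compare between different siRNAs and wells at the same concentration, dropping columns from the output
Supplying the `_drop_cols` parameter excludes the concentration and well columns from the output dataframe, even though they are still used to compare groups (the well still appears inside the siRNA group values shown below).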

In [14]:
# Pearson's correlation is the similarity measure applied to each compared pair.
pearson_metric = PearsonsCorrelation()

# Same grouping as the previous comparison: concentration as a _same_columns
# entry, siRNA and well as _different_columns entries.
matching_cols = ["Metadata_Concentration"]
differing_cols = ["Metadata_siRNA", "Metadata_Well"]

# Columns handed to _drop_cols; their group columns are absent from the output.
dropped_cols = ["Metadata_Concentration", "Metadata_Well"]

trimmed_comparer = PairwiseCompareManager(
    _df=plate4df.copy(),
    _comparator=pearson_metric,
    _same_columns=matching_cols,
    _different_columns=differing_cols,
    _feat_cols=feat_cols,
    _drop_cols=dropped_cols,
)

# Calling the manager runs the comparisons and returns the results dataframe.
micdf = trimmed_comparer()

In [15]:
# Preview the first rows of the comparison results.
micdf.head()

Unnamed: 0,pearsons_correlation,Metadata_siRNA__posthoc_group0,Metadata_siRNA__posthoc_group1
0,0.128001,"(NF1 Target 1, F10)","(NF1 Target 2, G10)"
1,0.19029,"(NF1 Target 1, F10)","(NF1 Target 2, G4)"
2,0.173737,"(NF1 Target 1, F10)","(NF1 Target 2, G7)"
3,-0.258292,"(NF1 Target 1, F10)","(Scramble, E10)"
4,0.089742,"(NF1 Target 1, F10)","(Scramble, E4)"


In [16]:
# Report the (rows, columns) shape of the comparison results.
print(f"\nOutput Dataframe shape: {micdf.shape}\n")


Output Dataframe shape: (135, 3)



In [17]:
# List the result columns that remain after the dropped columns are removed.
print(f"Output Dataframe Columns:\n{micdf.columns.tolist()}")

Output Dataframe Columns:
['pearsons_correlation', 'Metadata_siRNA__posthoc_group0', 'Metadata_siRNA__posthoc_group1']
