## Run copairs on all evalzoo results and check if AP values match

In [3]:
import sys

sys.path.append("../scripts")

import logging
from pathlib import Path

import pandas as pd
import numpy as np

from load import load_config
from copairs.map import run_pipeline, aggregate

import seaborn as sns
from matplotlib import pyplot as plt

from tqdm.auto import tqdm

import pickle

#### Run copairs on evalzoo configs with existing results

In [4]:
def run_copairs_on_evalzoo(evalzoo_result, null_size=10000, batch_size=100000):
    config = load_config(f"../2.evalzoo/results/{evalzoo_result}/params.yaml")
    profiles_path = config["experiment"]["input_structure"].format(
        **config["experiment"]
    )

    # check if exists
    if Path(profiles_path).exists():
        ann_df = pd.read_parquet(profiles_path)

        ann_df.reset_index(inplace=True, drop=True)
        metadata = ann_df.filter(regex="^Metadata_")
        feature_values = ann_df.filter(regex="^(?!Metadata_)").values
        assert "Metadata_pert_type" in metadata.columns

        # add dummy index to speed up calculations by skipping positive pairs between controls
        metadata.loc[:, "Metadata_dummy_index"] = metadata.index
        metadata.loc[
            metadata["Metadata_pert_type"] == "trt", "Metadata_dummy_index"
        ] = -1

        pos_sameby = config["experiment"]["sim_params"]["all_same_cols_rep"] or []
        pos_sameby = [c for c in pos_sameby if c != "Metadata_reference_or_other"]
        pos_diffby = config["experiment"]["sim_params"]["all_different_cols_rep"] or []
        pos_diffby = [pos_diffby] if isinstance(pos_diffby, str) else pos_diffby

        neg_sameby = []
        neg_diffby = ["Metadata_pert_type"]

        print("Sameby:", pos_sameby, "Diffby:", pos_diffby)

        copairs_result, _, _ = run_pipeline(
            metadata,
            feature_values,
            pos_sameby + ["Metadata_dummy_index"],
            pos_diffby,
            neg_sameby,
            neg_diffby,
            null_size=null_size,
            batch_size=batch_size,
        )

        return aggregate(copairs_result, sameby=pos_sameby, threshold=0.05)
    else:
        logging.warning(f"File {profiles_path} not found. Skipping.")
        return None

In [5]:
copairs_results = {}

In [4]:
results_dir = Path("../2.evalzoo/results/")

for subdir in tqdm(results_dir.iterdir()):
    collatedsim_path = subdir / "collatedsim.parquet"
    l_0_1_path = subdir / "metrics_level_1_0_ref.parquet"

    if collatedsim_path.exists() and l_0_1_path.exists():
        subdir_collatedsim = pd.read_parquet(collatedsim_path)
        subdir_l_1_0 = pd.read_parquet(l_0_1_path)

        # create groupby objects for both DataFrames
        group_col = subdir_l_1_0.columns[1]
        collatedsim_groups = subdir_collatedsim.groupby(group_col)["id1"].unique()
        l_1_0_groups = subdir_l_1_0.groupby(group_col)["id1"].unique()

        # find groups (values of Metadata_JCP2022) that exist in both DataFrames
        common_groups = set(collatedsim_groups.index).intersection(
            set(l_1_0_groups.index)
        )

        # compare the unique id1 values for each common group
        mismatch = any(
            set(collatedsim_groups[jcp]) != set(l_1_0_groups[jcp])
            for jcp in common_groups
        )

        # check if there was a mismatch
        if mismatch:
            print(f"{subdir.name} id1 mismatch")
        else:
            print(f"{subdir.name} id1 match OK")

        copairs_results[subdir.name] = run_copairs_on_evalzoo(subdir.name)

        # pickle intermediate results
        with open("output/copairs_results.pkl", "wb") as f:
            pickle.dump(copairs_results, f)

0it [00:00, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:34:14 INFO     Indexing metadata...
2023-07-03 15:34:14 INFO     Finding positive pairs...


630757cf id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 15:34:14 INFO     dropping dups...
2023-07-03 15:34:14 INFO     Finding negative pairs...
2023-07-03 15:34:34 INFO     dropping dups...
2023-07-03 15:35:18 INFO     Computing positive similarities...
2023-07-03 15:35:18.568863: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:35:18 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:35:28 INFO     Building rank lists...
2023-07-03 15:35:49 INFO     Computing average precision...
2023-07-03 15:35:49 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:35:59 INFO     Computing P-values...
2023-07-03 15:35:59 INFO     Creating result DataFrame...
2023-07-03 15:35:59 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:36:03 INFO     Indexing metadata...
2023-07-03 15:36:03 INFO     Finding positive pairs...


66fd2a4d id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:36:03 INFO     dropping dups...
2023-07-03 15:36:03 INFO     Finding negative pairs...
2023-07-03 15:36:24 INFO     dropping dups...
2023-07-03 15:37:11 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:37:11 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:37:20 INFO     Building rank lists...
2023-07-03 15:37:41 INFO     Computing average precision...
2023-07-03 15:37:41 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:37:55 INFO     Computing P-values...
2023-07-03 15:37:55 INFO     Creating result DataFrame...
2023-07-03 15:37:55 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:37:58 INFO     Indexing metadata...
2023-07-03 15:37:59 INFO     Finding positive pairs...


f86c9fcc id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:37:59 INFO     dropping dups...
2023-07-03 15:37:59 INFO     Finding negative pairs...
2023-07-03 15:38:19 INFO     dropping dups...
2023-07-03 15:39:02 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:39:02 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:39:12 INFO     Building rank lists...
2023-07-03 15:39:32 INFO     Computing average precision...
2023-07-03 15:39:32 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:39:45 INFO     Computing P-values...
2023-07-03 15:39:45 INFO     Creating result DataFrame...
2023-07-03 15:39:45 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:39:49 INFO     Indexing metadata...
2023-07-03 15:39:49 INFO     Finding positive pairs...


ac65bc4b id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:39:49 INFO     dropping dups...
2023-07-03 15:39:49 INFO     Finding negative pairs...
2023-07-03 15:40:09 INFO     dropping dups...
2023-07-03 15:40:52 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:40:52 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:41:08 INFO     Building rank lists...
2023-07-03 15:41:29 INFO     Computing average precision...
2023-07-03 15:41:29 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:41:43 INFO     Computing P-values...
2023-07-03 15:41:43 INFO     Creating result DataFrame...
2023-07-03 15:41:43 INFO     Finished.


d7ae7409 id1 match OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:41:49 INFO     Indexing metadata...
2023-07-03 15:41:49 INFO     Finding positive pairs...


273d3138 id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 15:41:49 INFO     dropping dups...
2023-07-03 15:41:49 INFO     Finding negative pairs...
2023-07-03 15:42:10 INFO     dropping dups...
2023-07-03 15:42:56 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:42:56 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:43:06 INFO     Building rank lists...
2023-07-03 15:43:27 INFO     Computing average precision...
2023-07-03 15:43:27 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:43:40 INFO     Computing P-values...
2023-07-03 15:43:40 INFO     Creating result DataFrame...
2023-07-03 15:43:40 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:43:44 INFO     Indexing metadata...
2023-07-03 15:43:44 INFO     Finding positive pairs...


263a5ff4 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:43:44 INFO     dropping dups...
2023-07-03 15:43:44 INFO     Finding negative pairs...
2023-07-03 15:44:05 INFO     dropping dups...
2023-07-03 15:44:50 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:44:50 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:45:47 INFO     Building rank lists...
2023-07-03 15:46:06 INFO     Computing average precision...
2023-07-03 15:46:07 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:46:17 INFO     Computing P-values...
2023-07-03 15:46:18 INFO     Creating result DataFrame...
2023-07-03 15:46:18 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:46:21 INFO     Indexing metadata...
2023-07-03 15:46:21 INFO     Finding positive pairs...


2bde8689 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:46:22 INFO     dropping dups...
2023-07-03 15:46:22 INFO     Finding negative pairs...
2023-07-03 15:46:42 INFO     dropping dups...
2023-07-03 15:47:26 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:47:26 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:48:04 INFO     Building rank lists...
2023-07-03 15:48:24 INFO     Computing average precision...
2023-07-03 15:48:25 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:48:32 INFO     Computing P-values...
2023-07-03 15:48:32 INFO     Creating result DataFrame...
2023-07-03 15:48:32 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:48:36 INFO     Indexing metadata...
2023-07-03 15:48:36 INFO     Finding positive pairs...


f5c36cb4 id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 15:48:36 INFO     dropping dups...
2023-07-03 15:48:36 INFO     Finding negative pairs...
2023-07-03 15:48:57 INFO     dropping dups...
2023-07-03 15:49:42 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:49:42 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:51:28 INFO     Building rank lists...
2023-07-03 15:51:49 INFO     Computing average precision...
2023-07-03 15:51:49 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:51:56 INFO     Computing P-values...
2023-07-03 15:51:56 INFO     Creating result DataFrame...
2023-07-03 15:51:56 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:52:01 INFO     Indexing metadata...
2023-07-03 15:52:01 INFO     Finding positive pairs...


30f99c08 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:52:02 INFO     dropping dups...
2023-07-03 15:52:02 INFO     Finding negative pairs...
2023-07-03 15:52:23 INFO     dropping dups...
2023-07-03 15:53:06 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:53:06 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:53:16 INFO     Building rank lists...
2023-07-03 15:53:36 INFO     Computing average precision...
2023-07-03 15:53:36 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:53:43 INFO     Computing P-values...
2023-07-03 15:53:43 INFO     Creating result DataFrame...
2023-07-03 15:53:43 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:53:47 INFO     Indexing metadata...
2023-07-03 15:53:47 INFO     Finding positive pairs...


125bfb5b id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 15:53:47 INFO     dropping dups...
2023-07-03 15:53:47 INFO     Finding negative pairs...
2023-07-03 15:54:08 INFO     dropping dups...
2023-07-03 15:54:53 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:54:53 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:55:03 INFO     Building rank lists...
2023-07-03 15:55:24 INFO     Computing average precision...
2023-07-03 15:55:24 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:55:30 INFO     Computing P-values...
2023-07-03 15:55:31 INFO     Creating result DataFrame...
2023-07-03 15:55:31 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:55:34 INFO     Indexing metadata...
2023-07-03 15:55:34 INFO     Finding positive pairs...


8a46f718 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 15:55:34 INFO     dropping dups...
2023-07-03 15:55:34 INFO     Finding negative pairs...
2023-07-03 15:55:55 INFO     dropping dups...
2023-07-03 15:56:40 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:56:40 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:56:49 INFO     Building rank lists...
2023-07-03 15:57:09 INFO     Computing average precision...
2023-07-03 15:57:09 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:57:16 INFO     Computing P-values...
2023-07-03 15:57:16 INFO     Creating result DataFrame...
2023-07-03 15:57:16 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:57:20 INFO     Indexing metadata...
2023-07-03 15:57:20 INFO     Finding positive pairs...


3d645cb2 id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 15:57:20 INFO     dropping dups...
2023-07-03 15:57:20 INFO     Finding negative pairs...
2023-07-03 15:57:41 INFO     dropping dups...
2023-07-03 15:58:24 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 15:58:24 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 15:58:33 INFO     Building rank lists...
2023-07-03 15:58:53 INFO     Computing average precision...
2023-07-03 15:58:54 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 15:59:00 INFO     Computing P-values...
2023-07-03 15:59:01 INFO     Creating result DataFrame...
2023-07-03 15:59:01 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 15:59:04 INFO     Indexing metadata...
2023-07-03 15:59:04 INFO     Finding positive pairs...


d2311b58 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_JCP2022']


2023-07-03 15:59:06 INFO     dropping dups...
2023-07-03 15:59:07 INFO     Finding negative pairs...
2023-07-03 15:59:28 INFO     dropping dups...
2023-07-03 16:00:14 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 16:00:15 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:01:36 INFO     Building rank lists...
2023-07-03 16:01:57 INFO     Computing average precision...
2023-07-03 16:01:57 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:02:04 INFO     Computing P-values...
2023-07-03 16:02:05 INFO     Creating result DataFrame...
2023-07-03 16:02:05 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:02:08 INFO     Indexing metadata...
2023-07-03 16:02:08 INFO     Finding positive pairs...


d4df76cd id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 16:02:09 INFO     dropping dups...
2023-07-03 16:02:09 INFO     Finding negative pairs...
2023-07-03 16:02:30 INFO     dropping dups...
2023-07-03 16:03:15 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:03:16 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:03:53 INFO     Building rank lists...
2023-07-03 16:04:13 INFO     Computing average precision...
2023-07-03 16:04:14 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:04:21 INFO     Computing P-values...
2023-07-03 16:04:21 INFO     Creating result DataFrame...
2023-07-03 16:04:21 INFO     Finished.


a534201d id1 match OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:04:27 INFO     Indexing metadata...
2023-07-03 16:04:27 INFO     Finding positive pairs...


26efacc8 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:04:27 INFO     dropping dups...
2023-07-03 16:04:27 INFO     Finding negative pairs...
2023-07-03 16:04:48 INFO     dropping dups...
2023-07-03 16:05:31 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:05:31 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:06:28 INFO     Building rank lists...
2023-07-03 16:06:49 INFO     Computing average precision...
2023-07-03 16:06:49 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:06:58 INFO     Computing P-values...
2023-07-03 16:06:58 INFO     Creating result DataFrame...
2023-07-03 16:06:58 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:07:02 INFO     Indexing metadata...
2023-07-03 16:07:02 INFO     Finding positive pairs...


5ee8aabb id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 16:07:04 INFO     dropping dups...
2023-07-03 16:07:05 INFO     Finding negative pairs...
2023-07-03 16:07:26 INFO     dropping dups...
2023-07-03 16:08:12 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 16:08:13 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:09:18 INFO     Building rank lists...
2023-07-03 16:09:39 INFO     Computing average precision...
2023-07-03 16:09:39 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:09:47 INFO     Computing P-values...
2023-07-03 16:09:47 INFO     Creating result DataFrame...
2023-07-03 16:09:47 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:09:51 INFO     Indexing metadata...
2023-07-03 16:09:51 INFO     Finding positive pairs...


64aa0540 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:09:51 INFO     dropping dups...
2023-07-03 16:09:51 INFO     Finding negative pairs...
2023-07-03 16:10:12 INFO     dropping dups...
2023-07-03 16:10:58 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:10:58 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:11:08 INFO     Building rank lists...
2023-07-03 16:11:27 INFO     Computing average precision...
2023-07-03 16:11:28 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:11:38 INFO     Computing P-values...
2023-07-03 16:11:39 INFO     Creating result DataFrame...
2023-07-03 16:11:39 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:11:42 INFO     Indexing metadata...
2023-07-03 16:11:42 INFO     Finding positive pairs...


faa6ba3e id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_JCP2022'] Diffby: []


2023-07-03 16:11:43 INFO     dropping dups...
2023-07-03 16:11:43 INFO     Finding negative pairs...
2023-07-03 16:12:03 INFO     dropping dups...
2023-07-03 16:12:46 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:12:46 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:14:38 INFO     Building rank lists...
2023-07-03 16:14:58 INFO     Computing average precision...
2023-07-03 16:14:58 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:15:06 INFO     Computing P-values...
2023-07-03 16:15:06 INFO     Creating result DataFrame...
2023-07-03 16:15:06 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:15:12 INFO     Indexing metadata...
2023-07-03 16:15:12 INFO     Finding positive pairs...


49abf604 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:15:13 INFO     dropping dups...
2023-07-03 16:15:13 INFO     Finding negative pairs...
2023-07-03 16:15:33 INFO     dropping dups...
2023-07-03 16:16:19 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:16:19 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:16:30 INFO     Building rank lists...
2023-07-03 16:16:50 INFO     Computing average precision...
2023-07-03 16:16:50 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:16:57 INFO     Computing P-values...
2023-07-03 16:16:57 INFO     Creating result DataFrame...
2023-07-03 16:16:57 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:17:01 INFO     Indexing metadata...
2023-07-03 16:17:01 INFO     Finding positive pairs...


790b8557 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:17:01 INFO     dropping dups...
2023-07-03 16:17:01 INFO     Finding negative pairs...
2023-07-03 16:17:22 INFO     dropping dups...
2023-07-03 16:18:06 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:18:06 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:19:21 INFO     Building rank lists...
2023-07-03 16:19:41 INFO     Computing average precision...
2023-07-03 16:19:41 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:19:48 INFO     Computing P-values...
2023-07-03 16:19:48 INFO     Creating result DataFrame...
2023-07-03 16:19:48 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:19:50 INFO     Indexing metadata...
2023-07-03 16:19:50 INFO     Finding positive pairs...
2023-07-03 16:19:50 INFO     dropping dups...
2023-07-03 16:19:50 INFO     F

50eec5d3 id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:19:50 INFO     Computing negative similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:19:50 INFO     Building rank lists...
2023-07-03 16:19:50 INFO     Computing average precision...
2023-07-03 16:19:50 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:19:50 INFO     Computing P-values...
2023-07-03 16:19:50 INFO     Creating result DataFrame...
2023-07-03 16:19:50 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:19:53 INFO     Indexing metadata...
2023-07-03 16:19:53 INFO     Finding positive pairs...


fefae721 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:19:53 INFO     dropping dups...
2023-07-03 16:19:53 INFO     Finding negative pairs...
2023-07-03 16:20:14 INFO     dropping dups...
2023-07-03 16:21:02 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:21:02 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:21:14 INFO     Building rank lists...
2023-07-03 16:21:34 INFO     Computing average precision...
2023-07-03 16:21:35 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:21:42 INFO     Computing P-values...
2023-07-03 16:21:42 INFO     Creating result DataFrame...
2023-07-03 16:21:42 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:21:46 INFO     Indexing metadata...
2023-07-03 16:21:46 INFO     Finding positive pairs...


cdaf16db id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:21:46 INFO     dropping dups...
2023-07-03 16:21:46 INFO     Finding negative pairs...
2023-07-03 16:22:07 INFO     dropping dups...
2023-07-03 16:22:51 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:22:51 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:23:01 INFO     Building rank lists...
2023-07-03 16:23:21 INFO     Computing average precision...
2023-07-03 16:23:21 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:23:29 INFO     Computing P-values...
2023-07-03 16:23:29 INFO     Creating result DataFrame...
2023-07-03 16:23:29 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:23:33 INFO     Indexing metadata...
2023-07-03 16:23:33 INFO     Finding positive pairs...


ef524325 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:23:33 INFO     dropping dups...
2023-07-03 16:23:33 INFO     Finding negative pairs...
2023-07-03 16:23:54 INFO     dropping dups...
2023-07-03 16:24:42 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:24:43 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:26:16 INFO     Building rank lists...
2023-07-03 16:26:37 INFO     Computing average precision...
2023-07-03 16:26:37 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:26:45 INFO     Computing P-values...
2023-07-03 16:26:45 INFO     Creating result DataFrame...
2023-07-03 16:26:45 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:26:51 INFO     Indexing metadata...
2023-07-03 16:26:51 INFO     Finding positive pairs...


b2e52a3a id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:26:51 INFO     dropping dups...
2023-07-03 16:26:51 INFO     Finding negative pairs...
2023-07-03 16:27:12 INFO     dropping dups...
2023-07-03 16:27:56 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:27:56 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:28:50 INFO     Building rank lists...
2023-07-03 16:29:11 INFO     Computing average precision...
2023-07-03 16:29:11 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:29:21 INFO     Computing P-values...
2023-07-03 16:29:21 INFO     Creating result DataFrame...
2023-07-03 16:29:21 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:29:26 INFO     Indexing metadata...
2023-07-03 16:29:26 INFO     Finding positive pairs...


82ad004e id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_JCP2022']


2023-07-03 16:29:28 INFO     dropping dups...
2023-07-03 16:29:29 INFO     Finding negative pairs...
2023-07-03 16:29:49 INFO     dropping dups...
2023-07-03 16:30:30 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 16:30:32 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:32:21 INFO     Building rank lists...
2023-07-03 16:32:43 INFO     Computing average precision...
2023-07-03 16:32:43 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:32:50 INFO     Computing P-values...
2023-07-03 16:32:51 INFO     Creating result DataFrame...
2023-07-03 16:32:51 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:32:56 INFO     Indexing metadata...
2023-07-03 16:32:56 INFO     Finding positive pairs...


df66eaf7 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:32:56 INFO     dropping dups...
2023-07-03 16:32:56 INFO     Finding negative pairs...
2023-07-03 16:33:17 INFO     dropping dups...
2023-07-03 16:34:02 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:34:02 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:34:12 INFO     Building rank lists...
2023-07-03 16:34:33 INFO     Computing average precision...
2023-07-03 16:34:33 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:34:40 INFO     Computing P-values...
2023-07-03 16:34:40 INFO     Creating result DataFrame...
2023-07-03 16:34:40 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:34:44 INFO     Indexing metadata...
2023-07-03 16:34:44 INFO     Finding positive pairs...


b3ded838 id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 16:34:44 INFO     dropping dups...
2023-07-03 16:34:44 INFO     Finding negative pairs...
2023-07-03 16:35:05 INFO     dropping dups...
2023-07-03 16:35:46 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:35:46 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:37:06 INFO     Building rank lists...
2023-07-03 16:37:27 INFO     Computing average precision...
2023-07-03 16:37:27 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:37:33 INFO     Computing P-values...
2023-07-03 16:37:33 INFO     Creating result DataFrame...
2023-07-03 16:37:33 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:37:38 INFO     Indexing metadata...
2023-07-03 16:37:38 INFO     Finding positive pairs...


703b13da id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:37:38 INFO     dropping dups...
2023-07-03 16:37:38 INFO     Finding negative pairs...
2023-07-03 16:37:59 INFO     dropping dups...
2023-07-03 16:38:43 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:38:43 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:38:53 INFO     Building rank lists...
2023-07-03 16:39:15 INFO     Computing average precision...
2023-07-03 16:39:15 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:39:22 INFO     Computing P-values...
2023-07-03 16:39:22 INFO     Creating result DataFrame...
2023-07-03 16:39:22 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:39:26 INFO     Indexing metadata...
2023-07-03 16:39:26 INFO     Finding positive pairs...


5a048fe4 id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 16:39:27 INFO     dropping dups...
2023-07-03 16:39:27 INFO     Finding negative pairs...
2023-07-03 16:39:47 INFO     dropping dups...
2023-07-03 16:40:32 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:40:33 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:41:32 INFO     Building rank lists...
2023-07-03 16:41:53 INFO     Computing average precision...
2023-07-03 16:41:54 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:42:01 INFO     Computing P-values...
2023-07-03 16:42:01 INFO     Creating result DataFrame...
2023-07-03 16:42:01 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:42:05 INFO     Indexing metadata...
2023-07-03 16:42:05 INFO     Finding positive pairs...


0e4cd68f id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_JCP2022']


2023-07-03 16:42:07 INFO     dropping dups...
2023-07-03 16:42:08 INFO     Finding negative pairs...
2023-07-03 16:42:29 INFO     dropping dups...
2023-07-03 16:43:13 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 16:43:14 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:44:16 INFO     Building rank lists...
2023-07-03 16:44:37 INFO     Computing average precision...
2023-07-03 16:44:37 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:44:47 INFO     Computing P-values...
2023-07-03 16:44:47 INFO     Creating result DataFrame...
2023-07-03 16:44:47 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:44:50 INFO     Indexing metadata...
2023-07-03 16:44:50 INFO     Finding positive pairs...


bcf32878 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 16:44:51 INFO     dropping dups...
2023-07-03 16:44:51 INFO     Finding negative pairs...
2023-07-03 16:45:12 INFO     dropping dups...
2023-07-03 16:45:58 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:45:58 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:46:08 INFO     Building rank lists...
2023-07-03 16:46:29 INFO     Computing average precision...
2023-07-03 16:46:29 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:46:36 INFO     Computing P-values...
2023-07-03 16:46:36 INFO     Creating result DataFrame...
2023-07-03 16:46:36 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:46:40 INFO     Indexing metadata...
2023-07-03 16:46:40 INFO     Finding positive pairs...


61da8ae0 id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 16:46:40 INFO     dropping dups...
2023-07-03 16:46:41 INFO     Finding negative pairs...
2023-07-03 16:47:01 INFO     dropping dups...
2023-07-03 16:47:44 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 16:47:44 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 16:49:09 INFO     Building rank lists...
2023-07-03 16:49:30 INFO     Computing average precision...
2023-07-03 16:49:30 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 16:49:37 INFO     Computing P-values...
2023-07-03 16:49:37 INFO     Creating result DataFrame...
2023-07-03 16:49:37 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 16:49:43 INFO     Indexing metadata...
2023-07-03 16:49:43 INFO     Finding positive pairs...


b8ad0e58 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 16:49:45 INFO     dropping dups...
2023-07-03 16:49:46 INFO     Finding negative pairs...
2023-07-03 17:03:23 INFO     dropping dups...
2023-07-03 17:11:42 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:11:44 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:13:23 INFO     Building rank lists...
2023-07-03 17:13:44 INFO     Computing average precision...
2023-07-03 17:13:44 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:13:51 INFO     Computing P-values...
2023-07-03 17:13:51 INFO     Creating result DataFrame...
2023-07-03 17:13:51 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:13:57 INFO     Indexing metadata...
2023-07-03 17:13:57 INFO     Finding positive pairs...


21b92180 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 17:13:58 INFO     dropping dups...
2023-07-03 17:13:58 INFO     Finding negative pairs...
2023-07-03 17:14:19 INFO     dropping dups...
2023-07-03 17:15:03 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:15:03 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:15:13 INFO     Building rank lists...
2023-07-03 17:15:33 INFO     Computing average precision...
2023-07-03 17:15:33 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:15:41 INFO     Computing P-values...
2023-07-03 17:15:41 INFO     Creating result DataFrame...
2023-07-03 17:15:41 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:15:44 INFO     Indexing metadata...
2023-07-03 17:15:44 INFO     Finding positive pairs...


d66f1bce id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_JCP2022'] Diffby: []


2023-07-03 17:15:45 INFO     dropping dups...
2023-07-03 17:15:45 INFO     Finding negative pairs...
2023-07-03 17:16:05 INFO     dropping dups...
2023-07-03 17:16:51 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:16:51 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:18:23 INFO     Building rank lists...
2023-07-03 17:18:43 INFO     Computing average precision...
2023-07-03 17:18:44 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:18:50 INFO     Computing P-values...
2023-07-03 17:18:50 INFO     Creating result DataFrame...
2023-07-03 17:18:50 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:18:55 INFO     Indexing metadata...
2023-07-03 17:18:55 INFO     Finding positive pairs...


0485b96b id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 17:18:56 INFO     dropping dups...
2023-07-03 17:18:56 INFO     Finding negative pairs...
2023-07-03 17:19:16 INFO     dropping dups...
2023-07-03 17:20:02 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:20:02 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:20:50 INFO     Building rank lists...
2023-07-03 17:21:11 INFO     Computing average precision...
2023-07-03 17:21:11 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:21:18 INFO     Computing P-values...
2023-07-03 17:21:18 INFO     Creating result DataFrame...
2023-07-03 17:21:18 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:21:22 INFO     Indexing metadata...
2023-07-03 17:21:22 INFO     Finding positive pairs...


ceb5f02a id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 17:21:22 INFO     dropping dups...
2023-07-03 17:21:22 INFO     Finding negative pairs...
2023-07-03 17:21:43 INFO     dropping dups...
2023-07-03 17:22:28 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:22:28 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:23:19 INFO     Building rank lists...
2023-07-03 17:23:38 INFO     Computing average precision...
2023-07-03 17:23:39 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:23:45 INFO     Computing P-values...
2023-07-03 17:23:45 INFO     Creating result DataFrame...
2023-07-03 17:23:45 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:23:49 INFO     Indexing metadata...
2023-07-03 17:23:49 INFO     Finding positive pairs...


cd1b92de id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 17:23:49 INFO     dropping dups...
2023-07-03 17:23:49 INFO     Finding negative pairs...
2023-07-03 17:24:10 INFO     dropping dups...
2023-07-03 17:24:54 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:24:54 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:25:03 INFO     Building rank lists...
2023-07-03 17:25:24 INFO     Computing average precision...
2023-07-03 17:25:24 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:25:35 INFO     Computing P-values...
2023-07-03 17:25:35 INFO     Creating result DataFrame...
2023-07-03 17:25:35 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:25:39 INFO     Indexing metadata...
2023-07-03 17:25:39 INFO     Finding positive pairs...


fd50b01b id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 17:25:39 INFO     dropping dups...
2023-07-03 17:25:39 INFO     Finding negative pairs...
2023-07-03 17:25:59 INFO     dropping dups...
2023-07-03 17:26:45 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:26:45 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:26:54 INFO     Building rank lists...
2023-07-03 17:27:15 INFO     Computing average precision...
2023-07-03 17:27:15 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:27:22 INFO     Computing P-values...
2023-07-03 17:27:22 INFO     Creating result DataFrame...
2023-07-03 17:27:22 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:27:25 INFO     Indexing metadata...
2023-07-03 17:27:25 INFO     Finding positive pairs...


63526422 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 17:27:27 INFO     dropping dups...
2023-07-03 17:27:28 INFO     Finding negative pairs...
2023-07-03 17:27:48 INFO     dropping dups...
2023-07-03 17:28:33 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:28:34 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:29:05 INFO     Building rank lists...
2023-07-03 17:29:25 INFO     Computing average precision...
2023-07-03 17:29:25 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:29:32 INFO     Computing P-values...
2023-07-03 17:29:33 INFO     Creating result DataFrame...
2023-07-03 17:29:33 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:29:36 INFO     Indexing metadata...
2023-07-03 17:29:36 INFO     Finding positive pairs...


5eaae264 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 17:29:38 INFO     dropping dups...
2023-07-03 17:29:39 INFO     Finding negative pairs...
2023-07-03 17:29:59 INFO     dropping dups...
2023-07-03 17:30:44 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:30:45 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:31:29 INFO     Building rank lists...
2023-07-03 17:31:49 INFO     Computing average precision...
2023-07-03 17:31:49 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:31:57 INFO     Computing P-values...
2023-07-03 17:31:57 INFO     Creating result DataFrame...
2023-07-03 17:31:57 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:32:01 INFO     Indexing metadata...
2023-07-03 17:32:01 INFO     Finding positive pairs...


e2bf955d id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_JCP2022'] Diffby: []


2023-07-03 17:32:01 INFO     dropping dups...
2023-07-03 17:32:01 INFO     Finding negative pairs...
2023-07-03 17:32:22 INFO     dropping dups...
2023-07-03 17:33:06 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:33:06 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:34:01 INFO     Building rank lists...
2023-07-03 17:34:20 INFO     Computing average precision...
2023-07-03 17:34:20 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:34:27 INFO     Computing P-values...
2023-07-03 17:34:27 INFO     Creating result DataFrame...
2023-07-03 17:34:27 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:34:31 INFO     Indexing metadata...
2023-07-03 17:34:31 INFO     Finding positive pairs...


758dee96 id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 17:34:31 INFO     dropping dups...
2023-07-03 17:34:31 INFO     Finding negative pairs...
2023-07-03 17:34:52 INFO     dropping dups...
2023-07-03 17:35:36 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:35:37 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:36:28 INFO     Building rank lists...
2023-07-03 17:36:48 INFO     Computing average precision...
2023-07-03 17:36:48 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:36:55 INFO     Computing P-values...
2023-07-03 17:36:55 INFO     Creating result DataFrame...
2023-07-03 17:36:55 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:36:58 INFO     Indexing metadata...
2023-07-03 17:36:58 INFO     Finding positive pairs...


889ca0ab id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 17:37:00 INFO     dropping dups...
2023-07-03 17:37:01 INFO     Finding negative pairs...
2023-07-03 17:37:22 INFO     dropping dups...
2023-07-03 17:38:06 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:38:08 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:39:09 INFO     Building rank lists...
2023-07-03 17:39:30 INFO     Computing average precision...
2023-07-03 17:39:30 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:39:37 INFO     Computing P-values...
2023-07-03 17:39:37 INFO     Creating result DataFrame...
2023-07-03 17:39:37 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:39:41 INFO     Indexing metadata...
2023-07-03 17:39:41 INFO     Finding positive pairs...


0cc6a4ec id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 17:39:41 INFO     dropping dups...
2023-07-03 17:39:41 INFO     Finding negative pairs...
2023-07-03 17:40:01 INFO     dropping dups...
2023-07-03 17:40:46 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:40:46 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:41:18 INFO     Building rank lists...
2023-07-03 17:41:38 INFO     Computing average precision...
2023-07-03 17:41:39 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:41:46 INFO     Computing P-values...
2023-07-03 17:41:46 INFO     Creating result DataFrame...
2023-07-03 17:41:46 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:41:49 INFO     Indexing metadata...
2023-07-03 17:41:50 INFO     Finding positive pairs...


adacbee3 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 17:41:51 INFO     dropping dups...
2023-07-03 17:41:52 INFO     Finding negative pairs...
2023-07-03 17:42:13 INFO     dropping dups...
2023-07-03 17:42:58 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:42:59 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:43:34 INFO     Building rank lists...
2023-07-03 17:43:56 INFO     Computing average precision...
2023-07-03 17:43:56 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:44:04 INFO     Computing P-values...
2023-07-03 17:44:04 INFO     Creating result DataFrame...
2023-07-03 17:44:04 INFO     Finished.


32dde2e8 id1 match OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:44:11 INFO     Indexing metadata...
2023-07-03 17:44:11 INFO     Finding positive pairs...


3701ed2c id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_JCP2022']


2023-07-03 17:44:13 INFO     dropping dups...
2023-07-03 17:44:14 INFO     Finding negative pairs...
2023-07-03 17:44:34 INFO     dropping dups...
2023-07-03 17:45:20 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:45:20 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:45:49 INFO     Building rank lists...
2023-07-03 17:46:09 INFO     Computing average precision...
2023-07-03 17:46:10 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:46:17 INFO     Computing P-values...
2023-07-03 17:46:17 INFO     Creating result DataFrame...
2023-07-03 17:46:17 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:46:20 INFO     Indexing metadata...
2023-07-03 17:46:20 INFO     Finding positive pairs...


df015b9f id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 17:46:21 INFO     dropping dups...
2023-07-03 17:46:21 INFO     Finding negative pairs...
2023-07-03 17:46:41 INFO     dropping dups...
2023-07-03 17:47:27 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:47:27 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:48:30 INFO     Building rank lists...
2023-07-03 17:48:51 INFO     Computing average precision...
2023-07-03 17:48:51 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:48:58 INFO     Computing P-values...
2023-07-03 17:48:58 INFO     Creating result DataFrame...
2023-07-03 17:48:58 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:49:02 INFO     Indexing metadata...
2023-07-03 17:49:02 INFO     Finding positive pairs...


edaff3b6 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 17:49:04 INFO     dropping dups...
2023-07-03 17:49:05 INFO     Finding negative pairs...
2023-07-03 17:49:26 INFO     dropping dups...
2023-07-03 17:50:12 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 17:50:13 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:51:01 INFO     Building rank lists...
2023-07-03 17:55:39 INFO     Computing average precision...
2023-07-03 17:55:40 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:55:49 INFO     Computing P-values...
2023-07-03 17:55:49 INFO     Creating result DataFrame...
2023-07-03 17:55:49 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:55:53 INFO     Indexing metadata...
2023-07-03 17:55:53 INFO     Finding positive pairs...


6fae5285 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 17:55:53 INFO     dropping dups...
2023-07-03 17:55:53 INFO     Finding negative pairs...
2023-07-03 17:56:13 INFO     dropping dups...
2023-07-03 17:56:59 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 17:56:59 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 17:58:35 INFO     Building rank lists...
2023-07-03 17:58:56 INFO     Computing average precision...
2023-07-03 17:58:56 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 17:59:03 INFO     Computing P-values...
2023-07-03 17:59:04 INFO     Creating result DataFrame...
2023-07-03 17:59:04 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 17:59:09 INFO     Indexing metadata...
2023-07-03 17:59:09 INFO     Finding positive pairs...


304eb132 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_JCP2022']


2023-07-03 17:59:11 INFO     dropping dups...
2023-07-03 17:59:12 INFO     Finding negative pairs...
2023-07-03 17:59:33 INFO     dropping dups...
2023-07-03 18:00:21 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 18:00:23 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:01:59 INFO     Building rank lists...
2023-07-03 18:02:21 INFO     Computing average precision...
2023-07-03 18:02:21 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:02:28 INFO     Computing P-values...
2023-07-03 18:02:28 INFO     Creating result DataFrame...
2023-07-03 18:02:28 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:02:33 INFO     Indexing metadata...
2023-07-03 18:02:33 INFO     Finding positive pairs...


c4c7aa12 id1 mismatch
Sameby: ['Metadata_Well'] Diffby: ['Metadata_Symbol']


2023-07-03 18:02:35 INFO     dropping dups...
2023-07-03 18:02:36 INFO     Finding negative pairs...
2023-07-03 18:02:56 INFO     dropping dups...
2023-07-03 18:03:42 INFO     Computing positive similarities...


  0%|          | 0/5 [00:00<?, ?it/s]

2023-07-03 18:03:44 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:05:37 INFO     Building rank lists...
2023-07-03 18:05:58 INFO     Computing average precision...
2023-07-03 18:05:58 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:06:06 INFO     Computing P-values...
2023-07-03 18:06:06 INFO     Creating result DataFrame...
2023-07-03 18:06:06 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:06:12 INFO     Indexing metadata...
2023-07-03 18:06:12 INFO     Finding positive pairs...


34fbd8a4 id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_JCP2022'] Diffby: []


2023-07-03 18:06:12 INFO     dropping dups...
2023-07-03 18:06:13 INFO     Finding negative pairs...
2023-07-03 18:06:33 INFO     dropping dups...
2023-07-03 18:07:19 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:07:19 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:08:27 INFO     Building rank lists...
2023-07-03 18:08:47 INFO     Computing average precision...
2023-07-03 18:08:48 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:08:58 INFO     Computing P-values...
2023-07-03 18:08:58 INFO     Creating result DataFrame...
2023-07-03 18:08:58 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:09:02 INFO     Indexing metadata...
2023-07-03 18:09:02 INFO     Finding positive pairs...


10792e71 id1 match OK
Sameby: ['Metadata_Symbol'] Diffby: ['Metadata_Well']


2023-07-03 18:09:02 INFO     dropping dups...
2023-07-03 18:09:02 INFO     Finding negative pairs...
2023-07-03 18:09:23 INFO     dropping dups...
2023-07-03 18:10:10 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:10:10 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:10:19 INFO     Building rank lists...
2023-07-03 18:10:39 INFO     Computing average precision...
2023-07-03 18:10:40 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:10:46 INFO     Computing P-values...
2023-07-03 18:10:46 INFO     Creating result DataFrame...
2023-07-03 18:10:46 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:10:50 INFO     Indexing metadata...
2023-07-03 18:10:50 INFO     Finding positive pairs...


1b22d880 id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 18:10:50 INFO     dropping dups...
2023-07-03 18:10:50 INFO     Finding negative pairs...
2023-07-03 18:11:11 INFO     dropping dups...
2023-07-03 18:11:57 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:11:58 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:13:30 INFO     Building rank lists...
2023-07-03 18:13:52 INFO     Computing average precision...
2023-07-03 18:13:52 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:14:00 INFO     Computing P-values...
2023-07-03 18:14:00 INFO     Creating result DataFrame...
2023-07-03 18:14:00 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:14:05 INFO     Indexing metadata...
2023-07-03 18:14:05 INFO     Finding positive pairs...


306333dc id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 18:14:06 INFO     dropping dups...
2023-07-03 18:14:06 INFO     Finding negative pairs...
2023-07-03 18:14:27 INFO     dropping dups...
2023-07-03 18:15:11 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:15:12 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:16:01 INFO     Building rank lists...
2023-07-03 18:16:24 INFO     Computing average precision...
2023-07-03 18:16:25 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:16:33 INFO     Computing P-values...
2023-07-03 18:16:33 INFO     Creating result DataFrame...
2023-07-03 18:16:33 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:16:36 INFO     Indexing metadata...
2023-07-03 18:16:36 INFO     Finding positive pairs...


440d5bda id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 18:16:37 INFO     dropping dups...
2023-07-03 18:16:37 INFO     Finding negative pairs...
2023-07-03 18:16:58 INFO     dropping dups...
2023-07-03 18:17:45 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:17:45 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:18:15 INFO     Building rank lists...
2023-07-03 18:18:36 INFO     Computing average precision...
2023-07-03 18:18:36 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:18:45 INFO     Computing P-values...
2023-07-03 18:18:45 INFO     Creating result DataFrame...
2023-07-03 18:18:45 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:18:48 INFO     Indexing metadata...
2023-07-03 18:18:48 INFO     Finding positive pairs...


e8e9de1e id1 match OK
Sameby: ['Metadata_JCP2022'] Diffby: ['Metadata_Well']


2023-07-03 18:18:49 INFO     dropping dups...
2023-07-03 18:18:49 INFO     Finding negative pairs...
2023-07-03 18:19:09 INFO     dropping dups...
2023-07-03 18:19:56 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:19:56 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:20:06 INFO     Building rank lists...
2023-07-03 18:20:26 INFO     Computing average precision...
2023-07-03 18:20:26 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:20:38 INFO     Computing P-values...
2023-07-03 18:20:38 INFO     Creating result DataFrame...
2023-07-03 18:20:38 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:20:41 INFO     Indexing metadata...
2023-07-03 18:20:41 INFO     Finding positive pairs...


a69994b0 id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_JCP2022'] Diffby: []


2023-07-03 18:20:42 INFO     dropping dups...
2023-07-03 18:20:42 INFO     Finding negative pairs...
2023-07-03 18:21:03 INFO     dropping dups...
2023-07-03 18:21:49 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:21:49 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:22:20 INFO     Building rank lists...
2023-07-03 18:22:41 INFO     Computing average precision...
2023-07-03 18:22:41 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:22:48 INFO     Computing P-values...
2023-07-03 18:22:48 INFO     Creating result DataFrame...
2023-07-03 18:22:48 INFO     Finished.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  metadata.loc[:, "Metadata_dummy_index"] = metadata.index
2023-07-03 18:22:52 INFO     Indexing metadata...
2023-07-03 18:22:52 INFO     Finding positive pairs...


90b5b91d id1 mismatch
Sameby: ['Metadata_Well', 'Metadata_Symbol'] Diffby: []


2023-07-03 18:22:53 INFO     dropping dups...
2023-07-03 18:22:53 INFO     Finding negative pairs...
2023-07-03 18:23:13 INFO     dropping dups...
2023-07-03 18:23:59 INFO     Computing positive similarities...


  0%|          | 0/1 [00:00<?, ?it/s]

2023-07-03 18:24:00 INFO     Computing negative similarities...


  0%|          | 0/161 [00:00<?, ?it/s]

2023-07-03 18:24:34 INFO     Building rank lists...
2023-07-03 18:24:55 INFO     Computing average precision...
2023-07-03 18:24:55 INFO     Computing null distributions...
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
  ap = (pr_k * rel_k).sum(axis=1) / num_pos
2023-07-03 18:25:02 INFO     Computing P-values...
2023-07-03 18:25:03 INFO     Creating result DataFrame...
2023-07-03 18:25:03 INFO     Finished.


#### Compare copairs results with evalzoo

In [42]:
with open("output/copairs_results.pkl", "rb") as f:
    copairs_results = pickle.load(f)

In [43]:
copairs_results_nona = {k: v for k, v in copairs_results.items() if v is not None}
len(copairs_results), len(copairs_results_nona)

(64, 60)

In [44]:
mismatches = []
for key in copairs_results_nona.keys():
    copairs_result = copairs_results_nona[key][
        ~copairs_results_nona[key].average_precision.isna()
    ]
    evalzoo_result = pd.read_parquet(
        f"../2.evalzoo/results/{key}/metrics_level_1_ref.parquet"
    )
    evalzoo_result = evalzoo_result.groupby(
        copairs_result.filter(regex="^Metadata_").columns.tolist()
    ).mean(numeric_only=True)

    if np.allclose(
        copairs_result.average_precision,
        evalzoo_result.sim_retrieval_average_precision_ref_i_mean_i,
    ):
        print(f"{key} match OK")
    else:
        print(f"{key} mismatch")
        mismatches.append(key)

630757cf match OK
66fd2a4d match OK
f86c9fcc match OK
ac65bc4b match OK
273d3138 match OK
263a5ff4 match OK
2bde8689 match OK
f5c36cb4 match OK
30f99c08 match OK
125bfb5b match OK
8a46f718 match OK
3d645cb2 match OK
d2311b58 match OK
d4df76cd match OK
26efacc8 match OK
5ee8aabb match OK
64aa0540 match OK
faa6ba3e match OK
49abf604 match OK
790b8557 match OK
fefae721 match OK
cdaf16db match OK
ef524325 match OK
b2e52a3a match OK
82ad004e match OK
df66eaf7 match OK
b3ded838 match OK
703b13da match OK
5a048fe4 match OK
0e4cd68f match OK
bcf32878 match OK
61da8ae0 match OK
b8ad0e58 match OK
21b92180 match OK
d66f1bce match OK
0485b96b match OK
ceb5f02a match OK
cd1b92de match OK
fd50b01b match OK
63526422 match OK
5eaae264 match OK
e2bf955d match OK
758dee96 match OK
889ca0ab match OK
0cc6a4ec match OK
adacbee3 match OK
3701ed2c match OK
df015b9f match OK
edaff3b6 match OK
6fae5285 match OK
304eb132 match OK
c4c7aa12 match OK
34fbd8a4 match OK
10792e71 match OK
1b22d880 match OK
306333dc m

In [45]:
mismatches

[]