In [2]:
import torch
from cellmaps_imagedownloader.runner import CellmapsImageDownloader
from cellmaps_imagedownloader.runner import MultiProcessImageDownloader
from cellmaps_imagedownloader.gene import ImageGeneNodeAttributeGenerator as IGen 
from cellmaps_imagedownloader.proteinatlas import ProteinAtlasReader, ProteinAtlasImageUrlReader, ImageDownloadTupleGenerator
import json
import os
import pandas as pd
from glob import glob
#import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from tqdm import tqdm
from joblib import Parallel, delayed
import json
from collections import Counter
import requests

In [4]:
# Root folder whose sub-directories are treated as one treatment each; it is the
# default `base_path` of collect_image_paths() and load_rocrate_metadata_with_antibodies().
BASE_PATH = "../CM4AI/Three/raw"
# Channel names. collect_image_paths() looks for a sub-folder with each of these names
# inside every treatment folder and removes the matching "_<channel>.jpg" from file names.
CHANNELS = ["blue", "green", "red", "yellow"]

In [5]:
def collect_image_paths(base_path=BASE_PATH):
    """
    Scan ``base_path`` for per-treatment image folders and index the channel files.

    Every sub-directory of ``base_path`` is treated as one treatment; its label is
    the text after the last "-" in the folder name, lower-cased. Inside each
    treatment folder, one sub-folder per entry of ``CHANNELS`` is searched for
    ``*.jpg`` files, and files that share the same name once the trailing
    ``_<channel>.jpg`` is removed are gathered into a single row.

    Args:
        base_path (str): Directory containing the treatment folders.

    Returns:
        pd.DataFrame: One row per (treatment folder, image id) with the columns
        ``id``, ``treatment`` and one column per channel holding the file path
        (NaN where that channel file is missing). These columns are always
        present, even when no image at all is found.
    """
    # Fixed column set so downstream code can rely on every channel column existing.
    columns = ["id", "treatment", *CHANNELS]

    records = []
    for treatment_folder in os.listdir(base_path):
        treatment_path = os.path.join(base_path, treatment_folder)
        if not os.path.isdir(treatment_path):
            continue

        treatment = treatment_folder.split("-")[-1].lower()

        # Image ids are only merged within one treatment folder.
        image_dict = {}
        for channel in CHANNELS:
            suffix = f"_{channel}.jpg"
            pattern = os.path.join(treatment_path, channel, "*.jpg")
            for img_path in glob(pattern):
                basename = os.path.basename(img_path)
                # Remove the channel marker only when it is a true suffix;
                # str.replace would also cut it out of the middle of a name.
                if basename.endswith(suffix):
                    basename = basename[:-len(suffix)]
                row = image_dict.setdefault(basename, {"id": basename, "treatment": treatment})
                row[channel] = img_path

        records.extend(image_dict.values())

    return pd.DataFrame(records, columns=columns)

In [6]:
def load_rocrate_metadata_with_antibodies(base_path=BASE_PATH):
    """
    Collect per-image metadata from the RO-Crate file of every treatment folder.

    For each sub-folder of ``base_path`` that contains a
    ``ro-crate-metadata.json``, the ``@graph`` is walked twice: first to index
    every ``BioChemEntity`` (stain/antibody) by its ``@id``, then to emit one
    record per ``EVI:Dataset`` entry whose ``contentUrl`` names a ``.jpg`` file.
    Missing, ``null`` or unexpectedly shaped references are treated as absent
    instead of raising.

    Args:
        base_path (str): Directory containing the treatment folders.

    Returns:
        pd.DataFrame: One row per image file with the columns ``id``,
        ``channel``, ``antibody_stain``, ``antibody_name``, ``antibody_hpa_id``,
        ``antibody_ensembl``, ``antibody_uniprot``, ``antibody_pubchem``,
        ``subcellular_location``, ``cell_line``, ``treatment``, ``description``
        and ``filename``. ``id`` and ``channel`` are the parts of the file stem
        before and after its last underscore (``channel`` lower-cased).
    """

    def _as_dict(value):
        """Return a dict for ``value``: itself, the first dict of a list, or ``{}``."""
        if isinstance(value, list):
            value = next((item for item in value if isinstance(item, dict)), None)
        return value if isinstance(value, dict) else {}

    def _has_type(entry, type_name):
        """True when ``@type`` equals ``type_name`` or is a list containing it."""
        declared = entry.get("@type")
        if isinstance(declared, list):
            return type_name in declared
        return declared == type_name

    def _ref_id(entry, key):
        """The ``@id`` of the object referenced under ``key`` ("" when unavailable)."""
        ref = _as_dict(entry.get(key)).get("@id")
        return ref if isinstance(ref, str) else ""

    def _ref_tail(entry, key, prefix):
        """Last path segment of the referenced ``@id`` with ``prefix`` removed."""
        return _ref_id(entry, key).split("/")[-1].replace(prefix, "")

    jpg_suffix = ".jpg"
    metadata_records = []

    for treatment_folder in os.listdir(base_path):
        crate_path = os.path.join(base_path, treatment_folder, "ro-crate-metadata.json")
        if not os.path.isfile(crate_path):
            continue

        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(crate_path, "r", encoding="utf-8") as f:
            crate = json.load(f)

        graph = [entry for entry in (crate.get("@graph") or []) if isinstance(entry, dict)]

        # --- Build antibody/stain index ---
        # Done in a separate pass because a stain may be listed after the
        # datasets that reference it.
        antibody_index = {}
        for entry in graph:
            if not _has_type(entry, "BioChemEntity"):
                continue
            stain_id = entry.get("@id")
            if stain_id is None:
                continue  # cannot be referenced by any dataset

            identifiers = entry.get("identifier") or []
            if not isinstance(identifiers, list):
                identifiers = [identifiers]
            id_map = {
                item.get("name"): item.get("value")
                for item in identifiers
                if isinstance(item, dict)
            }

            location = entry.get("isLocatedInSubcellularLocation")
            antibody_index[stain_id] = {
                "name": entry.get("name"),
                "description": entry.get("description"),
                "hpa_id": id_map.get("HPA Antibody ID"),
                "ensembl": id_map.get("ENSEMBL"),
                "uniprot": id_map.get("Uniprot"),
                "pubchem": id_map.get("PubChem"),
                "subcellular_location": location.get("name") if isinstance(location, dict) else None,
            }

        # --- Process each dataset (image) entry ---
        for entry in graph:
            if not _has_type(entry, "EVI:Dataset"):
                continue

            content_url = entry.get("contentUrl") or ""
            if not isinstance(content_url, str):
                continue
            filename = os.path.basename(content_url.replace("file://", "")).strip("/")
            if not filename.endswith(jpg_suffix):
                continue

            # Drop only the trailing extension, then split "<id>_<channel>".
            stem_parts = filename[:-len(jpg_suffix)].rsplit("_", 1)
            base_id = stem_parts[0]
            channel = stem_parts[-1].lower()

            ab_meta = antibody_index.get(_ref_id(entry, "usedStain"), {})

            metadata_records.append({
                "id": base_id,
                "channel": channel,
                "antibody_stain": _ref_tail(entry, "usedStain", "stain-"),
                "antibody_name": ab_meta.get("name"),
                "antibody_hpa_id": ab_meta.get("hpa_id"),
                "antibody_ensembl": ab_meta.get("ensembl"),
                "antibody_uniprot": ab_meta.get("uniprot"),
                "antibody_pubchem": ab_meta.get("pubchem"),
                "subcellular_location": ab_meta.get("subcellular_location"),
                "cell_line": _ref_tail(entry, "usedCellLine", "cell-line-"),
                "treatment": _ref_tail(entry, "usedTreatment", "treatment-"),
                "description": entry.get("description", ""),
                "filename": filename
            })

    return pd.DataFrame(metadata_records)

In [7]:
def batch_lookup_ensembl_symbols(ensembl_ids, batch_size=1000, timeout=30):
    """
    Look up gene symbols from Ensembl using batched POST requests.

    The lookup is best effort: a batch that fails (network error, non-200
    status, undecodable body) is reported with ``print`` and skipped, and the
    remaining batches are still processed.

    Args:
        ensembl_ids: Iterable of Ensembl ID strings.
        batch_size (int): Number of IDs sent per request.
        timeout (float): Seconds to wait for each request before giving up,
            so an unresponsive server cannot block the notebook forever.

    Returns:
        dict: ``{ensembl_id: gene_symbol}`` for every ID present in a
        successful response. The symbol is ``None`` when the service returned
        no record (``null``) or a record without ``display_name``.
    """
    url = "https://rest.ensembl.org/lookup/id"
    headers = {"Content-Type": "application/json", "Accept": "application/json"}
    id_to_symbol = {}

    # Materialise once so any iterable (Series, array, generator) can be sliced
    # and JSON-encoded.
    ids = list(ensembl_ids)

    for i in range(0, len(ids), batch_size):
        batch = ids[i:i + batch_size]
        try:
            response = requests.post(url, headers=headers, json={"ids": batch}, timeout=timeout)
            if response.status_code != 200:
                print(f"⚠️ Error {response.status_code}: {response.text}")
                continue
            results = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"⚠️ Request failed for batch starting at {i}: {e}")
            continue

        if not isinstance(results, dict):
            print(f"⚠️ Unexpected response for batch starting at {i}")
            continue

        for eid, info in results.items():
            # An ID without a record comes back as null; calling .get on it
            # used to abort the whole batch and lose every later ID.
            id_to_symbol[eid] = info.get("display_name") if isinstance(info, dict) else None

    return id_to_symbol

In [8]:
def save_image_gene_node_attributes(df_merged, base_output_dir="data/raw"):
    """
    Write one ``1_image_gene_node_attributes.tsv`` per treatment.

    Only green-channel rows are used. The treatment label "control" is renamed
    to "untreated", rows are de-duplicated on ``id``, ``treatment``,
    ``antibody_hpa_id`` and ``antibody_ensembl``, and rows without a treatment
    are ignored. Gene symbols are fetched with ``batch_lookup_ensembl_symbols``
    in a single call covering all treatments; rows whose Ensembl ID does not
    start with ``ENSG<digits>`` or has no symbol are named "NEGATIVE".

    Args:
        df_merged (pd.DataFrame): Long-format table with at least the columns
            ``id``, ``channel``, ``treatment``, ``antibody_hpa_id`` and
            ``antibody_ensembl``.
        base_output_dir (str): Directory under which ``<treatment>/`` folders
            are created as needed.

    Returns:
        None. Files are written to disk and one confirmation line is printed
        per treatment.
    """
    # Filter to green channel (protein target)
    df_green = df_merged[df_merged["channel"] == "green"].copy()

    # Normalize treatment label: "control" becomes "untreated"
    df_green["treatment"] = df_green["treatment"].replace("control", "untreated")

    # Drop exact duplicates across key fields
    df_green = df_green.drop_duplicates(subset=["id", "treatment", "antibody_hpa_id", "antibody_ensembl"])

    # Rows without a treatment cannot be routed to an output folder.
    df_green = df_green[df_green["treatment"].notna()]
    if df_green.empty:
        return

    represents = "ensembl:" + df_green["antibody_ensembl"].fillna("")
    ensembl_ids = represents.str.replace("ensembl:", "", regex=False)

    # Keep only values that look like Ensembl gene IDs, and resolve them once
    # for the whole table instead of once per treatment.
    unique_ensembl_ids = (
        ensembl_ids[ensembl_ids.str.match(r"ENSG\d+")]
        .unique()
        .tolist()
    )
    ensembl_to_name = batch_lookup_ensembl_symbols(unique_ensembl_ids)

    df_all = pd.DataFrame({
        "name": ensembl_ids.map(ensembl_to_name).fillna("NEGATIVE"),
        "represents": represents,
        "ambiguous": df_green["antibody_hpa_id"],
        "antibody": df_green["antibody_hpa_id"],
        "filename": df_green["id"].astype(str) + "_",
        "imageurl": "no image url found"
    })

    for treatment in df_green["treatment"].unique():
        # Positional mask: independent of the index labels of df_merged.
        in_treatment = (df_green["treatment"] == treatment).to_numpy()
        df_out = df_all[in_treatment]

        # Save to the appropriate treatment folder
        treatment_folder = os.path.join(base_output_dir, treatment)
        os.makedirs(treatment_folder, exist_ok=True)

        out_path = os.path.join(treatment_folder, "1_image_gene_node_attributes.tsv")
        df_out.to_csv(out_path, sep="\t", index=False)

        print(f"✅ Saved: {out_path}")

In [9]:
def load_multichannel_image(row):
    """
    Loads a 4-channel immunofluorescence image from separate grayscale files.

    Args:
        row (pd.Series): A row from df_images with keys: blue, green, red, yellow.
            Any mapping that supports ``row[channel]`` works.

    Returns:
        np.ndarray: H x W x 4 array with channels in the order [blue, green, red, yellow]

    Raises:
        ValueError: From ``np.stack`` when the channel images do not all have
            the same shape.
    """
    img_channels = []
    for ch in ["blue", "green", "red", "yellow"]:
        # The context manager releases the file as soon as the pixels have
        # been copied, instead of leaving that to garbage collection.
        with Image.open(row[ch]) as img:
            img_channels.append(np.array(img.convert("L")))  # 8-bit grayscale

    return np.stack(img_channels, axis=-1)  # Shape: H x W x 4

In [11]:
def print_summary_report(df_merged, n_jobs=4):
    """
    Print a text summary of the merged image/metadata table.

    Sections, in order: rows per treatment, unique image ids per treatment,
    distribution of image sizes (every image is loaded through
    ``load_multichannel_image`` on a thread pool), number of distinct
    green-channel antibodies, and the stains/antibodies seen in the red, blue
    and yellow channels.

    Args:
        df_merged (pd.DataFrame): Long-format table with the columns ``id``,
            ``channel``, ``filepath``, ``treatment``, ``antibody_hpa_id`` and
            ``antibody_name``.
        n_jobs (int): Number of joblib workers used to load the images.

    Returns:
        None. Everything is written to stdout, flushed line by line.
    """

    def emit(text):
        # Flush on every line so the report interleaves correctly with the progress bar.
        print(text, flush=True)

    emit("🧬🔬 CM4AI Immunofluorescence Dataset Summary\n" + "=" * 45)

    # 1. Number of treatments
    treatment_col = df_merged["treatment"]
    emit(f"\n💊 Number of treatments: {treatment_col.nunique()}")
    for label, n_rows in treatment_col.value_counts().items():
        emit(f"  - {label}: {n_rows} image-channel combinations")

    # 2. Number of samples (unique image IDs) per treatment
    emit("\n🧪 Number of unique samples per treatment:")
    id_treatment_pairs = df_merged[["id", "treatment"]].drop_duplicates()
    for label, n_ids in id_treatment_pairs.groupby("treatment").size().items():
        emit(f"  - {label}: {n_ids} samples")

    # 3. Image size distribution (parallelized)
    emit("\n🖼 Image size distribution:")

    # Back to wide format: one row per id, one file-path column per channel
    long_paths = df_merged[["id", "channel", "filepath"]].drop_duplicates()
    df_shapes = long_paths.pivot(index="id", columns="channel", values="filepath").reset_index()
    df_shapes = df_shapes.merge(
        df_merged[["id", "treatment"]].drop_duplicates(),
        on="id",
        how="left",
    )

    def shape_or_none(record):
        # Returns (height, width), or None after reporting why the image could not be loaded.
        try:
            return load_multichannel_image(record).shape[:2]
        except Exception as e:
            emit(f"  ⚠️ Error loading image for ID {record['id']}: {e}")
            return None

    emit("🔄 Computing image shapes in parallel...")
    shapes = Parallel(n_jobs=n_jobs, backend="threading")(
        delayed(shape_or_none)(record)
        for _, record in tqdm(df_shapes.iterrows(), total=len(df_shapes))
    )
    df_shapes["shape"] = shapes
    size_histogram = Counter(s for s in shapes if s is not None)
    for (height, width), n_images in size_histogram.items():
        emit(f"  - {height}x{width}: {n_images} composite/multi-channel images")

    # 4. Green channel antibody diversity
    is_green = df_merged["channel"] == "green"
    green_ids = sorted(set(df_merged.loc[is_green, "antibody_hpa_id"].dropna().tolist()))
    emit(f"\n🟩 Number of unique antibodies in green channel (protein target): {len(green_ids)}")

    # 5. Red, Blue, Yellow antibody/stain names with icons
    emit("\n🎯 Antibodies/stains used in other channels:")

    icons = {"red": "🟥", "blue": "🟦", "yellow": "🟨"}

    for ch in ["red", "blue", "yellow"]:
        rows = df_merged[df_merged["channel"] == ch]
        labels = set(rows["antibody_hpa_id"].dropna().tolist())
        labels.update(rows["antibody_name"].dropna().tolist())
        ordered = sorted(labels)
        icon = icons.get(ch, "🔹")
        emit(f"\n  {icon} {ch.upper()} channel antibodies/stains ({len(ordered)}):")
        for label in ordered:
            emit(f"    - {label}")

    emit("\n✅ Summary complete.\n")

In [12]:
# Index every channel image under the default base path (BASE_PATH):
# one row per image id, one file-path column per channel.
df_images = collect_image_paths()
df_images

Unnamed: 0,id,treatment,blue,green,red,yellow
0,B2AI_1_Vorinostat_G7_R4_z01,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_1_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_1_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_1_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_1_Vo...
1,B2AI_1_Vorinostat_E9_R7_z01,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_1_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_1_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_1_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_1_Vo...
2,B2AI_3_Vorinostat_B11_R14_z02,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_3_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_3_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_3_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_3_Vo...
3,B2AI_4_Vorinostat_A8_R1_z01,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_4_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_4_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_4_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_4_Vo...
4,B2AI_5_Vorinostat_A10_R8_z02,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_5_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_5_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_5_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_5_Vo...
...,...,...,...,...,...,...
4457,B2AI_4_Vorinostat_B12_R16_z01,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_4_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_4_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_4_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_4_Vo...
4458,B2AI_2_Vorinostat_D12_R14_z00,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_2_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_2_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_2_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_2_Vo...
4459,B2AI_4_Vorinostat_F1_R7_z01,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_4_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_4_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_4_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_4_Vo...
4460,B2AI_1_Vorinostat_A10_R12_z01,vorinostat,../CM4AI/Three/raw/vorinostat/blue/B2AI_1_Vori...,../CM4AI/Three/raw/vorinostat/green/B2AI_1_Vor...,../CM4AI/Three/raw/vorinostat/red/B2AI_1_Vorin...,../CM4AI/Three/raw/vorinostat/yellow/B2AI_1_Vo...


In [13]:
# Sanity check: show rows whose id occurs more than once (keep=False marks every copy).
# Column width is unlimited so complete file paths are visible; an empty table means all ids are unique.
with pd.option_context('display.max_colwidth', None):
    display(df_images[df_images.duplicated(subset="id", keep=False)].head())

Unnamed: 0,id,treatment,blue,green,red,yellow


In [14]:
# Per-file metadata parsed from each treatment folder's ro-crate-metadata.json.
df_meta = load_rocrate_metadata_with_antibodies()
# Wide -> long: one row per (id, channel) with that channel's file path.
df_images_melted = df_images.melt(
    id_vars=["id"],  # "treatment" is left out on purpose; after the merge it comes from df_meta only
    value_vars=["blue", "green", "red", "yellow"],
    var_name="channel",
    value_name="filepath"
)

# Left join keeps every image file; rows without a metadata match get NaN in the metadata columns.
df_merged = df_images_melted.merge(df_meta, on=["id", "channel"], how="left")

In [16]:
# Write 1_image_gene_node_attributes.tsv into BASE_PATH/<treatment>/ for each treatment in df_merged.
save_image_gene_node_attributes(df_merged, base_output_dir=BASE_PATH)

✅ Saved: ../CM4AI/Three/raw/vorinostat/1_image_gene_node_attributes.tsv


In [17]:
import os

from cellmaps_image_embedding.runner import DensenetEmbeddingGenerator
from cellmaps_image_embedding.runner import CellmapsImageEmbedder

# Same location as BASE_PATH: every sub-directory is handled as one treatment.
input_base_path = "../CM4AI/Three/raw"
# NOTE(review): only used to build `image_interim_path`, which nothing in this cell reads.
image_interim_base_path = "../CM4AI/Three/pipeline_images"
# Embeddings are written to <embedding_base_path>/<treatment_folder>.
embedding_base_path = "../CM4AI/Three/embedding"

# Run the image-embedding step once per treatment folder; plain files in the root are skipped.
for treatment_folder in os.listdir(input_base_path):
    input_path = os.path.join(input_base_path, treatment_folder)
    if not os.path.isdir(input_path):
        continue
    # NOTE(review): `manifest_path` and `image_interim_path` are computed but not passed
    # to anything below — confirm whether they are still needed.
    manifest_path = os.path.join(input_path, "manifest.csv")
    image_interim_path = os.path.join(image_interim_base_path, treatment_folder)
    embedding_path = os.path.join(embedding_base_path, treatment_folder)

    # NOTE(review): the model file name contains "fold0" while fold=1 is passed —
    # confirm this pairing is intended.
    gen = DensenetEmbeddingGenerator(
        input_path,
        outdir=embedding_path,
        model_path="https://github.com/CellProfiling/densenet/releases/download/v0.1.0/external_crop512_focal_slov_hardlog_class_densenet121_dropout_i768_aug2_5folds_fold0_final.pth",
        fold=1
    )
    embedder = CellmapsImageEmbedder(
        outdir=embedding_path,
        inputdir=input_path,
        embedding_generator=gen,
        name=f"{treatment_folder} IF Embedding",
        organization_name="CM4AI",
        project_name="CM4AI IF Embedding Tutorial"
    )
    # The value returned by run() is discarded here, so it is not checked inside the loop.
    embedder.run()

The project name for RO-Crate /data/user/ysong2/CM4AI/Three/raw/vorinostat is missing from the metadata. Please provide a name to uphold FAIR principles. Execution will proceed without the  name.
The organization name for RO-Crate /data/user/ysong2/CM4AI/Three/raw/vorinostat is missing from the metadata. Please provide a name to uphold FAIR principles. Execution will proceed without the  name.
Downloading external_crop512_focal_slov_hardlog_class_densenet121_dropout_i768_aug2_5folds_fold0_final.pth: 100%|██████████| 66.1M/66.1M [00:00<00:00, 143MB/s] 


load model: /data/user/ysong2/CM4AI/Three/embedding/vorinostat/model.pth


100%|██████████| 4462/4462 [37:23<00:00,  1.99it/s]  


In [25]:
from cellmaps_ppi_embedding.runner import Node2VecEmbeddingGenerator
from cellmaps_ppi_embedding.runner import CellMapsPPIEmbedder
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


In [26]:
# inputdir is where get_apms_edgelist_file() locates the edge list; outdir receives the PPI embedding.
inputdir = '../CM4AI/1.ppi_download'
outdir = '../CM4AI/2.ppi_embedding'
# Build the Node2Vec generator from the tab-delimited edge list, read into a networkx graph.
gen = Node2VecEmbeddingGenerator(nx_network=nx.read_edgelist(CellMapsPPIEmbedder.get_apms_edgelist_file(inputdir),
                                                             delimiter='\t'))

x =CellMapsPPIEmbedder(outdir=outdir,
                       embedding_generator=gen,
                      inputdir=inputdir)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

Computing transition probabilities: 100%|██████████| 1362/1362 [00:06<00:00, 208.97it/s]
Generating walks (CPU: 2): 100%|██████████| 2/2 [00:08<00:00,  4.10s/it]

0

In [3]:
# cell map untreated
# Co-embedding step: combines the PPI embedding with the image embedding of the untreated images.
# Uses `os`, which this cell does not import itself.

from cellmaps_coembedding.runner import MuseCoEmbeddingGenerator
from cellmaps_coembedding.runner import CellmapsCoEmbedder

ppi_embeddingdir = '../CM4AI/2.ppi_embedding'
image_embeddingdir = '../CM4AI/embedding.bak/untreated'
outdir = '../CM4AI/3_new.coembedding_untreated'
# The generator is given the absolute output path; the runner below gets the relative one.
gen = MuseCoEmbeddingGenerator(ppi_embeddingdir=ppi_embeddingdir,
                               image_embeddingdir=image_embeddingdir,
                               outdir=os.path.abspath(outdir))

x = CellmapsCoEmbedder(outdir=outdir,
                      inputdirs=[ppi_embeddingdir, image_embeddingdir],
                      embedding_generator=gen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

Saving embedding: 0it [00:00, ?it/s]

Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.042444705963134766 seconds
Jaccard graph constructed in 1.0366106033325195 seconds
Wrote graph to binary file in 0.007222175598144531 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.666492
Louvain completed 21 runs in 0.7403297424316406 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 2.8524398803710938 seconds
Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.014791011810302734 seconds
Jaccard graph constructed in 1.0904710292816162 seconds
Wrote graph to binary file in 0.003053903579711914 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.68125
Louvain completed 21 runs in 0.6097586154937744 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 2.698620080947876 seconds
Finding 10 nearest neighbors using cosine metr

Saving embedding: 97it [00:56,  1.72it/s]


0

In [4]:
# Hierarchy step for the untreated co-embedding produced in '../CM4AI/3_new.coembedding_untreated'.
from cellmaps_generate_hierarchy.ppi import CosineSimilarityPPIGenerator
from cellmaps_generate_hierarchy.hierarchy import CDAPSHiDeFHierarchyGenerator
from cellmaps_generate_hierarchy.maturehierarchy import HiDeFHierarchyRefiner
from cellmaps_generate_hierarchy.hcx import HCXFromCDAPSCXHierarchy
from cellmaps_generate_hierarchy.runner import CellmapsGenerateHierarchy

inputdir = '../CM4AI/3_new.coembedding_untreated'
outdir = '../CM4AI/5.2_new_hierarchy'
# The PPI generator reads embeddings from the co-embedding directory (passed as a one-element list).
ppigen = CosineSimilarityPPIGenerator(embeddingdirs=[inputdir])

refiner = HiDeFHierarchyRefiner()

converter = HCXFromCDAPSCXHierarchy()

# The hierarchy generator is configured with the refiner and the HCX converter created above.
hiergen = CDAPSHiDeFHierarchyGenerator(refiner=refiner,
                                       hcxconverter=converter)

# NOTE(review): `inputdirs` receives a single path string here, not a list — confirm the runner accepts that.
x = CellmapsGenerateHierarchy(outdir=outdir,
                              inputdirs=inputdir,
                              ppigen=ppigen,
                              hiergen=hiergen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

Generating hierarchy: 15it [00:00, 32.48it/s]

Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX





0

In [5]:
# Hierarchy evaluation step: reads the hierarchy written to '../CM4AI/5.2_new_hierarchy'.
from cellmaps_hierarchyeval.runner import CellmapshierarchyevalRunner

inputdir = '../CM4AI/5.2_new_hierarchy'
outdir = '../CM4AI/6.2_new_hierarchyeval'

x = CellmapshierarchyevalRunner(outdir=outdir,
                               hierarchy_dir=inputdir)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

0

In [6]:
##### Segmentation
# Co-embedding step repeated with the image embedding from '../CM4AI/embedding_Seg/untreated'.
# Uses `os`, which this cell does not import itself.

from cellmaps_coembedding.runner import MuseCoEmbeddingGenerator
from cellmaps_coembedding.runner import CellmapsCoEmbedder

ppi_embeddingdir = '../CM4AI/2.ppi_embedding'
image_embeddingdir = '../CM4AI/embedding_Seg/untreated'
outdir = '../CM4AI/3_Seg.coembedding_untreated'
# The generator is given the absolute output path; the runner below gets the relative one.
gen = MuseCoEmbeddingGenerator(ppi_embeddingdir=ppi_embeddingdir,
                               image_embeddingdir=image_embeddingdir,
                               outdir=os.path.abspath(outdir))

x = CellmapsCoEmbedder(outdir=outdir,
                      inputdirs=[ppi_embeddingdir, image_embeddingdir],
                      embedding_generator=gen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()


Saving embedding: 0it [00:00, ?it/s]

Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.022666215896606445 seconds
Jaccard graph constructed in 1.1037273406982422 seconds
Wrote graph to binary file in 0.002635478973388672 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.638028
Louvain completed 21 runs in 0.8151848316192627 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 3.0677661895751953 seconds
Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.015357732772827148 seconds
Jaccard graph constructed in 1.155275583267212 seconds
Wrote graph to binary file in 0.0028772354125976562 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.605502
Louvain completed 21 runs in 0.8059840202331543 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 3.0744400024414062 seconds
Finding 10 nearest neighbors using cosine me

Saving embedding: 87it [00:38,  2.28it/s]


0

In [7]:
# Hierarchy step for the co-embedding in '../CM4AI/3_Seg.coembedding_untreated'.
from cellmaps_generate_hierarchy.ppi import CosineSimilarityPPIGenerator
from cellmaps_generate_hierarchy.hierarchy import CDAPSHiDeFHierarchyGenerator
from cellmaps_generate_hierarchy.maturehierarchy import HiDeFHierarchyRefiner
from cellmaps_generate_hierarchy.hcx import HCXFromCDAPSCXHierarchy
from cellmaps_generate_hierarchy.runner import CellmapsGenerateHierarchy

inputdir = '../CM4AI/3_Seg.coembedding_untreated'
outdir = '../CM4AI/5.2_Seg_hierarchy'
# The PPI generator reads embeddings from the co-embedding directory (passed as a one-element list).
ppigen = CosineSimilarityPPIGenerator(embeddingdirs=[inputdir])

refiner = HiDeFHierarchyRefiner()

converter = HCXFromCDAPSCXHierarchy()

# The hierarchy generator is configured with the refiner and the HCX converter created above.
hiergen = CDAPSHiDeFHierarchyGenerator(refiner=refiner,
                                       hcxconverter=converter)

# NOTE(review): `inputdirs` receives a single path string here, not a list — confirm the runner accepts that.
x = CellmapsGenerateHierarchy(outdir=outdir,
                              inputdirs=inputdir,
                              ppigen=ppigen,
                              hiergen=hiergen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

Generating hierarchy: 15it [00:00, 147.30it/s]


Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX




0

In [8]:
# Hierarchy evaluation step: reads the hierarchy written to '../CM4AI/5.2_Seg_hierarchy'.
from cellmaps_hierarchyeval.runner import CellmapshierarchyevalRunner

inputdir = '../CM4AI/5.2_Seg_hierarchy'
outdir = '../CM4AI/6.2_Seg_hierarchyeval'

x = CellmapshierarchyevalRunner(outdir=outdir,
                               hierarchy_dir=inputdir)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

0

In [16]:
##### Segmentation vorinostat
# Co-embedding step repeated with the image embedding from '../CM4AI/embedding_Seg_vorinostat/untreated'.
# NOTE(review): the input sub-folder and the output name both say "untreated" although this
# cell is labelled vorinostat — confirm the paths are the intended ones.
# Uses `os`, which this cell does not import itself.

from cellmaps_coembedding.runner import MuseCoEmbeddingGenerator
from cellmaps_coembedding.runner import CellmapsCoEmbedder

ppi_embeddingdir = '../CM4AI/2.ppi_embedding'
image_embeddingdir = '../CM4AI/embedding_Seg_vorinostat/untreated'
outdir = '../CM4AI/3_Seg_vorinostat.coembedding_untreated'
# The generator is given the absolute output path; the runner below gets the relative one.
gen = MuseCoEmbeddingGenerator(ppi_embeddingdir=ppi_embeddingdir,
                               image_embeddingdir=image_embeddingdir,
                               outdir=os.path.abspath(outdir))

x = CellmapsCoEmbedder(outdir=outdir,
                      inputdirs=[ppi_embeddingdir, image_embeddingdir],
                      embedding_generator=gen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()


Saving embedding: 0it [00:00, ?it/s]

Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.03680229187011719 seconds
Jaccard graph constructed in 1.1397716999053955 seconds
Wrote graph to binary file in 0.005115032196044922 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.643015
Louvain completed 21 runs in 1.0079474449157715 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 3.428480386734009 seconds
Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.027510643005371094 seconds
Jaccard graph constructed in 1.2344343662261963 seconds
Wrote graph to binary file in 0.003612041473388672 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.665033
Louvain completed 21 runs in 0.9677505493164062 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 3.3857030868530273 seconds
Finding 10 nearest neighbors using cosine metr

Saving embedding: 87it [00:42,  2.06it/s]


0

In [17]:
# Hierarchy step for the co-embedding in '../CM4AI/3_Seg_vorinostat.coembedding_untreated'.
from cellmaps_generate_hierarchy.ppi import CosineSimilarityPPIGenerator
from cellmaps_generate_hierarchy.hierarchy import CDAPSHiDeFHierarchyGenerator
from cellmaps_generate_hierarchy.maturehierarchy import HiDeFHierarchyRefiner
from cellmaps_generate_hierarchy.hcx import HCXFromCDAPSCXHierarchy
from cellmaps_generate_hierarchy.runner import CellmapsGenerateHierarchy

inputdir = '../CM4AI/3_Seg_vorinostat.coembedding_untreated'
outdir = '../CM4AI/5.2_Seg_vorinostat_hierarchy'
# The PPI generator reads embeddings from the co-embedding directory (passed as a one-element list).
ppigen = CosineSimilarityPPIGenerator(embeddingdirs=[inputdir])

refiner = HiDeFHierarchyRefiner()

converter = HCXFromCDAPSCXHierarchy()

# The hierarchy generator is configured with the refiner and the HCX converter created above.
hiergen = CDAPSHiDeFHierarchyGenerator(refiner=refiner,
                                       hcxconverter=converter)

# NOTE(review): `inputdirs` receives a single path string here, not a list — confirm the runner accepts that.
x = CellmapsGenerateHierarchy(outdir=outdir,
                              inputdirs=inputdir,
                              ppigen=ppigen,
                              hiergen=hiergen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

Generating hierarchy: 15it [00:00, 108.75it/s]


Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX




0

In [18]:
# Hierarchy evaluation step: reads the hierarchy written to '../CM4AI/5.2_Seg_vorinostat_hierarchy'.
from cellmaps_hierarchyeval.runner import CellmapshierarchyevalRunner

inputdir = '../CM4AI/5.2_Seg_vorinostat_hierarchy'
outdir = '../CM4AI/6.2_Seg_vorinostat_hierarchyeval'

x = CellmapshierarchyevalRunner(outdir=outdir,
                               hierarchy_dir=inputdir)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

0

In [19]:
##### Segmentation paclitaxel
# (The heading used to say "vorinostat"; every path in this cell refers to paclitaxel.)
# Co-embedding step repeated with the image embedding from '../CM4AI/embedding_Seg_paclitaxel/untreated'.
# NOTE(review): the input sub-folder and the output name both say "untreated" although the
# directories are named for paclitaxel — confirm the paths are the intended ones.
# Uses `os`, which this cell does not import itself.

from cellmaps_coembedding.runner import MuseCoEmbeddingGenerator
from cellmaps_coembedding.runner import CellmapsCoEmbedder

ppi_embeddingdir = '../CM4AI/2.ppi_embedding'
image_embeddingdir = '../CM4AI/embedding_Seg_paclitaxel/untreated'
outdir = '../CM4AI/3_Seg_paclitaxel.coembedding_untreated'
# The generator is given the absolute output path; the runner below gets the relative one.
gen = MuseCoEmbeddingGenerator(ppi_embeddingdir=ppi_embeddingdir,
                               image_embeddingdir=image_embeddingdir,
                               outdir=os.path.abspath(outdir))

x = CellmapsCoEmbedder(outdir=outdir,
                      inputdirs=[ppi_embeddingdir, image_embeddingdir],
                      embedding_generator=gen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()


Saving embedding: 0it [00:00, ?it/s]

Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.04073500633239746 seconds
Jaccard graph constructed in 1.133939266204834 seconds
Wrote graph to binary file in 0.0036144256591796875 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.623025
After 2 runs, maximum modularity is Q = 0.62597
Louvain completed 22 runs in 1.257913589477539 seconds
Sorting communities by size, please wait ...
PhenoGraph completed in 3.580003499984741 seconds
Finding 10 nearest neighbors using cosine metric and 'brute' algorithm
Neighbors computed in 0.028148412704467773 seconds
Jaccard graph constructed in 1.1031773090362549 seconds
Wrote graph to binary file in 0.003516674041748047 seconds
Running Louvain modularity optimization
After 1 runs, maximum modularity is Q = 0.514514
After 2 runs, maximum modularity is Q = 0.525203
After 3 runs, maximum modularity is Q = 0.529643
After 6 runs, maximum modularity is Q = 0.532437
Louv

Saving embedding: 80it [02:03,  1.54s/it]


0

In [20]:
# Hierarchy step for the co-embedding in '../CM4AI/3_Seg_paclitaxel.coembedding_untreated'.
from cellmaps_generate_hierarchy.ppi import CosineSimilarityPPIGenerator
from cellmaps_generate_hierarchy.hierarchy import CDAPSHiDeFHierarchyGenerator
from cellmaps_generate_hierarchy.maturehierarchy import HiDeFHierarchyRefiner
from cellmaps_generate_hierarchy.hcx import HCXFromCDAPSCXHierarchy
from cellmaps_generate_hierarchy.runner import CellmapsGenerateHierarchy

inputdir = '../CM4AI/3_Seg_paclitaxel.coembedding_untreated'
outdir = '../CM4AI/5.2_Seg_paclitaxel_hierarchy'
# The PPI generator reads embeddings from the co-embedding directory (passed as a one-element list).
ppigen = CosineSimilarityPPIGenerator(embeddingdirs=[inputdir])

refiner = HiDeFHierarchyRefiner()

converter = HCXFromCDAPSCXHierarchy()

# The hierarchy generator is configured with the refiner and the HCX converter created above.
hiergen = CDAPSHiDeFHierarchyGenerator(refiner=refiner,
                                       hcxconverter=converter)

# NOTE(review): `inputdirs` receives a single path string here, not a list — confirm the runner accepts that.
x = CellmapsGenerateHierarchy(outdir=outdir,
                              inputdirs=inputdir,
                              ppigen=ppigen,
                              hiergen=hiergen)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

Generating hierarchy: 15it [00:00, 124.54it/s]


Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX
Generating CX




0

In [21]:
# Hierarchy evaluation step: reads the hierarchy written to '../CM4AI/5.2_Seg_paclitaxel_hierarchy'.
from cellmaps_hierarchyeval.runner import CellmapshierarchyevalRunner

inputdir = '../CM4AI/5.2_Seg_paclitaxel_hierarchy'
outdir = '../CM4AI/6.2_Seg_paclitaxel_hierarchyeval'

x = CellmapshierarchyevalRunner(outdir=outdir,
                               hierarchy_dir=inputdir)
# Last expression of the cell: the value returned by run() is shown as the cell output.
x.run()

0