## Create mesh generation settings

In [2]:
import os
import shutil

import pandas as pd
import yaml


def is_dir_empty_except_hidden(path):
    """Return True when `path` contains nothing but dot-prefixed entries.

    A directory with no entries at all also returns True.
    """
    for entry in os.listdir(path):
        if not entry.startswith("."):
            return False
    return True


# For every crop listed in the spreadsheet, create one job directory per
# non-empty dataset containing a copy of the default dask config and a
# generated run-config.yaml.
df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/Segmentation Challenge _ List of Collections and Crops  - Sheet1.csv"
)
collections = df.collection.unique()
for current_collection in collections:
    print(current_collection)
    collection_df = df[df["collection"] == current_collection]

    zarr_path = f"/nrs/cellmap/data/{current_collection}/staging/groundtruth.zarr"  # f"/nrs/cellmap/bennettd/data/crop_tests/{current_collection}.zarr"
    for _, row in collection_df.iterrows():
        crop = f"crop{row.crop_number}"
        for dataset_name in os.listdir(f"{zarr_path}/{crop}"):
            dataset_path = f"{zarr_path}/{crop}/{dataset_name}"
            # Only sub-directories are treated as datasets; plain files are skipped.
            if not os.path.isdir(dataset_path):
                continue

            # Skip datasets whose s0 directory holds nothing but hidden entries.
            if is_dir_empty_except_hidden(f"{dataset_path}/s0"):
                continue

            crop_dir = f"/groups/cellmap/cellmap/ackermand/new_meshes/scripts/single_resolution/crop_check/{current_collection}/{crop}/{dataset_name}"
            os.makedirs(name=crop_dir, exist_ok=True)
            # shutil.copy raises if the template is missing, whereas the previous
            # `os.system("cp ...")` call ignored a failed copy and went through a shell.
            shutil.copy("./default_meshes/dask-config.yaml", crop_dir)

            run_config_yaml = {
                # "input_path": f"/nrs/cellmap/bennettd/data/crop_tests/{current_collection}.zarr/{crop}/{dataset_name}/s0",
                "input_path": f"{dataset_path}/s0",
                "output_directory": f"/nrs/cellmap/ackermand/new_meshes/meshes/crop_check/{current_collection}/{crop}/{dataset_name}",
                "read_write_roi": ["0:256", "0:256", "0:256"],
                "downsample_factor": 0,
                "target_reduction": 0.5,
                "n_smoothing_iter": 0,
                "remove_smallest_components": False,
                "do_analysis": False,
                "do_legacy_neuroglancer": True,
            }

            with open(f"{crop_dir}/run-config.yaml", "w") as f:
                yaml.dump(run_config_yaml, f)
# for dataset in *; do cd $dataset; for crop in *; do cd $crop; for organelle in *; do bsub -n 4 -P cellmap -o /dev/null -e /dev/null meshify -n 4 $organelle; done; cd ..; done; sleep 15m; cd ..; done;

jrc_cos7-1a
jrc_cos7-1b
jrc_ctl-id8-1
jrc_fly-mb-1a
jrc_fly-vnc-1
jrc_hela-2
jrc_hela-3
jrc_jurkat-1
jrc_macrophage-2
jrc_mus-heart-1
jrc_mus-kidney
jrc_mus-kidney-3
jrc_mus-kidney-glomerulus-2
jrc_mus-liver
jrc_mus-liver-3
jrc_mus-liver-zon-1
jrc_mus-liver-zon-2
jrc_mus-nacc-1
jrc_mus-pancreas-1
jrc_sum159-1
jrc_sum159-4
jrc_ut21-1413-003
jrc_zf-cardiac-1


In [22]:
import pandas as pd
import neuroglancer
from funlib.persistence import open_ds
import os


def add_local_volumes(collection, crop, state):
    """Append one in-memory neuroglancer volume per dataset directory of a crop.

    Args:
        collection: Collection name; selects
            `/nrs/cellmap/bennettd/data/crop_tests/{collection}.zarr`.
        crop: Name of the crop group inside that zarr.
        state: Object exposing `layers.append(name=..., layer=...)`; the caller
            in this notebook passes the state yielded by `viewer.txn()`.
    """
    zarr_path = f"/nrs/cellmap/bennettd/data/crop_tests/{collection}.zarr"
    crop_path = f"{zarr_path}/{crop}"
    print(crop_path)
    for dataset_name in os.listdir(crop_path):
        # Plain files in the crop group are not datasets.
        if os.path.isdir(f"{crop_path}/{dataset_name}"):
            ds = open_ds(zarr_path, f"{crop}/{dataset_name}/s0")
            coordinate_space = neuroglancer.CoordinateSpace(
                names=["z", "y", "x"],
                units=["nm", "nm", "nm"],
                scales=ds.voxel_size,
                coordinate_arrays=[None] * 3,
            )
            # The whole array is read into memory before it is handed to neuroglancer.
            local_volume = neuroglancer.LocalVolume(
                data=ds.to_ndarray(),
                dimensions=coordinate_space,
                voxel_offset=ds.roi.begin,
            )
            state.layers.append(name=f"{crop}_{dataset_name}", layer=local_volume)


# Open a neuroglancer viewer showing every crop of the first collection listed
# in the spreadsheet, on top of a "raw" image layer.
df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/Segmentation Challenge _ List of Collections and Crops  - Sheet1.csv"
)

collections = df.collection.unique()
neuroglancer.set_server_bind_address("0.0.0.0")
viewer = neuroglancer.Viewer()

current_collection = collections[0]
with viewer.txn() as s:
    raw_layer = neuroglancer.ImageLayer(
        source="precomputed://gs://neuroglancer-public-data/flyem_fib-25/image",
    )
    s.layers["raw"] = raw_layer
    for _, row in df.iterrows():
        # Rows belonging to other collections are ignored.
        if row.collection != current_collection:
            continue
        crop = f"crop{row.crop_number}"
        add_local_volumes(row.collection, crop, s)
print(viewer)

/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop234
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop236
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop237
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop239
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop243
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop247
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop248
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop252
/nrs/cellmap/bennettd/data/crop_tests/jrc_cos7-1a.zarr/crop253


: 

## Write out neuroglancer jsons

In [12]:
import pandas as pd
import neuroglancer
import os
from funlib.persistence import open_ds
import json


def is_dir_empty_except_hidden(path):
    """Tell whether every entry of `path` is hidden (name starts with a dot).

    Returns True for a directory without any entries.
    """
    return not any(not name.startswith(".") for name in os.listdir(path))


neuroglancer.set_server_bind_address("0.0.0.0")


# For every crop in the spreadsheet, build a viewer state with the raw EM layer
# plus one segmentation layer per non-empty dataset, write it to
# <output_dir>/<crop>.json, and record the corresponding URL in `all_info`.
df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/Segmentation Challenge _ List of Collections and Crops  - Sheet1.csv"
)
collections = df.collection.unique()
all_info = []
for current_collection in collections:
    print(current_collection)
    output_dir = f"/nrs/cellmap/ackermand/crop_check/viewer_states/{current_collection}"
    os.makedirs(output_dir, exist_ok=True)

    collection_df = df[df["collection"] == current_collection]

    zarr_path = f"/nrs/cellmap/data/{current_collection}/{current_collection}.zarr"  # f"/nrs/cellmap/bennettd/data/crop_tests/{current_collection}.zarr"
    for _, row in collection_df.iterrows():
        viewer = neuroglancer.Viewer()
        have_set_position = False
        with viewer.txn() as s:
            s.layers["raw"] = neuroglancer.ImageLayer(
                source=f"zarr://https://cellmap-vm1.int.janelia.org/nrs/data/{current_collection}/{current_collection}.zarr/recon-1/em/fibsem-uint8",
            )
            crop = f"crop{row.crop_number}"
            crop_path = f"{zarr_path}/recon-1/labels/groundtruth/{crop}"
            for dataset_name in os.listdir(crop_path):
                # Only sub-directories are treated as datasets.
                if not os.path.isdir(f"{crop_path}/{dataset_name}"):
                    continue

                # Skip datasets whose s0 directory holds nothing but hidden entries.
                if is_dir_empty_except_hidden(f"{crop_path}/{dataset_name}/s0"):
                    continue

                mesh_dir = f"/nrs/cellmap/ackermand/new_meshes/meshes/crop_check/{current_collection}/{crop}/{dataset_name}/meshes"
                try:
                    # Segment ids are the integer part before ":0" in the mesh file names.
                    segments = [
                        int(mesh_id.split(":0")[0])
                        for mesh_id in os.listdir(mesh_dir)
                        if ":0" in mesh_id
                    ]
                except (OSError, ValueError):
                    # Missing/unreadable mesh directory or a non-integer file stem.
                    # Anything else (including KeyboardInterrupt) now propagates
                    # instead of being silently swallowed.
                    print(f"Error in {current_collection}/{crop}/{dataset_name}")
                    continue
                image_source = f'zarr://https://cellmap-vm1.int.janelia.org/{crop_path.replace("/nrs/cellmap/","/nrs/")}/{dataset_name}'
                mesh_source = f"precomputed://https://cellmap-vm1.int.janelia.org/nrs/ackermand/new_meshes/meshes/crop_check/{current_collection}/{crop}/{dataset_name}/meshes"
                s.layers[f"{crop}_{dataset_name}"] = neuroglancer.SegmentationLayer(
                    source=[image_source, mesh_source], segments=segments
                )

                # Only the "all" layer is left visible.
                if dataset_name != "all":
                    s.layers[f"{crop}_{dataset_name}"].visible = False

                # Center the view on the first dataset that got a layer.
                if not have_set_position:
                    ds = open_ds(
                        zarr_path,
                        f"recon-1/labels/groundtruth/{crop}/{dataset_name}/s0",
                    )
                    position = (ds.roi.begin + ds.roi.end) / 2
                    # Axis order is reversed to match the x, y, z dimensions below
                    # (presumably the ROI is stored z, y, x -- TODO confirm).
                    s.position = position[::-1]
                    s.dimensions = neuroglancer.CoordinateSpace(
                        names=["x", "y", "z"],
                        units=["nm", "nm", "nm"],
                        scales=[1, 1, 1],
                        coordinate_arrays=[
                            None,
                            None,
                            None,
                        ],
                    )

                    have_set_position = True

        # Assuming viewer.state.to_json() returns a dictionary
        viewer_state = viewer.state.to_json()
        # Write the JSON data to a file
        with open(f"{output_dir}/{crop}.json", "w") as json_file:
            json.dump(viewer_state, json_file, indent=4)
        neuroglancer_url = f"https://neuroglancer-demo.appspot.com#!https://cellmap-vm1.int.janelia.org/nrs/ackermand/crop_check/viewer_states/{current_collection}/{crop}.json"
        all_info.append((crop, current_collection, neuroglancer_url))

jrc_cos7-1a
jrc_cos7-1b
jrc_ctl-id8-1
jrc_fly-mb-1a
jrc_fly-vnc-1
jrc_hela-2
jrc_hela-3
jrc_jurkat-1
jrc_macrophage-2
jrc_mus-heart-1
jrc_mus-kidney
jrc_mus-kidney-3
jrc_mus-kidney-glomerulus-2
jrc_mus-liver
jrc_mus-liver-3
jrc_mus-liver-zon-1
jrc_mus-liver-zon-2
jrc_mus-nacc-1
jrc_mus-pancreas-1
jrc_sum159-1
jrc_sum159-4
jrc_ut21-1413-003
jrc_zf-cardiac-1


In [13]:
import pandas as pd

# Tabulate the (crop, collection, url) tuples gathered in `all_info` and save
# them as a CSV without the index column.
df = pd.DataFrame(data=all_info, columns=["crop_number", "collection", "url"])
df.to_csv(path_or_buf="SegmentationChallengeWithNeuroglancerURLs.csv", index=False)

In [42]:
import json

# Assuming viewer.state.to_json() returns a dictionary
# Serialize the state of whatever `viewer` is currently bound in the kernel.
viewer_state = viewer.state.to_json()

# Save it as 4-space-indented JSON.
with open("/nrs/cellmap/ackermand/test/viewer_state.json", "w") as json_file:
    json_file.write(json.dumps(viewer_state, indent=4))

## Check for empty pixels

In [8]:
import pandas as pd
import os
import numpy as np
from funlib.persistence import open_ds
from image_data_interface import ImageDataInterface
import logging
from tqdm import tqdm

# Set the root logger threshold to WARNING so lower-severity records are dropped.
logging.getLogger().setLevel(logging.WARNING)


def is_dir_empty_except_hidden(path):
    """Return True if `path` has no entry whose name lacks a leading dot.

    A directory with zero entries therefore returns True as well.
    """
    visible_entries = [
        entry for entry in os.listdir(path) if not entry.startswith(".")
    ]
    return len(visible_entries) == 0


# Report every "all" dataset that contains voxels with value 0, skipping the
# crops listed in `crops_with_uncertainty`.
crops_with_uncertainty = [163, 157, 319, 320, 115, 191, 196, 214, 224]
df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/Segmentation Challenge _ List of Collections and Crops  - Sheet1.csv"
)
collections = df.collection.unique()
for current_collection in collections:
    collection_df = df[df["collection"] == current_collection]

    zarr_path = f"/nrs/cellmap/data/{current_collection}/{current_collection}.zarr"  # f"/nrs/cellmap/bennettd/data/crop_tests/{current_collection}.zarr"
    for _, row in collection_df.iterrows():
        if row.crop_number in crops_with_uncertainty:
            continue

        crop = f"crop{row.crop_number}"
        crop_path = f"{zarr_path}/recon-1/labels/groundtruth/{crop}"
        for dataset_name in os.listdir(crop_path):
            # Only sub-directories are treated as datasets.
            if not os.path.isdir(f"{crop_path}/{dataset_name}"):
                continue

            # Skip datasets whose s0 directory holds nothing but hidden entries.
            if is_dir_empty_except_hidden(f"{crop_path}/{dataset_name}/s0"):
                continue

            # Only the combined "all" dataset is inspected.
            if dataset_name != "all":
                continue

            data = ImageDataInterface(f"{crop_path}/all/s0").to_ndarray_ds()
            # np.unique returns sorted ids, so the first entry is the smallest one.
            unique_ids, unique_counts = np.unique(data, return_counts=True)
            if unique_ids[0] != 0:
                continue
            print(
                f"{current_collection}/{crop}/all has {len(unique_ids)} unique ids and {unique_counts[0]} empty voxels"
            )

jrc_cos7-1a/crop247/all has 2 unique ids and 806051394 empty voxels
jrc_cos7-1a/crop253/all has 2 unique ids and 807477679 empty voxels
jrc_cos7-1a/crop257/all has 2 unique ids and 137159008 empty voxels
jrc_cos7-1b/crop238/all has 3 unique ids and 268326414 empty voxels
jrc_hela-3/crop100/all has 5 unique ids and 264382206 empty voxels
jrc_hela-3/crop102/all has 3 unique ids and 215277561 empty voxels
jrc_mus-heart-1/crop423/all has 2 unique ids and 55385895 empty voxels
jrc_mus-heart-1/crop452/all has 2 unique ids and 54430449 empty voxels
jrc_mus-kidney/crop179/all has 2 unique ids and 105321086 empty voxels
jrc_mus-kidney/crop184/all has 2 unique ids and 18319096 empty voxels
jrc_mus-kidney/crop221/all has 2 unique ids and 22478108 empty voxels
jrc_mus-kidney/crop229/all has 2 unique ids and 812730390 empty voxels
jrc_mus-kidney/crop230/all has 3 unique ids and 38846544 empty voxels
jrc_mus-kidney/crop231/all has 3 unique ids and 37667960 empty voxels
jrc_mus-kidney-3/crop472/all h

## Check for small isolated pixels

In [22]:
from image_data_interface import ImageDataInterface
import pandas as pd
import os
import numpy as np
import logging

# Set the root logger threshold to WARNING so lower-severity records are dropped.
logging.getLogger().setLevel(logging.WARNING)


def is_dir_empty_except_hidden(path):
    """Check that `path` holds only dot-prefixed entries (or none at all)."""
    first_visible_found = next(
        (True for name in os.listdir(path) if not name.startswith(".")), False
    )
    return not first_visible_found


# Report, for every dataset other than "all", the ids whose voxel count is 5 or
# fewer.
df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/Segmentation Challenge _ List of Collections and Crops  - Sheet1.csv"
)
collections = df.collection.unique()
for current_collection in collections:
    collection_df = df[df["collection"] == current_collection]

    zarr_path = f"/nrs/cellmap/data/{current_collection}/{current_collection}.zarr"  # f"/nrs/cellmap/bennettd/data/crop_tests/{current_collection}.zarr"
    for _, row in collection_df.iterrows():
        crop = f"crop{row.crop_number}"
        crop_path = f"{zarr_path}/recon-1/labels/groundtruth/{crop}"
        for dataset_name in os.listdir(crop_path):
            # Only sub-directories are treated as datasets.
            if not os.path.isdir(f"{crop_path}/{dataset_name}"):
                continue

            # Skip datasets whose s0 directory holds nothing but hidden entries.
            if is_dir_empty_except_hidden(f"{crop_path}/{dataset_name}/s0"):
                continue

            if dataset_name != "all":
                data = ImageDataInterface(
                    f"{crop_path}/{dataset_name}/s0"
                ).to_ndarray_ts()
                ids, counts = np.unique(data[data > 0], return_counts=True)
                # Select the small ids first and test for emptiness afterwards:
                # np.min(counts) raises ValueError when the dataset has no voxel
                # above 0, which used to abort the whole scan.
                small_object_ids = ids[counts <= 5]
                if small_object_ids.size > 0:
                    print(
                        f"{current_collection}/{crop}/{dataset_name} has small objects: {small_object_ids}"
                    )

jrc_cos7-1a/crop234/er_lum has small objects: [6 7 8]
jrc_cos7-1a/crop237/endo has small objects: [7 8]
jrc_cos7-1a/crop237/endo_mem has small objects: [7 8]
jrc_cos7-1a/crop237/er_lum has small objects: [ 2  3  5  6  7  8  9 10]
jrc_cos7-1a/crop239/cyto has small objects: [ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28]
jrc_cos7-1a/crop239/er_lum has small objects: [ 3 11]
jrc_cos7-1a/crop239/er_mem has small objects: [7]
jrc_cos7-1a/crop239/er_mem_all has small objects: [7]
jrc_cos7-1a/crop247/vim has small objects: [34 36 37 38 39 42 43 45 46 48 55 56 62 79 80 96]
jrc_cos7-1a/crop248/cyto has small objects: [2]
jrc_cos7-1a/crop248/endo_lum has small objects: [ 8  9 12 13 17 18 20 22 23 24 25 26 27 28]
jrc_cos7-1a/crop248/er_lum has small objects: [2]
jrc_cos7-1a/crop248/mt_in has small objects: [4]
jrc_cos7-1a/crop252/cyto has small objects: [3]
jrc_cos7-1a/crop253/mt has small objects: [33 34]
jrc_cos7-1a/crop253/mt_out has small objects: [33 34]


### Write out with links

In [59]:
import pandas as pd

# Turn the pasted "has small objects" printout into a CSV that also carries the
# neuroglancer URL of each crop.
df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/SegmentationChallengeWithNeuroglancerURLs.csv"
)

# copy the above to isolated_pixels.txt
with open("isolated_pixels.txt", "r") as file:
    file_contents = file.read()

output = []
# Drop the newlines (long id arrays wrap over several lines), then cut the text
# at every closing bracket; the piece after the last "]" is discarded.
file_contents = file_contents.replace("\n", "")
file_contents = file_contents.split("]")[:-1]
for line in file_contents:
    line = line.replace(" has small objects: ", "")
    path_info, small_object_ids = line.split("[")
    # An ellipsis means the printed array was abbreviated, so the ids are unknown.
    small_object_ids = (
        "lots (thousands?)"
        if "..." in small_object_ids
        else [int(num) for num in small_object_ids.split()]
    )
    dataset, crop, dataset_name = path_info.split("/")
    # Look up the URL recorded for this collection/crop pair (first match).
    url = df.loc[
        (df["collection"] == dataset) & (df["crop_number"] == crop), "url"
    ].iloc[0]

    output.append([dataset, crop, dataset_name, url, small_object_ids])

# write out csv
df = pd.DataFrame(
    data=output,
    columns=["collection", "crop_number", "dataset_name", "url", "small_object_ids"],
)
df.to_csv(path_or_buf="isolated_pixels.csv", index=False)

### Recheck with connectivity 2

In [1]:
from image_data_interface import ImageDataInterface
import pandas as pd
import os
import numpy as np
import logging
import skimage
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster

# Local Dask cluster with 8 workers, one thread each; used by the Client below.
cluster = LocalCluster(n_workers=8, threads_per_worker=1)


def process_rows(rows):
    """Re-label each listed dataset with connectivity 2 and report tiny components.

    For every row, the dataset is loaded, its foreground (`data > 0`) is labelled
    with `skimage.measure.label(..., connectivity=2)`, and components of 5 voxels
    or fewer are collected. Each such component is reported by the smallest
    original id found inside it.

    Args:
        rows: DataFrame providing "collection", "crop_number", "dataset_name"
            and "url" per row. Its column list is reused for the result, so it
            must have exactly five columns.

    Returns:
        DataFrame with the columns of `rows`, holding one row (collection, crop,
        dataset name, url, list of original ids) per dataset that has at least
        one small component. Empty when none is found.

    Raises:
        Exception: if a dataset cannot be loaded; the underlying error is
            attached as `__cause__`.
    """
    logging.getLogger().setLevel(logging.WARNING)
    output = []
    for _, row in rows.iterrows():
        collection = row["collection"]
        crop = row["crop_number"]
        dataset_name = row["dataset_name"]
        url = row["url"]
        dataset_path = f"/nrs/cellmap/data/{collection}/{collection}.zarr/recon-1/labels/groundtruth/{crop}/{dataset_name}/s0"
        try:
            data = ImageDataInterface(dataset_path).to_ndarray_ts()
        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit pass through instead of being reported as a missing path.
            raise Exception(f"Nothing found at path {dataset_path}") from err
        reconnected = skimage.measure.label(data > 0, connectivity=2)
        ids, counts = np.unique(reconnected[reconnected > 0], return_counts=True)
        ids = ids[counts <= 5]

        small_ids = []
        for component_id in ids:
            # Smallest original label inside the component; same value that
            # np.unique(...)[0] yields, without sorting.
            original_id = np.min(data[reconnected == component_id])
            small_ids.append(original_id)

        if len(small_ids) > 0:
            output.append(
                [
                    collection,
                    crop,
                    dataset_name,
                    url,
                    small_ids,
                ]
            )
    return pd.DataFrame(output, columns=rows.columns)


output = []
isolated_pixels_connectivity_1_df = pd.read_csv(
    "/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/isolated_pixels_connectivity_1.csv"
)

# Run process_rows over 100 partitions of the table on the local cluster. The
# cluster is closed afterwards (also on error or interrupt) so its workers are
# not left running; leaving the Client context only closes the client.
try:
    with Client(cluster) as client:

        ddf = dd.from_pandas(isolated_pixels_connectivity_1_df, npartitions=100)
        # The dashboard link points at 127.0.0.1; swap in the workstation name so
        # it can be opened from another machine.
        print(f'Check {client.cluster.dashboard_link.replace("127.0.0.1","ackermand-ws2")}')
        result_ddf = ddf.map_partitions(process_rows, meta=ddf)
        output = result_ddf.compute()
finally:
    cluster.close()
# for _, row in tqdm(
#     isolated_pixels_connectivity_1_df.iterrows(),
#     total=len(isolated_pixels_connectivity_1_df),
# ):
#     collection = row["collection"]
#     crop = row["crop_number"]
#     dataset_name = row["dataset_name"]
#     url = row["url"]
#     data = ImageDataInterface(
#         f"/nrs/cellmap/data/{collection}/{collection}.zarr/recon-1/labels/groundtruth/{crop}/{dataset_name}/s0"
#     ).to_ndarray_ts()
#     reconnected = skimage.measure.label(data > 0, connectivity=2)
#     ids, counts = np.unique(reconnected[reconnected > 0], return_counts=True)
#     ids = ids[counts <= 5]

#     small_ids = []
#     for id in ids:
#         original_id = np.unique(data[reconnected == id])[0]
#         small_ids.append(original_id)
#     if len(small_ids) > 0:
#         output.append(
#             [
#                 collection,
#                 crop,
#                 dataset_name,
#                 url,
#                 small_ids,
#             ]
#         )

INFO:To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
INFO:State start
INFO:Found stale lock file and directory '/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/tmp/dask-scratch-space/scheduler-dsh9fd3d', purging
INFO:Found stale lock file and directory '/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/tmp/dask-scratch-space/scheduler-e7izl8zr', purging
INFO:Found stale lock file and directory '/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/tmp/dask-scratch-space/scheduler-hqn1q9aw', purging
INFO:Found stale lock file and directory '/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/tmp/dask-scratch-space/worker-1lrhq7ap', purging
INFO:Found stale lock file and directory '/groups/scicompsoft/home/ackermand/Programming/segmentation_challenge_meshes/crop_check/

Check http://ackermand-ws2:8787/status


INFO:Remove client Client-13c0c5c7-8692-11ef-a321-b496913a7210
INFO:Received 'close-stream' from tcp://127.0.0.1:53100; closing.
INFO:Remove client Client-13c0c5c7-8692-11ef-a321-b496913a7210
INFO:Close client connection: Client-13c0c5c7-8692-11ef-a321-b496913a7210


KeyboardInterrupt: 

In [10]:
# Display the current value of `output`.
output

Unnamed: 0,collection,crop_number,dataset_name,url,small_object_ids
0,jrc_cos7-1a,crop234,er_lum,https://neuroglancer-demo.appspot.com#!https:/...,"[7, 8]"
1,jrc_cos7-1a,crop237,endo,https://neuroglancer-demo.appspot.com#!https:/...,"[7, 8]"
0,jrc_cos7-1a,crop237,endo_mem,https://neuroglancer-demo.appspot.com#!https:/...,"[7, 8]"
1,jrc_cos7-1a,crop237,er_lum,https://neuroglancer-demo.appspot.com#!https:/...,"[2, 5, 6]"
0,jrc_cos7-1a,crop247,vim,https://neuroglancer-demo.appspot.com#!https:/...,"[34, 36, 37, 39, 42, 43, 45, 46, 55, 62, 79]"
0,jrc_cos7-1a,crop248,cyto,https://neuroglancer-demo.appspot.com#!https:/...,[2]


In [8]:
# Call process_rows directly on the whole table (no Dask partitioning) and
# display the returned DataFrame.
process_rows(isolated_pixels_connectivity_1_df)

Unnamed: 0,collection,crop_number,dataset_name,url,small_object_ids
0,jrc_cos7-1a,crop248,cyto,https://neuroglancer-demo.appspot.com#!https:/...,[2]


In [1]:
import socket

# Name of the machine this kernel runs on, as reported by socket.gethostname().
hostname = socket.gethostname()

In [2]:
# Display the hostname obtained in the previous cell.
hostname

'ackermand-ws2'