In [None]:
# One-time preparation (needs internet access): download the requirements with
# the command below, then upload the downloaded files as a Kaggle dataset so
# they can be installed offline by the commands that follow.
# !pip download -r requirements.txt --quiet
# Copy the asciitree archive into a working folder under its real name: the
# dataset copy carries a trailing underscore (`.tar.gz_`), and the copy restores
# the `.tar.gz` suffix -- presumably so pip can recognise it (TODO confirm).
! mkdir -p /kaggle/working/asciitree
! cp /kaggle/input/d/etiennedoumazane/asciitree/asciitree-0.3.3.tar.gz_ /kaggle/working/asciitree/asciitree-0.3.3.tar.gz
# Offline installs: --no-index skips the package index and --find-links makes
# pip look for distributions in the given local folder instead.
! pip install asciitree --no-index --find-links /kaggle/working/asciitree --quiet
! pip install zarr --no-index --find-links /kaggle/input/zarr-dependencies --quiet

In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import zarr
from skimage.feature import peak_local_max
from skimage.filters import gaussian


In [6]:
# Root of the competition data. Defaults to the Kaggle input mount; set the
# DATA_DIR environment variable to point at another copy of the data.
kaggle_data_dir = '/kaggle/input/czii-cryo-et-object-identification'
data_dir = Path(os.getenv("DATA_DIR", kaggle_data_dir))

# Particle class names (the same names are used as the keys of FREQUENCIES).
PARTICLE_TYPES = [
    "apo-ferritin",
    "beta-galactosidase",
    "ribosome",
    "thyroglobulin",
    "virus-like-particle",
]

# Probability with which generate_random_particle_list draws each particle
# type; the values sum to 1.
FREQUENCIES = {
    "apo-ferritin": 0.3,
    "beta-galactosidase": 0.1,
    "ribosome": 0.3,
    "thyroglobulin": 0.2,
    "virus-like-particle": 0.1,
}

# Per-axis multiplier applied to peak indices in get_coordinates.
# Presumably the voxel spacing (cf. the "VoxelSpacing10.000" directory name)
# -- TODO confirm the unit.
SCALE = (10,) * 3


In [9]:
def generate_experiment_list(data_dir):
    """Return the names of the test experiment runs, sorted alphabetically.

    Only non-hidden sub-directories of ``<data_dir>/test/static/ExperimentRuns``
    are reported. Stray files or hidden folders (for example ``.DS_Store`` or
    ``.ipynb_checkpoints``) would otherwise be turned into bogus zarr paths
    downstream. Sorting makes the row order, and therefore the generated ids,
    independent of the file system's listing order.

    Args:
        data_dir: Root data directory (``str`` or ``pathlib.Path``).

    Returns:
        list[str]: Experiment directory names in sorted order.

    Raises:
        FileNotFoundError: If the ``ExperimentRuns`` directory does not exist.
    """
    runs_dir = Path(data_dir) / 'test/static/ExperimentRuns'
    return sorted(
        entry.name
        for entry in runs_dir.iterdir()
        if entry.is_dir() and not entry.name.startswith('.')
    )

def generate_path_list(data_dir):
    """Build the path to the denoised zarr volume of every test experiment.

    Args:
        data_dir: Root data directory; it is joined with ``/``, so it must be
            a ``pathlib.Path``.

    Returns:
        list: One path per name returned by ``generate_experiment_list``, in
        the same order, each of the form
        ``<data_dir>/test/static/ExperimentRuns/<experiment>/VoxelSpacing10.000/denoised.zarr``.
    """
    volume_paths = []
    for experiment in generate_experiment_list(data_dir):
        relative = f'test/static/ExperimentRuns/{experiment}/VoxelSpacing10.000/denoised.zarr'
        volume_paths.append(data_dir / relative)
    return volume_paths

def get_coordinates(path):
    """Detect candidate particle positions in one volume.

    The zarr store at ``path`` is opened and its element ``0`` is used as the
    volume (presumably the full-resolution level -- TODO confirm). For each
    smoothing width (5, 10 and 15) the volume is Gaussian-filtered and the
    local maxima of ``1 - filtered`` are collected, i.e. the local minima of
    the smoothed volume, with the same value used as ``min_distance``.

    Peaks found at the different widths are simply concatenated; nothing is
    de-duplicated.

    NOTE(review): ``peak_local_max`` reports indices in array-axis order, while
    ``generate_table`` labels the three values "x", "y", "z" -- confirm that the
    stored axis order really matches.

    Args:
        path: Location of the zarr store to open.

    Returns:
        list: One length-3 array per detected peak, holding the peak indices
        multiplied element-wise by ``SCALE``.
    """
    volume = zarr.open(path)[0]
    scale = np.array(SCALE)
    found = []
    for width in (5, 10, 15):
        smoothed = gaussian(volume, width)
        # Invert so that dark blobs become maxima.
        peaks = peak_local_max(1 - smoothed, min_distance=width)
        found.extend(peaks * scale)
    return found

def generate_random_particle_list(n_particles, frequencies=None, rng=None):
    """Draw ``n_particles`` random particle-type labels.

    All labels are sampled in a single vectorised call instead of one
    ``np.random.choice`` call per particle, which re-validated the probability
    vector on every iteration.

    Args:
        n_particles: Number of labels to draw. Zero or a negative number
            yields an empty list.
        frequencies: Optional mapping ``label -> probability`` (probabilities
            must sum to 1). Defaults to the module-level ``FREQUENCIES``.
        rng: Optional random source exposing ``choice(a, size=..., p=...)``,
            e.g. ``np.random.default_rng(seed)``, for reproducible draws.
            Defaults to NumPy's global random state (``np.random``).

    Returns:
        list[str]: The sampled labels as plain Python strings.
    """
    if frequencies is None:
        frequencies = FREQUENCIES
    # Guard first: a negative size is an error for ``choice``, whereas the
    # loop-based version simply produced an empty list.
    if n_particles <= 0:
        return []
    sampler = np.random if rng is None else rng
    labels = list(frequencies.keys())
    weights = list(frequencies.values())
    return sampler.choice(labels, size=n_particles, p=weights).tolist()


def generate_table(data_dir):
    """Assemble the table of detected particles for all test experiments.

    For every volume path from ``generate_path_list`` the path is printed,
    candidate coordinates are detected with ``get_coordinates`` and each one
    is given a randomly drawn particle type.

    Args:
        data_dir: Root data directory, forwarded to ``generate_path_list``.

    Returns:
        pandas.DataFrame: Columns ``id``, ``x``, ``y``, ``z``, ``experiment``
        and ``particle_type``. ``id`` is a running row number starting at 0
        and ``experiment`` is the third-from-last component of the volume path.

    Raises:
        ValueError: From ``pd.concat`` when there are no experiments at all.
    """
    per_experiment = []
    for volume_path in generate_path_list(data_dir):
        print(volume_path)
        peaks = get_coordinates(volume_path)
        frame = pd.DataFrame(peaks, columns=["x", "y", "z"]).assign(
            experiment=volume_path.parts[-3],
            particle_type=generate_random_particle_list(len(peaks)),
        )
        per_experiment.append(frame)

    table = pd.concat(per_experiment, ignore_index=True)
    # Expose the fresh 0..n-1 index as the leading "id" column.
    return table.rename_axis("id").reset_index()


In [10]:
# Build the prediction table for every test experiment.
df = generate_table(data_dir)

# Kaggle mounts /kaggle/input read-only, so writing next to the data fails
# there. Fall back to the current working directory when data_dir is not
# writable; runs with a writable DATA_DIR keep the original location.
output_dir = data_dir if os.access(data_dir, os.W_OK) else Path.cwd()
# NOTE(review): Kaggle scoring usually expects a file named "submission.csv"
# -- confirm the file name before submitting.
df.to_csv(output_dir / "submission-3.csv", index=False)

/Users/edmz/code/kaggle/cryoet/data/test/static/ExperimentRuns/TS_5_4/VoxelSpacing10.000/denoised.zarr
/Users/edmz/code/kaggle/cryoet/data/test/static/ExperimentRuns/TS_6_4/VoxelSpacing10.000/denoised.zarr
/Users/edmz/code/kaggle/cryoet/data/test/static/ExperimentRuns/TS_69_2/VoxelSpacing10.000/denoised.zarr


In [5]:
# Writes `df` (built by generate_table in an earlier cell) to submission-3.csv.
# NOTE(review): this repeats the export performed right after generate_table,
# and its execution count (5) is older than that cell's (10) -- it looks like
# a leftover and can probably be deleted.
# data_dir may be read-only (Kaggle's /kaggle/input mount is), so fall back to
# the current working directory when it is not writable.
output_dir = data_dir if os.access(data_dir, os.W_OK) else Path.cwd()
df.to_csv(output_dir / "submission-3.csv", index=False)