In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os
import torch
import matplotlib.pyplot as plt
import numpy as np

# Creating your submission_config file

This file will tell the preprocessing pipeline how each submission should be processed. I will show two examples of possible submissions and explain how different parameters affect the pipeline.

In [29]:
# Root directory of all submissions and location of the ground-truth volumes.
path_to_sub_set = "/path/to/submissions/"
path_gt = "/path/to/ground_truth/"

# Each submission is stored in its own folder below the root directory.
submission1_path = os.path.join(path_to_sub_set, "path/to/submission1")
submission2_path = os.path.join(path_to_sub_set, "path/to/submission2")

# The config is assembled entry by entry: one "gt" entry for the ground truth
# followed by one integer-keyed entry per submission.
submission_config = {}

submission_config["gt"] = dict(
    name="gt",
    path=path_gt,
    box_size=224,
    pixel_size=1.073 * 2,
    ref_align_fname="1.mrc",
)

# First example submission: this one also points to a populations file.
submission_config[0] = dict(
    name="submission1",
    align=0,
    box_size=144,
    pixel_size=1.073 * 2,
    path=submission1_path,
    populations_file="path/to/populations_file1",
)

# Second example submission: different box_size / pixel_size / align values,
# and no populations_file entry.
submission_config[1] = dict(
    name="submission2",
    align=1,
    box_size=288,
    pixel_size=1.073,
    path=submission2_path,
)

In [31]:
# Serialise the config to disk as indented JSON.
with open("submission_config.json", "w") as config_file:
    config_file.write(json.dumps(submission_config, indent=4))

# Read the file back in. JSON object keys are always strings, so the integer
# keys 0 and 1 of the in-memory dict come back as "0" and "1".
with open("submission_config.json", "r") as config_file:
    submission_config = json.loads(config_file.read())

After you create your submission_config, grab a copy of the file "config_preproc.yaml" from the provided config_files and change the path for "submission_config_file" so that it points to the file we created in the previous cell. Also change the path for the output. You can leave the rest of the parameters untouched. Please see the publication "Singer, A., & Yang, R. (2024). Alignment of density maps in Wasserstein distance. Biological Imaging, 4, e5" for more details. Then simply run:

```bash
cryo_challenge run_preprocessing --config /path/to/config_preproc.yaml
```

Note: make sure to activate your environment and have the package installed!

You can run the following cell to visualize your volumes (more precisely, a projection of each of them):

In [None]:
n_submissions = 2  # change this to however many submissions you preprocessed

# Derive the subplot grid from n_submissions (at most 6 panels per row) so the
# layout no longer has to be edited by hand. squeeze=False keeps `ax` a 2-D
# array even for a single row/column, so flattening below always works.
n_cols = 6
n_rows = max(1, (n_submissions + n_cols - 1) // n_cols)
fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 4 * n_rows), squeeze=False)
panels = ax.flatten()

for i in range(n_submissions):
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code -- only load preprocessing outputs that you produced yourself.
    submission = torch.load(f"/path/to/output/submission_{i}.pt")
    volumes = submission["volumes"]
    print(volumes.shape, submission["id"])

    # Show a random volume to check that everything went fine. The index is
    # drawn from the actual number of volumes instead of a fixed range, so it
    # can never run past the end of a submission with few volumes.
    idx = np.random.randint(0, len(volumes))

    # Summing along the first axis of the volume gives a 2-D projection.
    panels[i].imshow(volumes[idx].sum(axis=0))
    panels[i].set_title(submission["id"])

# Hide the panels that did not receive a submission.
for unused_panel in panels[n_submissions:]:
    unused_panel.set_axis_off()