In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os
import torch
import matplotlib.pyplot as plt
import yaml
from ipyfilechooser import FileChooser

# Creating your submission_config file

This file will tell the preprocessing pipeline how each submission should be processed. I will show two examples of possible submissions and explain how different parameters affect the pipeline.

Note that each submission directory needs maps numbered 01.mrc to 80.mrc and a populations.txt file containing the corresponding population weights (no header), one per line (so 80 rows in total), which must sum to 1.

In [None]:
# Select path to submissions
# Directory-only chooser that starts in the user's home folder.
home_dir = os.path.expanduser("~")
path_to_sub_set = FileChooser(home_dir, show_only_dirs=True)
display(path_to_sub_set)

In [None]:
# Select path to submissions (submission 1)
# Start browsing from the submission-set folder chosen in the previous chooser.
# If nothing has been selected there yet, `selected_path` is None and building
# the widget would fail, so fall back to the home directory instead.
submission1_path = FileChooser(
    path_to_sub_set.selected_path or os.path.expanduser("~")
)
submission1_path.show_only_dirs = True
display(submission1_path)

In [None]:
# Select path to populations (submission 1)
# Files are selectable here (no directory-only restriction). Start from the
# submission-set folder when one has been chosen; otherwise fall back to the
# home directory so the widget can still be built when `selected_path` is None.
submission1_pop_path = FileChooser(
    path_to_sub_set.selected_path or os.path.expanduser("~")
)
display(submission1_pop_path)

In [None]:
# Select path to submissions (submission 2)
# Start browsing from the submission-set folder chosen in the first chooser.
# If nothing has been selected there yet, `selected_path` is None and building
# the widget would fail, so fall back to the home directory instead.
submission2_path = FileChooser(
    path_to_sub_set.selected_path or os.path.expanduser("~")
)
submission2_path.show_only_dirs = True
display(submission2_path)

In [None]:
# Select path to populations (submission 2)
# Files are selectable here (no directory-only restriction). Start from the
# submission-set folder when one has been chosen; otherwise fall back to the
# home directory so the widget can still be built when `selected_path` is None.
submission2_pop_path = FileChooser(
    path_to_sub_set.selected_path or os.path.expanduser("~")
)
display(submission2_pop_path)

In [None]:
# Show the current `selected` value of the submission 2 populations chooser,
# i.e. what will be stored as "populations_file" for submission 2 below.
submission2_pop_path.selected

In [None]:
# Select path to Ground Truth
# Directory-only chooser that starts in the user's home folder.
gt_start_dir = os.path.expanduser("~")
path_gt = FileChooser(gt_start_dir, show_only_dirs=True)
display(path_gt)

In [None]:
# Pixel-size value shared by the entries below; some of them use twice this value.
PIXEL_SIZE = 1.073

# Entry stored under the "gt" key; its path comes from the ground-truth chooser.
gt_entry = {
    "name": "gt",
    "path": path_gt.selected_path,
    "box_size": 224,
    "pixel_size": PIXEL_SIZE * 2,
    "ref_align_fname": "1.mrc",
}

# Entry stored under key 0 (first example submission).
submission1_entry = {
    "path": submission1_path.selected_path,
    "populations_file": submission1_pop_path.selected,
    "name": "submission1",
    "submission_version": 0,  # does not change the submission id
    "box_size": 144,
    "pixel_size": PIXEL_SIZE * 2,
    "flip": 0,
    "align": 0,
}

# Entry stored under key 1 (second example submission).
submission2_entry = {
    "path": submission2_path.selected_path,
    "populations_file": submission2_pop_path.selected,
    "name": "submission2",
    "submission_version": 1,  # makes the id "ice cream name 1"
    "box_size": 288,
    "pixel_size": PIXEL_SIZE,
    "flip": 1,  # flip the z axis. DO AN ALIGN if you set this to 1
    "align": 1,
}

# Assemble the full config: the "gt" entry first, then one entry per submission.
submission_config = {"gt": gt_entry, 0: submission1_entry, 1: submission2_entry}

In [None]:
config_fname = "submission_config.json"

# create submission config
with open(config_fname, "w") as out_file:
    out_file.write(json.dumps(submission_config, indent=4))

# load submission_config from json
# Note: JSON object keys are always strings, so the integer keys 0 and 1
# come back as "0" and "1" after this reload.
with open(config_fname, "r") as in_file:
    submission_config = json.loads(in_file.read())

Lastly, to run the preprocessing pipeline, follow these steps:

0. Make sure to activate your environment and have the package installed!

1. Grab a copy of the file `config_preproc.yaml` from our config file templates.

2. In the copied config file, update the value of `submission_config_file` to match the path to the file we created in the last cell.

3. Optionally, change the other parameters. 
    * Most of the parameters (BOT_* and thresh_percentile) control the alignment. For details on how they work, please see the publication "Singer, A., & Yang, R. (2024). Alignment of density maps in Wasserstein distance. Biological Imaging, 4, e5".

    * The other parameters are self-explanatory. "seed_flavor_assignment" changes which submission gets assigned which ice cream flavor; keep track of this value if you want to be able to revert the anonymization later.

4. Run the command: `cryo_challenge run_preprocessing --config /path/to/config_preproc.yaml`

You can run the following cells to visualize your volumes (more precisely, a projection of them).


In [None]:
# Select path to Config file
# An example of this file is available in the path ../config_files/config_preproc.yaml
# The chooser starts in the home folder and lists only entries matching *.yaml.
config_start_dir = os.path.expanduser("~")
config_preproc_path = FileChooser(config_start_dir, filter_pattern='*.yaml')
display(config_preproc_path)

In [None]:
# Get output path from config file
# Read the chooser through `.selected`, as the populations choosers do, and
# fail early with a clear message when no file has been picked, instead of
# the opaque TypeError that open(None) would raise.
config_preproc_file = config_preproc_path.selected
if config_preproc_file is None:
    raise ValueError(
        "No preprocessing config selected: pick a .yaml file in the chooser "
        "above and re-run this cell."
    )

with open(config_preproc_file, "r") as f:
    config_preproc = yaml.safe_load(f)
output_path = config_preproc["output_path"]

# Absolute output paths are used as-is; relative ones are resolved against
# the parent of the current working directory.
if os.path.isabs(output_path):
    full_output_path = output_path
else:
    full_output_path = os.path.join(os.getcwd(), '..', output_path)

In [None]:
n_submissions = 2  # change this to however many submissions you preprocessed
idx = 0  # index of the volume shown for every submission

# Size the grid from n_submissions so the two cannot drift apart (10x4 for two
# panels, as before). squeeze=False always returns a 2-D array of Axes, so
# flatten() also works when there is only one submission.
fig, ax = plt.subplots(
    1, n_submissions, figsize=(5 * n_submissions, 4), squeeze=False
)
panels = ax.flatten()

for i in range(n_submissions):
    # NOTE: torch.load unpickles the file; only load outputs you produced yourself.
    submission = torch.load(os.path.join(full_output_path, f"submission_{i}.pt"))
    print(submission["volumes"].shape, submission["id"])
    # Summing the selected volume along its first axis gives the 2-D image to show.
    panels[i].imshow(submission["volumes"][idx].sum(axis=0))
    panels[i].set_title(submission["id"])