In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to the package source are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [31]:
import os
import yaml
import pickle
from ipyfilechooser import FileChooser

from cryo_challenge.data._validation.config_validators import (
    validate_input_config_mtm,
    validate_config_mtm_data, 
        validate_config_mtm_data_submission, 
        validate_config_mtm_data_ground_truth, 
        validate_config_mtm_data_mask, 
    validate_config_mtm_analysis, 
        validate_config_mtm_analysis_normalize, 
    )
from cryo_challenge.data._validation.output_validators import MapToMapResultsValidator
from cryo_challenge.data._validation.config_validators import validate_maptomap_result

Once your data has been preprocessed (the `submission_*.pt` files), you can compute a map-to-map distance matrix for several distance functions by following the steps below.

# 1. Create a config file
The config file (e.g. `config_files/config_map_to_map_distance_matrix.yaml`) has information about how to read the preprocessed submission and ground truth data, and how to perform the map to map distance analysis. 

Below, an example config file is selected, loaded, and displayed:

In [None]:
# Select path to Map to Map config file
# An example of this file is available in the path ../config_files/config_map_to_map.yaml
# NOTE(review): the markdown in this notebook names the example
# config_files/config_map_to_map_distance_matrix.yaml -- confirm which file name is current.
# `config_m2m_path` is the FileChooser widget itself, not a path string; the
# chosen path is read later through its `.value` attribute.
config_m2m_path = FileChooser(os.path.expanduser("~"))  # start from the user's home directory
config_m2m_path.filter_pattern = '*.yaml'  # presumably restricts the listing to YAML files
display(config_m2m_path)  # render the widget; a file must be picked before the next cell runs

In [26]:
# Read the path picked in the FileChooser widget created in the previous cell.
# NOTE(review): `.value` is presumably None until a file has been selected --
# confirm against the installed ipyfilechooser version.
selected_config_path = config_m2m_path.value
if not selected_config_path:
    # Fail with an actionable message instead of the opaque TypeError that
    # open(None) would raise.
    raise ValueError(
        "No config file selected: pick a *.yaml file in the file chooser above, "
        "then re-run this cell."
    )

# yaml.safe_load only constructs plain Python objects (dicts, lists, scalars).
with open(selected_config_path, "r") as file:
    config = yaml.safe_load(file)

# Bare last expression so the notebook renders the parsed config.
config

{'data': {'n_pix': 224,
  'psize': 2.146,
  'submission': {'fname': 'data/submission_0.pt',
   'volume_key': 'volumes',
   'metadata_key': 'populations',
   'label_key': 'id'},
  'ground_truth': {'volumes': 'data/maps_gt_flat.pt',
   'metadata': 'data/metadata.csv'},
  'mask': {'do': True, 'volume': 'data/mask_dilated_wide_224x224.mrc'}},
 'analysis': {'metrics': ['l2', 'corr', 'bioem', 'fsc'],
  'chunk_size_submission': 80,
  'chunk_size_gt': 190,
  'normalize': {'do': True, 'method': 'median_zscore'}},
 'output': 'results/map_to_map_distance_matrix_submission_0.pkl'}

The docstrings shown by the following cells explain what each config file entry means.

In [None]:
validate_input_config_mtm?

In [None]:
validate_config_mtm_data_submission?

In [None]:
validate_config_mtm_data_ground_truth?

In [None]:
validate_config_mtm_data_mask?

In [None]:
validate_config_mtm_data?

In [None]:
validate_config_mtm_analysis?

In [None]:
validate_config_mtm_analysis_normalize?

The whole config is validated in `cryo_challenge.data._validation.config_validators.validate_input_config_mtm`.

In [None]:
# Validate the full loaded config in one call.
# NOTE(review): presumably raises on a missing or mistyped entry -- confirm in config_validators.
validate_input_config_mtm(config)

# 2. Run `run_map2map_pipeline`

`cryo_challenge run_map2map_pipeline --config config_files/config_map_to_map_distance_matrix.yaml`

The output of this step is validated with `cryo_challenge.data._validation.output_validators.MapToMapResultsValidator`. This happens automatically when the pipeline runs; it is repeated below only to serve as documentation.

In [27]:
# Load the pickled results written to the `output` path from the config.
# The path is resolved one directory above the notebook's working directory.
# NOTE(review): presumably this assumes the pipeline was launched from the
# repository root while this notebook lives one level below it -- confirm.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only open
# result files that you generated yourself or otherwise trust.
with open(os.path.join('../',config["output"]), "rb") as f:
    results_dict = pickle.load(f)


In [28]:
# Build the validator from the loaded results dict. The instance is bound to `_`
# and never used, so the call is presumably made only for the validation it
# performs during construction -- confirm in output_validators.
_ = MapToMapResultsValidator.from_dict(results_dict)

The fields of the output are explained below:

In [29]:
MapToMapResultsValidator?

Init signature:
MapToMapResultsValidator(
    config: dict,
    user_submitted_populations: torch.Tensor,
    corr: Union[dict, NoneType] = None,
    l2: Union[dict, NoneType] = None,
    bioem: Union[dict, NoneType] = None,
    fsc: Union[dict, NoneType] = None,
) -> None

For each map to map distance, there is a separate dict of results containing: 

In [32]:
validate_maptomap_result?

Signature: validate_maptomap_result(output_dict: dict) -> None
Docstring:
Validate the output dictionary of the map-to-map distance matrix computation.

cost_matrix: pd.DataFrame, is the cost matrix, with ground truth rows and submission columns.
user_submission_label: str, is the label of the submission.
computed_assets: dict, is a dictionary of computed assets, which can be re-used in other analyses.
File:      /mnt/ceph/users/gwoollard/repos/Cryo-EM-Heterogeneity-Challenge-1/src/cryo_challenge/data/_validation/config_validators.py
Type:      function