## Notebook showcasing the various options in Roodmus for analysis and visualisations
In this notebook, the user can load metadata from one or several jobs of a processing pipeline run in RELION or cryoSPARC. This metadata, along with the ground-truth particle parameters, is loaded into data frames, which allow for easy and convenient plotting. We also provide several convenience functions for making plots.


In [None]:
### imports
# general
import numpy as np
import pandas as pd

# roodmus
from roodmus.analysis.utils import load_data
from roodmus.analysis.plot_ctf import plot_CTF, plot_defocus_scatter
from roodmus.analysis.plot_picking import (
    label_micrograph_picked, 
    label_micrograph_truth,
    label_micrograph_truth_and_picked, 
    plot_precision, plot_recall, 
    plot_boundary_investigation,
    plot_overlap_investigation, 
    plot_precision_and_recall, 
    plot_f1_score
)
from roodmus.analysis.plot_frames import plot_frame_distribution
from roodmus.analysis.plot_classes import plot_2Dclass_precision, plot_2Dclasses_frames
from roodmus.analysis.plot_alignment import plot_picked_pose_distribution, plot_true_pose_distribution


In [None]:
### data loading (example using cryoSPARC .cs metadata files)
config_dir = "/home/mjoosten1/projects/roodmus/data/6xm5_steered_Roodmus_2/mrc/"

# every metadata file of this example lives under the same directory
cs_root = "data/6xm5_steered_Roodmus_2/cryoSPARC/"

# Each entry is either one .cs file or a list of two .cs files
# (a particles file followed by its passthrough file).
# Uncomment the entries that should be loaded.
meta_files = [
    # cs_root + "J508_picked_particles.cs",
    # cs_root + "J511_050_particles.cs",
    # cs_root + "J513_passthrough_particles_selected.cs",
    # cs_root + "J515_topaz_picked_particles.cs",
    # cs_root + "J518_050_particles.cs",
    # [cs_root + "J519_class_00_final_particles.cs",
    #     cs_root + "J519_passthrough_particles_class_0.cs"],
    # [cs_root + "J519_class_01_final_particles.cs",
    #     cs_root + "J519_passthrough_particles_class_1.cs"],
    # [cs_root + "J519_class_02_final_particles.cs",
    #     cs_root + "J519_passthrough_particles_class_2.cs"],
    [
        cs_root + "J519_class_03_final_particles.cs",
        cs_root + "J519_passthrough_particles_class_3.cs",
    ],
]

# Human-readable label for each metadata file; the mapping is handed to the plotting functions.
# Keys are the full relative paths, built from the shared directory prefix.
cs_root = "data/6xm5_steered_Roodmus_2/cryoSPARC/"
jobtypes = {
    cs_root + filename: label
    for filename, label in (
        ("J508_picked_particles.cs", "blob picker"),
        ("J511_050_particles.cs", "2D classification"),
        ("J513_passthrough_particles_selected.cs", "2D class selection"),
        ("J515_topaz_picked_particles.cs", "topaz picking"),
        ("J518_050_particles.cs", "2D classification 2"),
        ("J519_class_00_final_particles.cs", "3D class 0"),
        ("J519_class_01_final_particles.cs", "3D class 1"),
        ("J519_class_02_final_particles.cs", "3D class 2"),
        ("J519_class_03_final_particles.cs", "3D class 3"),
    )
}

# approximate particle diameter in Angstroms
particle_diameter = 100
# shape of the micrograph in pixels. Only needs to be given if the metadata file is a .star file
ugraph_shape = (4000, 4000)
verbose = True

# The first metadata file creates the analysis object; every following file
# is added to that same object.
for i, meta_file in enumerate(meta_files):
    if i > 0:
        analysis.add_data(meta_file, config_dir, verbose=verbose)
        continue
    analysis = load_data(
        meta_file,
        config_dir,
        particle_diameter,
        ugraph_shape=ugraph_shape,
        verbose=verbose,
    )


In [None]:
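### data loading (example using .star metadata files; running this cell overwrites the variables set by the previous data loading cell)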
config_dir = "/home/mjoosten1/projects/roodmus/data/DESRES/MapReconstruction/Micrographs/"

# every metadata file of this example lives under the same directory
reconstruction_dir = "data/DESRES/MapReconstruction/"

# One .star file per job; comment or uncomment entries to choose what is loaded.
meta_files = [
    reconstruction_dir + "Extract/job007/particles.star",
    reconstruction_dir + "Class2D/job008/run_it200_data.star",
    reconstruction_dir + "Select/job009/particles.star",
    reconstruction_dir + "InitialModel/job010/run_it100_data.star",
    # reconstruction_dir + "Class3D/job011/run_it000_data.star",
    reconstruction_dir + "Extract/job013/particles.star",
    reconstruction_dir + "Refine3D/job014/run_it015_data.star",
]

# Human-readable label for each metadata file; the mapping is handed to the plotting functions.
# Keys are the full relative paths, built from the shared directory prefix.
# NOTE(review): the Class3D key below names run_it100_data.star, while the
# commented-out Class3D entry in meta_files names run_it000_data.star - confirm
# which iteration is intended before enabling that entry.
reconstruction_dir = "data/DESRES/MapReconstruction/"
jobtypes = {
    reconstruction_dir + filename: label
    for filename, label in (
        ("Extract/job007/particles.star", "topaz picking"),
        ("Class2D/job008/run_it200_data.star", "2D classification"),
        ("Select/job009/particles.star", "class selection"),
        ("InitialModel/job010/run_it100_data.star", "3D initial model"),
        ("Class3D/job011/run_it100_data.star", "3D classification"),
        ("Extract/job013/particles.star", "3D class selection"),
        ("Refine3D/job014/run_it015_data.star", "3D refinement"),
    )
}

# approximate particle diameter in Angstroms
particle_diameter = 100
# shape of the micrograph in pixels. Only needs to be given if the metadata file is a .star file
ugraph_shape = (4000, 4000)
verbose = True

# The first metadata file creates the analysis object; every following file
# is added to that same object.
for i, meta_file in enumerate(meta_files):
    if i > 0:
        analysis.add_data(meta_file, config_dir, verbose=verbose)
        continue
    analysis = load_data(
        meta_file,
        config_dir,
        particle_diameter,
        ugraph_shape=ugraph_shape,
        verbose=verbose,
    )


In [None]:
### turn the loaded data into a pandas dataframe
# both tables are built from attributes of the `analysis` object created in the data loading cell
df_picked = pd.DataFrame(analysis.results_picking) # built from the results_picking attribute
df_truth = pd.DataFrame(analysis.results_truth) # built from the results_truth attribute
df_picked.tail() # last expression of the cell, so the notebook displays the final rows of df_picked


In [None]:
### saving the dataframes
# it is recommended to save the dataframes after running the rest of the notebook, as they may be modified by downstream analysis
# (for example, the precision cell further down reassigns df_picked)
# the file names are relative, so both files are written to the current working directory

df_picked.to_csv("picked_particles.csv")
df_truth.to_csv("truth_particles.csv")

### CTF estimation


In [None]:
### scatter plot of the estimated vs. the true defocus values
meta_index = 0 # index of the metadata file to plot

palette = "RdBu" # palette name forwarded to plot_defocus_scatter

# NOTE(review): the selected meta_files entry is passed as-is; when the entry is a
# list of files it is not reduced to a single filename here - confirm the function accepts that
fig, ax = plot_defocus_scatter(df_picked,
                                meta_files[meta_index],
                                df_truth,
                                palette=palette)


In [None]:
### plot the CTF estimation for a single micrograph
meta_index = 0 # index of the metadata file to plot
ugraph_index = 3 # which micrograph to plot

# config_dir is the directory given to load_data in the data loading cell
fig, ax = plot_CTF(df_picked, meta_files[meta_index],
                    df_truth, config_dir, ugraph_index)

In [None]:
### plot the CTF for the micrograph with the largest defocus error (should take no more than a few seconds)
# Group each dataframe once and take the mean defocus per micrograph, instead of
# re-grouping both dataframes on every loop iteration. Grouping by the column name
# itself (not a one-element list) keeps the group keys plain filenames.
defocus_estimated_per_ugraph = df_picked.groupby("ugraph_filename")["defocusU"].mean()
# the true defocus is compared by magnitude, so take the absolute value of the mean
defocus_true_per_ugraph = df_truth.groupby("ugraph_filename")["defocus"].mean().abs()

max_error_index = 0
max_error = 0
# i counts the micrographs of df_picked in group order
for i, (groupname, defocus_estimated) in enumerate(defocus_estimated_per_ugraph.items()):
    # raises KeyError when a picked micrograph has no ground-truth rows
    defocus_true = defocus_true_per_ugraph[groupname]
    error = np.abs(defocus_estimated - defocus_true)
    if error > max_error:
        max_error = error
        max_error_index = i

# NOTE(review): None is passed in place of a metadata filename, and max_error_index is
# assumed to count micrographs in the same order plot_CTF uses - confirm against plot_CTF
fig, ax = plot_CTF(df_picked, None, df_truth, config_dir, max_error_index)


### Particle picking

In [None]:
### plot the picked particles
ugraph_index = 0 # which micrograph to plot
metadata_index = 3 # which metadata file to plot

# index meta_files with metadata_index (set above); the cell previously used
# meta_index, a variable left over from an earlier cell, so this setting was ignored
fig, ax = label_micrograph_picked(df_picked, meta_files[metadata_index],
                                   ugraph_index, config_dir, box_width=48, box_height=48, verbose=verbose)
# hide the axis ticks and fix the figure size
ax.set_xticks([])
ax.set_yticks([])
fig.tight_layout()
fig.set_size_inches(7, 7)


In [None]:
### plot the truth particles
ugraph_index = 3 # which micrograph to plot

# only the ground-truth dataframe is needed here, so no metadata file is selected
fig, ax = label_micrograph_truth(df_truth, ugraph_index, config_dir, box_width=32, box_height=32, verbose=verbose)
# hide the axis ticks and fix the figure size
ax.set_xticks([])
ax.set_yticks([])
fig.tight_layout()
fig.set_size_inches(7, 7)


In [None]:
### plot the truth and picked particles
ugraph_index = 3 # which micrograph to plot
metadata_index = 3 # which metadata file to plot

# index meta_files with metadata_index (set above); the cell previously used
# meta_index, a variable left over from an earlier cell, so this setting was ignored
fig, ax = label_micrograph_truth_and_picked(df_picked, meta_files[metadata_index],
                                             df_truth, ugraph_index, config_dir, box_width=48, box_height=48, verbose=verbose)
# hide the axis ticks and fix the figure size
ax.set_xticks([])
ax.set_yticks([])
fig.tight_layout()
fig.set_size_inches(7, 7)


In [None]:
### compute precision and recall (may take a few minutes)
# compute_precision returns two objects: the first is kept as df_precision,
# the second replaces the existing df_picked
df_precision, df_picked = analysis.compute_precision(df_picked, df_truth, verbose=verbose)
df_precision.tail() # last expression of the cell, so the notebook displays the final rows


In [None]:
### plot boxplot for precision and recall
# one figure per metric, each resized to 10 x 10 inches; after the loop
# fig and ax refer to the recall figure
for plot_metric in (plot_precision, plot_recall):
    fig, ax = plot_metric(df_precision, jobtypes)
    fig.set_size_inches([10,10])


In [None]:
### alternatively, plot the precision and recall in the same plot
# uses the df_precision table from the compute_precision cell and the jobtypes label mapping
fig, ax = plot_precision_and_recall(df_precision, jobtypes)


In [None]:
### plot f1-score
# uses the df_precision table from the compute_precision cell and the jobtypes label mapping
fig, ax = plot_f1_score(df_precision, jobtypes)
fig.set_size_inches(10,10) # resize the figure to 10 x 10 inches


In [None]:
### plot the picked particles, now with the TP and FP marked in green and red
ugraph_index = 3 # which micrograph to plot
metadata_index = 4 # which metadata file to plot

# index meta_files with metadata_index (set above); the cell previously used
# meta_index, a variable left over from an earlier cell, so this setting was ignored
fig, ax = label_micrograph_picked(df_picked, meta_files[metadata_index], ugraph_index, config_dir, box_width=48, box_height=48, verbose=verbose)
# hide the axis ticks and fix the figure size
ax.set_xticks([])
ax.set_yticks([])
fig.tight_layout()
fig.set_size_inches(7, 7)

In [None]:
### plot the distribution of the particles in the ugraphs in x, y, and z directions
metadata_index = 0 # which metadata file to plot
bin_width = [100, 100, 10] # bin width for x, y, z
axis = ["x", "y", "z"]

# NOTE(review): unlike the frame-distribution and 2D-class cells, the selected entry is
# not reduced to its first element when it is a list - confirm the function accepts that
metadata_filename = meta_files[metadata_index]
# one figure per axis, each with its own bin width; fig and ax keep the last (z) figure
for a, bnwdth in zip(axis, bin_width):
    fig, ax = plot_boundary_investigation(df_truth, df_picked, metadata_filename, bnwdth, axis=a)

In [None]:
### compute the overlap table used by the overlap plot
# the result is kept as df_overlap and passed to plot_overlap_investigation
df_overlap = analysis.compute_overlap(df_picked, df_truth, verbose=verbose)
df_overlap.head() # last expression of the cell, so the notebook displays the first rows

In [None]:
### plot the overlap between the picked and truth particles
# The default of None keeps the previous output of this cell (the call used to pass a
# hard-coded None); setting an index now actually selects that metadata file.
metadata_index = None # which metadata file to plot. If None, all metadata files are plotted

if metadata_index is None:
    metadata_filename = None
else:
    metadata_filename = meta_files[metadata_index]
    # an entry may be a list of files; use its first element, as the other cells do
    if isinstance(metadata_filename, list):
        metadata_filename = metadata_filename[0]

fig, ax = plot_overlap_investigation(df_overlap, metadata_filename, jobtypes=jobtypes)
ax.legend().set_visible(False) # hide the legend
ax.set_xlim((0, 50)) # restrict the x axis to the range 0-50
fig.tight_layout()


In [None]:
### plot the distribution of trajectory frames in a metadata file
metadata_index = 1 # which metadata file to plot

# an entry of meta_files may be a list of files; in that case its first element is used
selected_entry = meta_files[metadata_index]
metadata_filename = selected_entry[0] if isinstance(selected_entry, list) else selected_entry

fig, ax = plot_frame_distribution(
    df_picked,
    metadata_filename,
    df_truth,
    particle_diameter,
    jobtypes,
)


### 2D classification

In [None]:
### plot the precision per class
metadata_index = 0 # which metadata file to plot. Must have a class2D column

# an entry of meta_files may be a list of files; in that case its first element is used
selected_entry = meta_files[metadata_index]
metadata_filename = selected_entry[0] if isinstance(selected_entry, list) else selected_entry

fig, ax = plot_2Dclass_precision(
    df_picked,
    metadata_filename,
    jobtypes,
)


In [None]:
### plot the distribution of frames over the 2D classes
metadata_index = 0 # which metadata file to plot. Must have a class2D column

# an entry of meta_files may be a list of files; in that case its first element is used
selected_entry = meta_files[metadata_index]
metadata_filename = selected_entry[0] if isinstance(selected_entry, list) else selected_entry

fig, ax = plot_2Dclasses_frames(
    df_picked,
    metadata_filename,
    binfactor=100,
)

### 3D alignment

In [None]:
### plot the distribution of particle poses in the picked and truth particles
meta_index = 0

# an entry of meta_files may be a list of files; in that case its first element is used
selected_entry = meta_files[meta_index]
metadata_filename = selected_entry[0] if isinstance(selected_entry, list) else selected_entry

# picked poses first: the call also returns vmin and vmax, which are reused below
grid, vmin, vmax = plot_picked_pose_distribution(
    df_picked,
    metadata_filename,
)
grid.fig.set_size_inches(14, 7)
grid.fig.tight_layout()

# ground-truth poses, called with the vmin and vmax returned for the picked particles
grid, _, _ = plot_true_pose_distribution(
    df_truth,
    vmin,
    vmax,
)
grid.fig.set_size_inches(14, 7)
grid.fig.tight_layout()
