## Notebook for the development of analysis tools for heterogeneous reconstructions made with cryoSPARC or cryoDRGN


In [1]:
### imports
# general
import os
import numpy as np
import pandas as pd

# roodmus
from importlib import reload
import roodmus.analysis.utils
reload(roodmus.analysis.utils)
from roodmus.analysis.utils import load_data
# from roodmus.analysis.plot_heterogeneous_reconstruction import (
# )

In [None]:
### data loading
### steered MD data set for the Covid-19 spike protein (PDB id: 6xm5)

# options forwarded to load_data
verbose = True
enable_tqdm = True
ignore_missing_files = True
particle_diameter = 100  # approximate particle diameter in Angstroms
# micrograph shape in pixels; only needed when the metadata file is a .star file
ugraph_shape = (4000, 4000)

# input locations (config_dir is an absolute path, the others are relative)
config_dir = "/home/mjoosten1/projects/roodmus/data/6xm5_steered_Roodmus_2/mrc/"
meta_file = "data/6xm5_steered_Roodmus_2/cryoSPARC/J577_passthrough_particles.cs"
latent_file = "data/6xm5_steered_Roodmus_2/cryoSPARC/J577_latents_022224.cs"

# job-type label per metadata file (not referenced by load_data below)
jobtypes = {
    "data/6xm5_steered_Roodmus_2/cryoSPARC/J526_passthrough_particles.cs": "Flexible refinement",
    "data/6xm5_steered_Roodmus_2/cryoSPARC/J577_passthrough_particles.cs": "Flexible refinement",
}

# build the analysis object that holds the picking and truth results
analysis = load_data(
    meta_file,
    config_dir,
    particle_diameter,
    ugraph_shape=ugraph_shape,
    verbose=verbose,
    enable_tqdm=enable_tqdm,
    ignore_missing_files=ignore_missing_files,
)


In [None]:
### wrap the picking results and the truth results held by `analysis` in pandas dataframes
df_picked = pd.DataFrame(data=analysis.results_picking)
df_truth = pd.DataFrame(data=analysis.results_truth)

# last expression of the cell: show the final rows of the picked-particle table
df_picked.tail()


In [None]:
# compute_precision returns two objects: the precision table and a replacement
# for df_picked, which is rebound here
(df_precision, df_picked) = analysis.compute_precision(
    df_picked,
    df_truth,
    verbose=verbose,
)

# last expression of the cell: show the final rows of the precision table
df_precision.tail()

In [None]:
# attach the latent space coordinates to df_picked, one "latent_<k>" column per dimension
import roodmus.analysis.utils
# re-execute the module so the name imported below reflects its current source
reload(roodmus.analysis.utils)
from roodmus.analysis.utils import IO

# get_latents_cs returns the coordinate array and its number of dimensions
latent_space, ndim = IO.get_latents_cs(latent_file)

for dim in range(ndim):
    # column `dim` of the array becomes the dataframe column "latent_<dim>"
    df_picked[f"latent_{dim}"] = latent_space[:, dim]

df_picked.tail()

In [None]:
# draw the latent space with the roodmus plotting helper
import roodmus.analysis.plot_heterogeneous_reconstruction
# re-execute the module so the function imported below reflects its current source
reload(roodmus.analysis.plot_heterogeneous_reconstruction)
from roodmus.analysis.plot_heterogeneous_reconstruction import plot_latent_space

# dimensions 0 and 1, PCA enabled, points coloured by the "TP" column, no hexbin
grid = plot_latent_space(
    df_picked,
    ndim,
    dim_1=0, dim_2=1,
    pca=True, hexbin=False,
    color_by="TP", palette="Set1",
)

In [None]:
# hexbin plots: one joint plot over the "PC1"/"PC2" columns and one over the
# first two latent dimensions, both with log-scaled bin counts
# `sns` was used in this cell without ever being imported in the notebook
import seaborn as sns

# NOTE(review): `df` (expected to carry "PC1" and "PC2" columns) is not created
# in any cell visible here; presumably it is a PCA-projected version of
# df_picked left over from an earlier session — define it before running this.
grid = sns.jointplot(
    x="PC1",
    y="PC2",
    data=df,
    kind="hex",
    color="k",
    gridsize=55,
    bins="log",
    cmap="coolwarm",
    marginal_kws=dict(bins=100, fill=False),
)

# same plot on the raw latent coordinates added to df_picked ("latent_0", "latent_1");
# `grid` is rebound, so only the handle of this second figure is kept
grid = sns.jointplot(
    x="latent_0",
    y="latent_1",
    data=df_picked,
    kind="hex",
    color="k",
    gridsize=55,
    bins="log",
    cmap="coolwarm",
    marginal_kws=dict(bins=100, fill=False),
)



In [None]:
# scatter plot coloured by TP
# `sns` was used in this cell without ever being imported in the notebook
import seaborn as sns

# NOTE(review): `df` (expected to carry "PC1", "PC2" and "TP" columns) is not
# created in any cell visible here; presumably it is a PCA-projected version of
# df_picked left over from an earlier session — define it before running this.
grid = sns.jointplot(
    x="PC1",
    y="PC2",
    data=df,
    hue="TP",
    s=4,
    alpha=0.5,
    # an empty list provides no colours for the hue levels; use the same
    # palette as the plot_latent_space call that colours by "TP"
    palette="Set1",
)


In [None]:
# scatter plot coloured by class2D
# `sns` was used in this cell without ever being imported in the notebook
import seaborn as sns

# NOTE(review): `df` (expected to carry "PC1", "PC2" and "class2D" columns) is
# not created in any cell visible here; presumably it is a PCA-projected version
# of df_picked left over from an earlier session — define it before running this.
grid = sns.jointplot(
    x="PC1",
    y="PC2",
    data=df,
    hue="class2D",
    s=4,
    alpha=0.5,
)