# Plotting functions for supplementary figure 5 of the manuscript
This figure shows hexbin plots of the cryoDRGN latent space for two datasets: the SARS-CoV-2 spike protein (panel A) and the SARS-CoV-2 RTC (panel D).


In [None]:
# imports
import os
import pandas as pd

# roodmus
from roodmus.analysis.utils import load_data
from roodmus.heterogeneity.hetRec import HetRec
from roodmus.heterogeneity.plot_heterogeneous_reconstruction import plot_latent_space_hexbin


## panel A
Hexbin plot of the latent space from the cryoDRGN training on the SARS-CoV-2 spike trimer dataset.

In [None]:
# Panel A input: DE-Shaw covid spike, partially open set.
# Note that the micrograph configs live under a different root (ugraph_dir)
# than the processing project (project_dir).
project_dir = "/tudelft/mjoosten1/staff-umbrella/ajlab/MJ/projects/Roodmus/data/DE-Shaw_covid_spike_protein/20231116_DESRES-Trajectory_sarscov2-11021571-all-glueCA"
ugraph_dir = "/tudelft/mjoosten1/staff-umbrella/ajlab/MJ/projects/Roodmus/data/DE-Shaw_covid_spike_protein/DESRES-Trajectory_sarscov2-11021571-all-glueCA"
cryodrgn_dir = os.path.join(project_dir, "cryoDRGN")

config_dir = os.path.join(ugraph_dir, "Micrographs")
figures_dir = os.path.join(project_dir, "figures")
meta_file = os.path.join(cryodrgn_dir, "run_data.star")
# the single metadata file is labelled as a cryoDRGN job
jobtypes = {meta_file: "cryoDRGN"}
latent_file = os.path.join(cryodrgn_dir, "train_320", "z.19.pkl")

# settings passed on to load_data
particle_diameter = 100  # approximate particle diameter in Angstroms
ugraph_shape = (4000, 4000)  # micrograph shape in pixels; only required when the metadata file is a .star file
verbose = True
ignore_missing_files = True
enable_tqdm = True

# build the analysis object from the metadata file and the micrograph configs
analysis = load_data(
    meta_file,
    config_dir,
    particle_diameter,
    ugraph_shape=ugraph_shape,
    verbose=verbose,
    enable_tqdm=enable_tqdm,
    ignore_missing_files=ignore_missing_files,
)

# picked and ground-truth particles as data frames; compute_precision
# returns the precision table together with an updated df_picked
df_picked = pd.DataFrame(analysis.results_picking)
df_truth = pd.DataFrame(analysis.results_truth)
df_precision, df_picked = analysis.compute_precision(
    df_picked,
    df_truth,
    verbose=verbose,
)

# attach the latent coordinates from latent_file to the picked particles,
# then run the PCA over the ndim latent dimensions
df_picked, ndim = HetRec.add_latent_space_coordinates(latent_file=latent_file, df_picked=df_picked)
df_picked, pca = HetRec.compute_PCA(df_picked=df_picked, ndim=ndim)

# show the last rows as the cell output
df_picked.tail()

In [None]:
# Make a hexbin plot of the latent space (panel A).
# dim1/dim2 select which two PCA components are plotted against each other.
dim1 = 0
dim2 = 1
# Saving is off by default for this panel; set to True to write the PDF.
save_figure = False

grid = plot_latent_space_hexbin(
    df_picked,
    dim_1=dim1,
    dim_2=dim2,
    pca=True,
    palette="coolwarm",
)
grid.set_axis_labels(f"PCA{dim1}", f"PCA{dim2}", fontsize=16)

# style the first axes of the figure (presumably the main hexbin panel)
ax = grid.figure.get_axes()[0]
ax.tick_params(labelsize=14)
ax.set_xlim((-12, 12))
ax.set_ylim((-12, 12))

# Build the output path once so the save call and the message cannot drift apart.
figure_path = os.path.join(
    figures_dir,
    f"{os.path.basename(latent_file)}_pca_{dim1}_{dim2}_hexbin.pdf",
)
if save_figure:
    grid.savefig(figure_path, bbox_inches="tight")
    print(f"saved figure to {figure_path}")
else:
    # Previously this cell always printed "saved figure to ..." even though
    # the savefig call was disabled; report what actually happened instead.
    print(f"figure not saved (save_figure=False); target path: {figure_path}")


## panel D
Hexbin plot of the latent space from the cryoDRGN training on the SARS-CoV-2 RTC dataset.

In [None]:
### Panel D input: covid RTC DE-Shaw data set.
# Everything for this data set (configs, figures, cryoDRGN output) sits under project_dir.
project_dir = "/home/mjoosten1/projects/roodmus/data/DE-Shaw_covid_RTC/20240124_DESRES-Trajectory_sarscov2-13795965-no-water-movies"
cryodrgn_dir = os.path.join(project_dir, "cryoDRGN")

config_dir = os.path.join(project_dir, "Movies")
figures_dir = os.path.join(project_dir, "figures")
meta_file = os.path.join(cryodrgn_dir, "run_data.star")
# the single metadata file is labelled as a cryoDRGN job
jobtypes = {meta_file: "cryoDRGN"}
latent_file = os.path.join(cryodrgn_dir, "train_320", "z.24.pkl")

# settings passed on to load_data
particle_diameter = 100  # approximate particle diameter in Angstroms
ugraph_shape = (4000, 4000)  # micrograph shape in pixels; only required when the metadata file is a .star file
verbose = True
ignore_missing_files = True
enable_tqdm = True

# build the analysis object from the metadata file and the micrograph configs
analysis = load_data(
    meta_file,
    config_dir,
    particle_diameter,
    ugraph_shape=ugraph_shape,
    verbose=verbose,
    enable_tqdm=enable_tqdm,
    ignore_missing_files=ignore_missing_files,
)

# picked and ground-truth particles as data frames; compute_precision
# returns the precision table together with an updated df_picked
df_picked = pd.DataFrame(analysis.results_picking)
df_truth = pd.DataFrame(analysis.results_truth)
df_precision, df_picked = analysis.compute_precision(
    df_picked,
    df_truth,
    verbose=verbose,
)

# attach the latent coordinates from latent_file to the picked particles,
# then run the PCA over the ndim latent dimensions
df_picked, ndim = HetRec.add_latent_space_coordinates(latent_file=latent_file, df_picked=df_picked)
df_picked, pca = HetRec.compute_PCA(df_picked=df_picked, ndim=ndim)

# show the last rows as the cell output
df_picked.tail()


In [None]:
# Hexbin plot of the latent space (panel D).
# dim1/dim2 select which two PCA components are plotted against each other.
dim1 = 0
dim2 = 1

grid = plot_latent_space_hexbin(df_picked, dim_1=dim1, dim_2=dim2, pca=True, palette="coolwarm")
grid.set_axis_labels(f"PCA{dim1}", f"PCA{dim2}", fontsize=16)

# style the first axes of the figure (presumably the main hexbin panel)
ax = grid.figure.get_axes()[0]
ax.tick_params(labelsize=14)
ax.set_xlim((-12, 12))
ax.set_ylim((-12, 12))

# write the figure as a PDF into figures_dir, named after the latent file
# and the plotted dimensions, then report where it went
figure_path = os.path.join(
    figures_dir,
    f"{os.path.basename(latent_file)}_pca_{dim1}_{dim2}_hexbin.pdf",
)
grid.savefig(figure_path, bbox_inches="tight")
print(f"saved figure to {figure_path}")
