In [2]:
from pathlib import Path
import pandas as pd
import numpy as np

import starfile

In [None]:
def rln_split_classif_star(
        classif_star_file: Path,
        overwrite: bool = False,
):
    """Split a classification data star file into one star file per class.

    The particle table is partitioned by ``rlnClassNumber``. For every class a
    file named ``class_<N>_it025_data.star`` is written next to the input
    file. Each output holds the complete, unmodified optics table followed by
    the particles assigned to that class.

    Args:
        classif_star_file (Path): Path to the run_it025_data.star file.
        overwrite (bool): Whether to overwrite existing star files. When False
            and at least one output file already exists, nothing is written.

    Returns:
        classes_dict (dict): Maps each class number (int) to a dictionary with
            the keys "optics" and "particles" holding the tables written for
            that class.

    Raises:
        FileExistsError: If an output file already exists and ``overwrite`` is
            False.
    """
    classif_star_file = Path(classif_star_file).absolute()
    classif_star = starfile.read(classif_star_file)
    classif_particles_df = classif_star["particles"]
    classif_optics_df = classif_star["optics"]

    output_dir = classif_star_file.parent
    class_number_list = sorted(classif_particles_df["rlnClassNumber"].unique())

    # Build every per-class star dictionary (and its target path) before
    # touching the disk, so a failing pre-flight check leaves no partial output.
    classes_dict = {}
    class_star_files = {}
    for class_number in class_number_list:
        class_key = int(class_number)  # plain int keys instead of numpy scalars
        in_class = classif_particles_df["rlnClassNumber"] == class_number
        classes_dict[class_key] = {
            # Optics first: same block order as in the star file that was read.
            "optics": classif_optics_df,
            "particles": classif_particles_df.loc[in_class].copy(),
        }
        class_star_files[class_key] = output_dir / f"class_{class_key}_it025_data.star"

    if not overwrite:
        existing_files = [str(path) for path in class_star_files.values() if path.exists()]
        if existing_files:
            raise FileExistsError(
                "Refusing to overwrite existing star file(s): "
                + ", ".join(existing_files)
                + " (pass overwrite=True to replace them)"
            )

    for class_key, class_star_file in class_star_files.items():
        # starfile.write takes the data first and the file name second.
        # Existence was already checked above, so overwriting is safe here.
        starfile.write(classes_dict[class_key], class_star_file, overwrite=True)

    return classes_dict



In [13]:
# Load the Class3D result and group its particles by class number.
classif_data_star_file = Path('/mnt/scratch/ribosomes/kas_k44a/relion/Class3D/bin10_K12_local_15deg/run_it025_data.star').absolute()

classif_data_star = starfile.read(classif_data_star_file)
classif_data_particles_df = classif_data_star['particles']
classif_data_optics_df = classif_data_star['optics']

class_dict = {}
class_numbers = sorted(classif_data_particles_df['rlnClassNumber'].unique())

for class_number in class_numbers:
    class_dict[class_number] = {
        "particles": classif_data_particles_df[classif_data_particles_df['rlnClassNumber'] == class_number],
        # rlnClassNumber is a per-particle label; the optics table is keyed by
        # optics group and cannot be filtered on it, so every class keeps the
        # complete optics table.
        "optics": classif_data_optics_df,
    }



In [16]:
# Skeleton of a per-class star dictionary: the shared optics table is filled
# in, while the "particles" entry is still left as None.
for class_number in sorted(classif_data_particles_df["rlnClassNumber"].unique()):
    subset_star_dict = dict(optics=classif_data_optics_df, particles=None)


In [12]:
def select_classes(
    class_data_star: str,
    classes_to_select: str,
    ) -> None:
    """Write a copy of a class data star file restricted to the chosen classes.

    The output is written next to the input as
    ``<input stem>_selectedclasses.star`` and replaces any existing file of
    that name. All data blocks other than "particles" are carried over
    unchanged.

    Args:
        class_data_star: Path to the class data star file (str or Path).
        classes_to_select: Class numbers to keep, as a comma-separated string
            such as "1,4,7". Blank entries (e.g. a trailing comma) are
            ignored. An iterable of integers is accepted as well.

    Raises:
        ValueError: If ``classes_to_select`` contains no class number, or an
            entry cannot be converted to an integer.
    """
    class_data_star = Path(class_data_star).absolute()

    if isinstance(classes_to_select, str):
        # Skip blank tokens so inputs like "4, 7," or "4,,7" do not crash in int("").
        class_numbers = [int(token) for token in classes_to_select.split(",") if token.strip()]
    else:
        class_numbers = [int(number) for number in classes_to_select]
    if not class_numbers:
        raise ValueError("classes_to_select does not contain any class number")

    class_data = starfile.read(class_data_star)
    particle_data = class_data["particles"]
    selected_data = particle_data.loc[particle_data["rlnClassNumber"].isin(class_numbers)]

    # Build a new mapping instead of mutating what starfile.read returned;
    # block order is preserved and only the particle table is swapped.
    selected_class_data = {**class_data, "particles": selected_data}

    selected_data_star = class_data_star.parent / f"{class_data_star.stem}_selectedclasses.star"
    starfile.write(selected_class_data, selected_data_star, overwrite=True)

In [13]:
class_data_star = "/mnt/scratch/ribosomes/wws_EGFcontrol/Class3D/job021/run_it025_data.star"
classes_to_select = "4"
select_classes(class_data_star, classes_to_select)