In [2]:
import os
import polars as pl

In [9]:
# Function for creating input files for single-cell tool 
# The input file columns are: Cell_ID / Image_Path / Center_X / Center_Y
# The 'phenotype' file for labeling images can just be a simple text file with keep/drop
marker = 'Sac6'
compartment = 'ap'

sct_path = f'/mnt/c/Users/peree/OneDrive/Desktop/CompBio_Code/markerproject_redux/quality_check/{marker}/sct_inputs'
fl_coords_path = f'/home/alex/alex_files/markerproject_redux/coordinates/{marker}/all_raw_paths.csv'

if not os.path.exists(sct_path):
    os.makedirs(sct_path)

#raw_features_nuc = pl.read_csv(f"/home/alex/alex_files/markerproject_redux/quality_check/{marker}/cell_and_nuclei/raw_nucleus_qc_features.csv")
#raw_features_cell = pl.read_csv(f"/home/alex/alex_files/markerproject_redux/quality_check/{marker}/cell_and_nuclei/raw_cell_qc_features.csv")
#scaled_features = pl.read_csv(f"/home/alex/alex_files/markerproject_redux/quality_check/{marker}/cell_and_nuclei/scaled_cell_qc_features.csv")
#raw_features_compartment = pl.read_csv(f"/home/alex/alex_files/markerproject_redux/quality_check/{marker}/{compartment}/raw_{compartment}_qc_features.csv")

def sct_input_maker(features, feature_name, file_name, lower_val=float('-inf'), upper_val=float('inf')):
    # Get outline crops
    subset = (
        features
        .filter(
            (pl.col(feature_name) >= lower_val) & (pl.col(feature_name) <= upper_val))
        .select(['Cell_ID', 'Image_Path', 'Center_X', 'Center_Y'])
        .sample(fraction=1, with_replacement=False, shuffle=True)
        )
    
    # Add fluorescent image crops (comment this part out if only outlines needed)
    fl_coords = (
        pl
        .read_csv(fl_coords_path)
        .filter(pl.col("Cell_ID").is_in(subset["Cell_ID"]))
        .drop(["Center_X", "Center_Y"])
        .join(subset.drop(["Image_Path"]), on="Cell_ID", how="inner")
        .select(["Cell_ID", "Image_Path", "Center_X", "Center_Y"])
        )
    
    subset = (
        pl
        .concat(items=[subset, fl_coords], how="vertical")
        .unique()
        .sort(["Cell_ID", "Image_Path"])
        .group_by("Cell_ID")
        .agg(pl.all())
        .sample(fraction=1, with_replacement=False, shuffle=True)
        .explode(pl.all().exclude("Cell_ID"))
        )
    
    subset.write_csv(file=f"{sct_path}/{file_name}.csv")

In [12]:
sct_input_maker(raw_features_compartment, 'APs_Intensity_IntegratedIntensity_GFP', file_name='intint', lower_val=0.12, upper_val=0.14)

Please use `implode` to return to previous behavior.

See https://github.com/pola-rs/polars/issues/22149 for more information.
  .filter(pl.col("Cell_ID").is_in(subset["Cell_ID"]))
