In [1]:
import polars as pl
import os

In [2]:
# Configure the marker and output location, then load the classified cells
# joined with their coordinates. Later cells split this table by predicted
# label and Max_Prob range and write one CSV per range into `output_dir`.
marker = "Tgl3"
output_dir = f"/mnt/c/Users/peree/OneDrive/Desktop/CompBio_Code/markerproject_redux/cell_cycle_classification/{marker}/all_classified_cells/sct_inputs_for_low_confidence_classification"
# exist_ok=True creates the directory tree when missing and is a no-op when it
# already exists, avoiding the check-then-create race of a separate exists() test.
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): inputs are read from /home/alex/... while outputs are written
# under /mnt/c/Users/peree/... — confirm both roots are valid on the same machine.
coordinates = pl.read_csv(f"/home/alex/alex_files/markerproject_redux/coordinates/{marker}/all_overlay_paths.csv")

# Join on Cell_ID (polars defaults to an inner join, so cells missing from
# either file are dropped) and keep only the columns used by the export cells.
all_classified_cells = (
    pl
    .read_csv(f"/home/alex/alex_files/markerproject_redux/cell_cycle_classification/{marker}/all_classified_cells/all_classified_cells.csv")
    .join(coordinates, on="Cell_ID")
    .select(["Cell_ID", "Center_X", "Center_Y", "Image_Path", "Predicted_Label", "Max_Prob"])
)

In [3]:
# Export one shuffled CSV per Max_Prob bin for cells predicted as G1.
#
# Each entry is (file suffix, inclusive lower bound or None, exclusive upper bound).
# Bins are half-open [lower, upper) so a cell whose Max_Prob sits exactly on a
# boundary (e.g. 0.75) is written to exactly one file, consistent with the
# strict "< 0.6" cut-off of the lowest bin.
# NOTE(review): there is no bin covering 0.80 to 0.85 for G1 (MAT uses a single
# 75to85 bin) — confirm whether this gap is intentional.
g1_confidence_bins = [
    ("under60", None, 0.60),
    ("60to75", 0.60, 0.75),
    ("75to80", 0.75, 0.80),
    ("85to90", 0.85, 0.90),
    ("90to95", 0.90, 0.95),
]
g1_export_columns = ["Cell_ID", "Image_Path", "Center_X", "Center_Y"]

for bin_suffix, lower_bound, upper_bound in g1_confidence_bins:
    in_bin = pl.col("Max_Prob") < upper_bound
    if lower_bound is not None:
        in_bin = (pl.col("Max_Prob") >= lower_bound) & in_bin

    (
        all_classified_cells
        .filter((pl.col("Predicted_Label") == "G1") & in_bin)
        .select(g1_export_columns)
        .unique()
        # unique() does not guarantee row order; sort first so the seeded
        # shuffle below yields the same file on every run.
        .sort(g1_export_columns)
        .sample(fraction=1, with_replacement=False, shuffle=True, seed=1705)
        .write_csv(f"{output_dir}/G1_{bin_suffix}.csv")
    )

In [4]:
# Export one shuffled CSV per Max_Prob bin for cells predicted as SG2.
#
# Each entry is (file suffix, inclusive lower bound or None, exclusive upper bound).
# Bins are half-open [lower, upper) so a cell whose Max_Prob sits exactly on a
# boundary is written to exactly one file, consistent with the strict "< 0.6"
# cut-off of the lowest bin.
sg2_confidence_bins = [
    ("under60", None, 0.60),
    ("60to75", 0.60, 0.75),
]
sg2_export_columns = ["Cell_ID", "Image_Path", "Center_X", "Center_Y"]

for bin_suffix, lower_bound, upper_bound in sg2_confidence_bins:
    in_bin = pl.col("Max_Prob") < upper_bound
    if lower_bound is not None:
        in_bin = (pl.col("Max_Prob") >= lower_bound) & in_bin

    (
        all_classified_cells
        .filter((pl.col("Predicted_Label") == "SG2") & in_bin)
        .select(sg2_export_columns)
        .unique()
        # unique() does not guarantee row order; sort first so the seeded
        # shuffle below yields the same file on every run.
        .sort(sg2_export_columns)
        .sample(fraction=1, with_replacement=False, shuffle=True, seed=1705)
        .write_csv(f"{output_dir}/SG2_{bin_suffix}.csv")
    )

In [5]:
# Export one shuffled CSV per Max_Prob bin for cells predicted as MAT.
#
# Each entry is (file suffix, inclusive lower bound or None, exclusive upper bound).
# Bins are half-open [lower, upper) so a cell whose Max_Prob sits exactly on a
# boundary (0.75, 0.85 or 0.90) is written to exactly one file, consistent with
# the strict "< 0.6" cut-off of the lowest bin.
mat_confidence_bins = [
    ("under60", None, 0.60),
    ("60to75", 0.60, 0.75),
    ("75to85", 0.75, 0.85),
    ("85to90", 0.85, 0.90),
    ("90to95", 0.90, 0.95),
]
mat_export_columns = ["Cell_ID", "Image_Path", "Center_X", "Center_Y"]

for bin_suffix, lower_bound, upper_bound in mat_confidence_bins:
    in_bin = pl.col("Max_Prob") < upper_bound
    if lower_bound is not None:
        in_bin = (pl.col("Max_Prob") >= lower_bound) & in_bin

    (
        all_classified_cells
        .filter((pl.col("Predicted_Label") == "MAT") & in_bin)
        .select(mat_export_columns)
        .unique()
        # unique() does not guarantee row order; sort first so the seeded
        # shuffle below yields the same file on every run.
        .sort(mat_export_columns)
        .sample(fraction=1, with_replacement=False, shuffle=True, seed=1705)
        .write_csv(f"{output_dir}/MAT_{bin_suffix}.csv")
    )