# Visualizing Variant Painting Images and Cells

In [1]:
import os
import glob
import polars as pl
import matplotlib as mpl
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from skimage.io import imread
from tqdm import tqdm
import re
import sys
import subprocess
from skimage.transform import resize

sys.path.append("../..")
from img_utils import *
sys.path.append("./cell_img_visualization")
from display_img import *

## 1. Loading metadata and variant classification results

In [3]:
# Directory that holds the per-variant well-image figures: save_allele_imgs
# lists it to detect already-rendered files and passes it on as the output
# directory when saving is requested.
OUT_IMG_DIR = "../../2.snakemake_pipeline/outputs/gene_variant_well_images"

### 1.1 Read in the metadata

In [2]:
# Per-well image QC summary table.
# NOTE(review): IMG_QC_SUM_PARQUET_FILE is not defined in this notebook;
# presumably it comes from one of the star imports above -- confirm.
img_well_qc_sum_df = pl.read_parquet(IMG_QC_SUM_PARQUET_FILE)
img_well_qc_sum_df.head()

# Classification summary over all alleles.
auroc_df = pl.read_csv(
    "../outputs/2.classification_results/imaging_analyses_classification_summary_all.csv",
    separator=",",
    infer_schema_length=100000,
)
# Classification summary from the "one_perc" file.
auroc_oneperc_df = pl.read_csv(
    "../outputs/2.classification_results/imaging_analyses_classification_summary_one_perc.csv",
    separator=",",
    infer_schema_length=100000,
)
auroc_oneperc_df.unique("gene_variant")

# Sorted, de-duplicated list of variants whose
# Altered_95th_perc_both_batches_GFP flag is set.
img_phenotype_alleles = (
    auroc_oneperc_df
    .filter(pl.col("Altered_95th_perc_both_batches_GFP"))
    .unique("gene_variant")
    .sort("gene_variant")
    .get_column("gene_variant")
    .to_list()
)

## pillar results
# auroc_df = pl.read_csv(f"/home/shenrunx/igvf/varchamp/2025_Pillar_VarChAMP/2_individual_assay_analyses/imaging/3_outputs/pillar_img_overlapped_gene_variants.csv", 
#                        infer_schema_length=100000, separator=",").with_columns(
#                            pl.lit("2025_01_Batch_13-14").alias("Metadata_Bio_Batch")
#                        )
# auroc_df.unique("gene_allele")

## 2. Plot Variant Painting Well Images

In [None]:
def get_allele_batch(allele, score_df=auroc_df):
    """Return the ``Metadata_Bio_Batch`` value of the first row matching ``allele``.

    Parameters
    ----------
    allele : str
        Value to match against the ``gene_variant`` column.
    score_df : polars.DataFrame
        Table with ``gene_variant`` and ``Metadata_Bio_Batch`` columns.
        Defaults to the notebook-level ``auroc_df`` as it was when this
        function was defined.

    Raises
    ------
    IndexError
        If no row of ``score_df`` has ``gene_variant == allele``.
    """
    batches = score_df.filter(pl.col("gene_variant") == allele)["Metadata_Bio_Batch"].to_list()
    if not batches:
        # Same exception type as the bare `[0]` lookup, but it names the allele.
        raise IndexError(f"No row with gene_variant == {allele!r} in score_df")
    return batches[0]


def save_allele_imgs(variant, feat, auroc_df=auroc_df, img_well_qc_sum_df=img_well_qc_sum_df, display=False, save_img=False):
    """Plot reference-vs-variant well images for one variant and one channel.

    Parameters
    ----------
    variant : str
        Variant identifier; the text before its first underscore is used as
        the reference allele.
    feat : str
        Channel name. Selects the ``AUROC_Mean_{feat}`` column of ``auroc_df``
        and is forwarded to the plotting helpers as ``sel_channel``.
    auroc_df : polars.DataFrame
        Classification summary with ``gene_variant``, ``Metadata_Bio_Batch``
        and ``AUROC_Mean_{feat}`` columns.
    img_well_qc_sum_df : polars.DataFrame
        Image QC table with ``gene_allele``, ``Metadata_Bio_Batch``,
        ``imaging_well``, ``channel`` and ``plate_map_name`` columns.
    display : bool
        Forwarded to the plotting helpers as ``show_plot``. When False and a
        file starting with ``{variant}_{feat}`` already exists in
        ``OUT_IMG_DIR``, the function returns without plotting.
    save_img : bool
        When True, ``OUT_IMG_DIR`` is passed as the output directory;
        otherwise an empty string is passed.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        From ``get_allele_batch`` when ``variant`` is not in ``auroc_df``.
    """
    plt.clf()
    auroc_df_batch = auroc_df.with_columns(
        pl.col(f"AUROC_Mean_{feat}").alias("AUROC_Mean"),
        pl.col("gene_variant").alias("allele_0"),
    )
    # Look the batch up in the summary that was passed in, so a caller-supplied
    # auroc_df is honoured instead of the notebook-level default.
    bio_rep = get_allele_batch(variant, score_df=auroc_df)

    ref_allele = variant.split("_")[0]

    def _wells_of(allele):
        # maintain_order=True keeps the table's row order; without it the well
        # order (and hence the groups of four built below) is not guaranteed.
        return img_well_qc_sum_df.filter(
            pl.col("gene_allele") == allele,
            pl.col("Metadata_Bio_Batch") == bio_rep,
        ).unique("imaging_well", maintain_order=True)["imaging_well"].to_list()

    ref_wells = _wells_of(ref_allele)
    var_wells = _wells_of(variant)

    # A missing output directory simply means nothing has been rendered yet.
    if os.path.isdir(OUT_IMG_DIR):
        target_file = [f for f in os.listdir(OUT_IMG_DIR) if f.startswith(f"{variant}_{feat}")]
    else:
        target_file = []
    if target_file:
        print(target_file, "exists.")
        if not display:
            return None
    if save_img:
        output_dir = f"{OUT_IMG_DIR}"
        os.makedirs(output_dir, exist_ok=True)
        print(f"Img output at {output_dir}")
    else:
        output_dir = ""

    # NOTE(review): the QC rows are always restricted to the "GFP" channel,
    # whatever `feat` is -- confirm this is intended for non-GFP channels.
    allele_img_qc = img_well_qc_sum_df.filter(
        pl.col("Metadata_Bio_Batch") == bio_rep,
        pl.col("channel") == "GFP",
        (pl.col("gene_allele") == ref_allele) | (pl.col("gene_allele") == variant),
    )

    # Keyword arguments shared by every plotting call below.
    shared_kwargs = dict(
        sel_channel=feat,
        auroc_df=auroc_df_batch,
        plate_img_qc=allele_img_qc,
        site="05",
        show_plot=display,
        imgs_dir=TIFF_IMGS_DIR,
        output_dir=output_dir,
    )

    if bio_rep != "2024_12_Batch_11-12":
        allele_meta_df = allele_img_qc.sort("plate_map_name").unique(
            subset=["gene_allele"], keep="first", maintain_order=True
        )
        if len(ref_wells) == 1 and len(var_wells) == 1:
            plot_allele(allele_meta_df, ref=ref_allele, var=variant, **shared_kwargs)
        else:
            if not ref_wells or not var_wells:
                print(f"No wells to plot for {variant} in {bio_rep}: "
                      f"{len(ref_wells)} reference, {len(var_wells)} variant.")
            # One figure per (reference well, variant well) pair.
            for ref_well in ref_wells:
                for var_well in var_wells:
                    plot_allele(
                        allele_meta_df,
                        ref=ref_allele,
                        var=variant,
                        ref_well=[ref_well],
                        var_well=[var_well],
                        **shared_kwargs,
                    )
    else:
        allele_meta_df = allele_img_qc.sort("plate_map_name").unique(
            subset=["imaging_well", "gene_allele", "plate_map_name"],
            keep="first",
            maintain_order=True,
        )
        # Wells are consumed in consecutive groups of four in this batch
        # (presumably four replicate wells per allele on one plate -- confirm).
        wells_per_group = 4
        if len(ref_wells) == wells_per_group and len(var_wells) == wells_per_group:
            plot_allele_single_plate(allele_meta_df, variant=variant, **shared_kwargs)
        else:
            n_ref_groups = len(ref_wells) // wells_per_group
            n_var_groups = len(var_wells) // wells_per_group
            if (n_ref_groups == 0 or n_var_groups == 0
                    or len(ref_wells) % wells_per_group or len(var_wells) % wells_per_group):
                # Incomplete groups are not plotted; say so instead of dropping them silently.
                print(f"{variant} in {bio_rep}: {len(ref_wells)} reference and "
                      f"{len(var_wells)} variant wells; only complete groups of "
                      f"{wells_per_group} are plotted.")
            for rw_idx in range(n_ref_groups):
                ref_group = ref_wells[rw_idx * wells_per_group:(rw_idx + 1) * wells_per_group]
                for vw_idx in range(n_var_groups):
                    var_group = var_wells[vw_idx * wells_per_group:(vw_idx + 1) * wells_per_group]
                    plot_allele_single_plate(
                        allele_meta_df,
                        variant=variant,
                        ref_well=ref_group,
                        var_well=var_group,
                        **shared_kwargs,
                    )

In [44]:
# for allele in tqdm(img_phenotype_alleles):
#     # print(allele)
#     save_allele_imgs(allele, "GFP", display=False, save_img=True)

## 3. Check specific alleles manually

In [7]:
# Table of variants whose Altered_95th_perc_both_batches_GFP flag is set and
# whose AUROC_Mean_GFP exceeds 0.99, one row per variant, highest AUROC first.
with pl.Config(set_tbl_rows=100):
    display(
        auroc_oneperc_df
        .filter(
            pl.col("Altered_95th_perc_both_batches_GFP")
            & (pl.col("AUROC_Mean_GFP") > 0.99)
        )
        .unique("gene_variant")
        .select(["gene_variant", "AUROC_Mean_GFP"])
        .sort(by="AUROC_Mean_GFP", descending=True)
    )

gene_variant,AUROC_Mean_GFP
str,f64
"""RHO_Cys110Tyr""",0.999568
"""PMP22_Cys109Arg""",0.999515
"""RHO_Asp190Gly""",0.999466
"""MVK_Ser329Asn""",0.998275
"""PMP22_Leu80Pro""",0.998238
"""MLH1_Ser44Phe""",0.997603
"""SOS2_Val869Ile""",0.996982
"""RHO_Pro267Thr""",0.996585
"""FXN_Met1Ile""",0.995942
"""RP2_Leu253Arg""",0.995664


## 4. Download or upload images

In [None]:
# rsync -avz username@server_ip:/home/shenrunx/igvf/varchamp/2025_varchamp_snakemake/2.snakemake_pipeline/outputs/visualize_imgs/2025_01_Batch_13-14/F9*.png /local/destination/path/