In [1]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ipynb.fs.full.biorsp import (
    find_foreground_background_points,
    # calculate_differences,
    rsp,
)

In [2]:
# Read the three input tables from disk.
# Tab-separated matrix; its first column becomes the row index.
dge_matrix = pd.read_csv(
    "data/MCA2_filtered.dge.txt",
    sep="\t",
    index_col=0,
)
# Contents of tsne_results.csv, converted to a plain NumPy array.
tsne_results = (
    pd.read_csv("embeddings/tsne_results.csv")
    .to_numpy()
)
# Contents of tsne_dbscan_results.csv, kept as a DataFrame.
dbscan_results = pd.read_csv(
    "embeddings/tsne_dbscan_results.csv",
)

In [3]:
# Empty table with one column per value recorded for each processed gene.
results = pd.DataFrame(
    columns=[
        "gene",
        "coverage",
        "rsp_area",
        "rmsd",
        "deviation_score",
    ]
)

In [4]:
# Analysis parameters consumed by the gene loop further down.
threshold = 1  # Define the threshold for foreground points - default is 1
clusters = None  # Clusters to be considered as foreground - default is None (look at all clusters)
scanning_window = np.pi / 2  # Define the scanning window - default is pi/2
resolution = 1000  # Define the resolution - default is 1000
angle_range = np.array([0, 2 * np.pi])  # Define the angle range - default is [0, 2*pi]
# Mode for CDFs - default is "absolute".
# Must be a plain string: a trailing comma after the literal would turn this
# assignment into the one-element tuple ("absolute",).
mode = "absolute"

In [5]:
# Row labels of `dge_matrix` (its first CSV column, via index_col=0); the loop
# below iterates over these and passes each one as `gene_name`.
gene_list = dge_matrix.index

In [None]:
# Score every gene in `gene_list` and record one row per gene that has
# foreground points.
#
# Rows are gathered in a plain list and converted to a DataFrame once, after
# the loop. This avoids growing a DataFrame row by row and makes the cell
# idempotent: re-running it rebuilds `results` from scratch instead of
# appending duplicate rows to whatever `results` already held.
# If the loop is interrupted, the rows collected so far remain available in
# `result_rows`.
result_rows = []

for gene in gene_list:
    print(f"Working on {gene}")
    foreground_points, background_points = find_foreground_background_points(
        gene_name=gene,
        dge_matrix=dge_matrix,
        tsne_results=tsne_results,
        threshold=threshold,
        dbscan_df=dbscan_results,
        selected_clusters=clusters,
    )

    if len(foreground_points) == 0:
        print(f"No foreground points found for {gene}, skipping...")
        continue

    # Without background points the centroid is undefined and the coverage
    # ratio below would divide by zero, so skip the gene explicitly.
    if len(background_points) == 0:
        print(f"No background points found for {gene}, skipping...")
        continue

    # Vantage point is the centroid of the background points.
    vantage_point = background_points.mean(axis=0)
    # Ratio of foreground point count to background point count.
    coverage = len(foreground_points) / len(background_points)

    rsp_area, rmsd, deviation_score = rsp(
        foreground_points,
        background_points,
        vantage_point=vantage_point,
        scanning_window=scanning_window,
        resolution=resolution,
        angle_range=angle_range,
        mode=mode,
    )

    print(f"RSP Area: {rsp_area}")
    print(f"RMSD: {rmsd}")
    print(f"Deviation Score: {deviation_score}")
    print("\n")

    result_rows.append(
        {
            "gene": gene,
            "coverage": coverage,
            "rsp_area": rsp_area,
            "rmsd": rmsd,
            "deviation_score": deviation_score,
        }
    )

# Passing `columns` keeps the expected layout even when no gene produced a row.
results = pd.DataFrame(
    result_rows,
    columns=["gene", "coverage", "rsp_area", "rmsd", "deviation_score"],
)

In [10]:
# Write the collected scores to disk, ordered by ascending RSP area.
os.makedirs("results", exist_ok=True)  # create the output folder if missing
(
    results
    .sort_values("rsp_area")
    .to_csv("results/rsp_results.csv", index=False)
)