In [3]:
#!/usr/bin/env python3
import os
import tempfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from SigProfilerAssignment import Analyzer as Analyze  # pip install SigProfilerAssignment # https://github.com/AlexandrovLab/SigProfilerAssignment


# -------------------- Config --------------------
# Working directory captured once, when this cell/module is executed; the
# loaders below build every input path relative to it.
BASE_DIR   = os.getcwd()
# Folder (under BASE_DIR) that holds the COSMIC signature tables.
COSMIC_DIR = "Cosmic files"
# Folder (under BASE_DIR) that holds the per-workflow mutation catalogues.
INPUT_DIR  = "Input files"
# Output root for the SPA runs and the boxplot. It is used as a relative path
# (it is not joined with BASE_DIR anywhere in this script).
OUT_DIR    = "SPA_signature_results/V2"
COSMIC_FILE = "COSMIC_v2_SBS_GRCh38.txt"  # contexts x signatures (tab-separated)

# Dataset label accepted by load_crc_data() -> catalogue file name in INPUT_DIR.
FILE_MAP = {
    "WES":     "Preclinical_Dataset_WES.txt",      # contexts x samples (tab-separated)
    "WGS":     "Preclinical_Dataset_WGS.txt",
    "TSO-500": "Preclinical_Dataset_TSO-500.txt",
}


# -------------------- IO helpers --------------------
def load_cosmic_signatures(cosmic_file, cosmic_dir=COSMIC_DIR, base_dir=BASE_DIR):
    """Read a signature table located at ``base_dir/cosmic_dir/cosmic_file``.

    The file is parsed as tab-separated text and its first column becomes the
    row index (rows: contexts, cols: signatures).
    """
    signature_table = os.path.join(base_dir, cosmic_dir, cosmic_file)
    return pd.read_csv(signature_table, index_col=0, sep="\t")

def load_crc_data(dataset_type, input_dir=INPUT_DIR, base_dir=BASE_DIR):
    """Read the mutation catalogue registered in FILE_MAP under ``dataset_type``.

    The file name is looked up in FILE_MAP (an unknown label raises KeyError)
    and resolved under ``base_dir/input_dir``. The table is tab-separated with
    its first column used as the row index (rows: contexts, cols: samples).
    """
    catalogue_name = FILE_MAP[dataset_type]
    catalogue_file = os.path.join(base_dir, input_dir, catalogue_name)
    return pd.read_csv(catalogue_file, index_col=0, sep="\t")


# -------------------- Math helpers --------------------
def normalize_contributions(contrib):
    """Rescale every sample (column) so that its contributions sum to one.

    contrib: signatures x samples.
    A column whose total is zero is divided by 1 instead of 0, so it stays
    all-zero rather than turning into NaN.
    """
    totals = contrib.sum(axis=0)
    divisors = totals.mask(totals == 0, 1.0)
    return contrib / divisors

def cosine_per_sample(reconstructed, observed):
    """Cosine similarity between each reconstructed column and its observed twin.

    Both inputs are contexts x samples. ``observed`` is sliced with ``.loc`` to
    the row and column labels of ``reconstructed``, so it must contain all of
    them. Returns a 1-D NumPy array with one value per column of
    ``reconstructed``; a column whose norm is zero scores 0.
    """
    recon = reconstructed.values
    obs = observed.loc[reconstructed.index, reconstructed.columns].values

    unit_columns = []
    for matrix in (recon, obs):
        norms = np.linalg.norm(matrix, axis=0)
        # Divide zero-length columns by 1 so they stay zero instead of NaN
        unit_columns.append(matrix / np.where(norms == 0, 1.0, norms))

    recon_unit, obs_unit = unit_columns
    # Column-wise dot product of the two unit-length matrices
    return np.einsum("ij,ij->j", recon_unit, obs_unit)


# -------------------- Core: SigProfilerAssignment fit --------------------
def fit_with_SigProfilerAssignment(samples_df, signature_db_path, outdir=OUT_DIR, genome_build="GRCh38"):
    """
    Fit a mutation catalogue against a signature table with SigProfilerAssignment.

    samples_df: DataFrame, rows=contexts, cols=samples
    signature_db_path: path to the signature table (rows=contexts, cols=signatures, tsv)
    outdir: folder passed to SPA as its output location (created if missing)
    genome_build: forwarded unchanged to ``cosmic_fit``
    returns: dict with "contribution" (signatures x samples) and
             "reconstructed" (contexts x samples)
    """
    os.makedirs(outdir, exist_ok=True)

    # SPA takes a file path as input, so the catalogue is staged in a scratch folder
    with tempfile.TemporaryDirectory() as scratch_dir:
        staged_matrix = os.path.join(scratch_dir, "samples_matrix.tsv")
        samples_df.to_csv(staged_matrix, sep="\t")

        # Single-CPU run with all plotting switched off
        Analyze.cosmic_fit(
            samples=staged_matrix,
            output=outdir,
            input_type="matrix",
            signature_database=signature_db_path,
            genome_build=genome_build,
            cpu=1,
            make_plots=False,
            sample_reconstruction_plots=False,
        )

    # The activities table is read with "Samples" as its index and transposed
    # to signatures x samples
    activities_file = os.path.join(
        outdir,
        "Assignment_Solution",
        "Activities",
        "Assignment_Solution_Activities.txt",
    )
    exposures = pd.read_csv(activities_file, sep="\t", index_col="Samples").T

    # Signature profiles (contexts x signatures) used to rebuild the catalogues
    profiles = pd.read_csv(signature_db_path, sep="\t", index_col=0)

    # Keep only signatures present in both tables, in a shared order
    shared_signatures = exposures.index.intersection(profiles.columns)
    exposures = exposures.loc[shared_signatures]
    profiles = profiles[shared_signatures]

    # Keep only contexts that the input catalogue also contains
    shared_contexts = samples_df.index.intersection(profiles.index)
    profiles = profiles.loc[shared_contexts]

    # (contexts x signatures) @ (signatures x samples) = (contexts x samples)
    rebuilt = pd.DataFrame(
        profiles.values @ exposures.values,
        index=shared_contexts,
        columns=exposures.columns,
    )

    return {
        "contribution": exposures,   # signatures x samples
        "reconstructed": rebuilt,    # contexts x samples
    }


# -------------------- Plot --------------------
def plot_cosine_box(cosine_df, title="Cosine Similarity with COSMIC v2",
                    out_png=os.path.join(OUT_DIR, "cosine_boxplot.png")):
    """Draw a horizontal boxplot of cosine similarity per workflow and save it.

    cosine_df: DataFrame with a "Cosine_Similarity" column and a "Data_Type"
               column whose values are "WGS", "WES" or "TSO-500".
    title:     figure title.
    out_png:   destination image; its parent folder is created if missing.

    The x-axis starts at 0.89 unless a sample scores lower, in which case the
    axis is widened so that no observation is cut out of the figure.
    """
    import matplotlib.pyplot as plt

    order = ["WGS", "WES", "TSO-500"]
    data = [cosine_df.loc[cosine_df["Data_Type"] == k, "Cosine_Similarity"].values for k in order]

    # Make the function usable on its own, before any SPA run created the folder
    out_folder = os.path.dirname(out_png)
    if out_folder:
        os.makedirs(out_folder, exist_ok=True)

    # Lowest finite observation decides whether the default axis would clip data
    pooled = np.concatenate(data).astype(float)
    pooled = pooled[np.isfinite(pooled)]
    x_min = 0.89
    if pooled.size and pooled.min() < x_min:
        x_min = pooled.min() - 0.01

    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.boxplot(data, vert=False)
        # Label the ticks explicitly instead of using boxplot's `labels=`
        # keyword, which Matplotlib 3.9 deprecated in favour of `tick_labels=`
        ax.set_yticks(range(1, len(order) + 1))
        ax.set_yticklabels(order)
        ax.set_xlabel("Cosine Similarity")
        ax.set_ylabel("NGS Workflow")
        ax.set_title(title)
        ax.set_xlim(x_min, 1.0)
        ax.axvline(0.90, linestyle="--")  # reference threshold
        fig.tight_layout()
        fig.savefig(out_png, dpi=300, bbox_inches="tight")
        try:
            plt.show()  # best effort: the saved file is the real output
        except Exception as exc:
            print(f"[Warning] could not display figure: {exc}")
    finally:
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)
    print(f"[Saved] {out_png}")


# -------------------- Orchestration --------------------
def main():
    """Fit the three catalogues against COSMIC_FILE and plot cosine similarity.

    Every catalogue is loaded up front, then each one is fitted with
    SigProfilerAssignment (output under OUT_DIR/<dataset>) and scored by the
    per-sample cosine similarity between reconstructed and observed profiles.
    The pooled scores are passed to plot_cosine_box().

    Raises FileNotFoundError if the signature table is missing.
    """
    print("---> Mutational Signatures Analysis <---")

    # Paths
    signature_db_path = os.path.join(BASE_DIR, COSMIC_DIR, COSMIC_FILE)
    # Fail before any fitting starts if the signature table is not there
    if not os.path.isfile(signature_db_path):
        raise FileNotFoundError(f"Signature database not found: {signature_db_path}")

    # Load data (all catalogues first, so a missing input is reported early)
    fit_order = ("WES", "WGS", "TSO-500")
    catalogues = {label: load_crc_data(label) for label in fit_order}

    # Fit each dataset with SPA and score the reconstruction per sample
    cosines = {}
    for label in fit_order:
        print(f"[SPA] Fitting {label}")
        fit = fit_with_SigProfilerAssignment(
            catalogues[label],
            signature_db_path=signature_db_path,
            outdir=os.path.join(OUT_DIR, label),
        )
        cosines[label] = cosine_per_sample(fit["reconstructed"], catalogues[label])

    # Collect for plotting (row order: WGS, WES, TSO-500)
    plot_order = ("WGS", "WES", "TSO-500")
    cosine_df = pd.DataFrame({
        "Cosine_Similarity": np.concatenate([cosines[label] for label in plot_order]),
        "Data_Type": [label for label in plot_order for _ in range(len(cosines[label]))],
    })

    print("--> Boxplot (display)")
    plot_cosine_box(cosine_df)


if __name__ == "__main__":
    main()


---> Mutational Signatures Analysis <---
[SPA] Fitting WES
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    1.9s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    4.2s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:    5.9s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SPA] Fitting WGS
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:    1.4s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SPA] Fitting TSO-500
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    1.7s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    3.7s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:    5.3s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
--> Boxplot (display)
[Saved] SPA_signature_results/V2/cosine_boxplot.png


In [2]:
pip install SigProfilerAssignment

Collecting SigProfilerAssignment
  Downloading sigprofilerassignment-1.0.3-py3-none-any.whl.metadata (13 kB)
Collecting scipy>=1.13 (from SigProfilerAssignment)
  Downloading scipy-1.16.3-cp311-cp311-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting SigProfilerMatrixGenerator>=1.3.0 (from SigProfilerAssignment)
  Downloading sigprofilermatrixgenerator-1.3.6-py3-none-any.whl.metadata (16 kB)
Collecting sigProfilerPlotting>=1.4.0 (from SigProfilerAssignment)
  Using cached sigprofilerplotting-1.4.2-py3-none-any.whl.metadata (7.4 kB)
Collecting reportlab>=3.5.42 (from SigProfilerAssignment)
  Using cached reportlab-4.4.4-py3-none-any.whl.metadata (1.7 kB)
Collecting pypdf>=6.0.0 (from SigProfilerAssignment)
  Downloading pypdf-6.1.3-py3-none-any.whl.metadata (7.1 kB)
Collecting alive_progress>=2.4.1 (from SigProfilerAssignment)
  Using cached alive_progress-3.3.0-py3-none-any.whl.metadata (72 kB)
Collecting pdf2image>=1.16.0 (from SigProfilerAssignment)
  Using cached pdf2image-1.17.0-py3

In [None]:
#cosine_to_tsv

#!/usr/bin/env python3
import os
import tempfile
import numpy as np
import pandas as pd
from SigProfilerAssignment import Analyzer as Analyze  # pip install SigProfilerAssignment

# -------------------- Config --------------------
# Working directory captured once, when this cell/module is executed; input
# and signature paths are built relative to it.
BASE_DIR   = os.getcwd()
# Folder (under BASE_DIR) that holds the COSMIC signature tables.
COSMIC_DIR = "Cosmic files"
# Folder (under BASE_DIR) that holds the per-workflow mutation catalogues.
INPUT_DIR  = "Input files"
OUT_BASE   = "SPA_signature_results"   # parent; subdirs per version

# Signature tables to fit against, one full pass over all datasets per file.
COSMIC_FILES = [
    "COSMIC_v2_SBS_GRCh38.txt",
    "COSMIC_v3.2_SBS_GRCh38.txt",
]

# Dataset label accepted by load_crc_data() -> catalogue file name in INPUT_DIR.
FILE_MAP = {
    "WES":     "Preclinical_Dataset_WES.txt",
    "WGS":     "Preclinical_Dataset_WGS.txt",
    "TSO-500": "Preclinical_Dataset_TSO-500.txt",
}

# Value written to the "Tool" column of the results table and used in log lines.
TOOL_NAME = "SigProfilerAssignment"


# -------------------- IO helpers --------------------
def load_crc_data(dataset_type, input_dir=INPUT_DIR, base_dir=BASE_DIR):
    """Read the mutation catalogue registered in FILE_MAP under ``dataset_type``.

    An unknown label raises KeyError. The file is resolved under
    ``base_dir/input_dir`` and parsed as tab-separated text with the first
    column as row index (rows=contexts, cols=samples).
    """
    catalogue_file = os.path.join(base_dir, input_dir, FILE_MAP[dataset_type])
    return pd.read_csv(catalogue_file, index_col=0, sep="\t")


# -------------------- Math helpers --------------------
def cosine_per_sample(reconstructed: pd.DataFrame, observed: pd.DataFrame) -> pd.Series:
    """
    Compute the cosine similarity of each sample's reconstructed and observed profile.

    reconstructed: contexts x samples
    observed:      contexts x samples

    Only contexts (rows) and samples (columns) present in both frames take part.
    The result is a Series indexed by the shared sample names; a sample whose
    profile has zero norm scores 0.
    """
    rows = reconstructed.index.intersection(observed.index)
    cols = reconstructed.columns.intersection(observed.columns)

    def _as_unit_columns(frame):
        # Restrict to the shared labels, then scale every column to length 1
        block = frame.loc[rows, cols].values
        lengths = np.linalg.norm(block, axis=0)
        lengths[lengths == 0] = 1.0  # keep all-zero columns at zero, not NaN
        return block / lengths

    similarities = np.einsum(
        "ij,ij->j",
        _as_unit_columns(reconstructed),
        _as_unit_columns(observed),
    )
    return pd.Series(similarities, index=cols)


# -------------------- Core SPA wrapper --------------------
def fit_with_SigProfilerAssignment(samples_df: pd.DataFrame,
                                   signature_db_path: str,
                                   outdir: str,
                                   genome_build: str = "GRCh38"):
    """
    Run SigProfilerAssignment.cosmic_fit on a samples matrix.

    samples_df:        rows=contexts, cols=samples
    signature_db_path: tab-separated signature table (rows=contexts, cols=signatures)
    outdir:            folder passed to SPA as its output location (created if missing)
    genome_build:      forwarded unchanged to ``cosmic_fit``

    Returns a dict with "contribution" (signatures x samples) and
    "reconstructed" (contexts x samples, restricted to the contexts that
    ``samples_df`` and the signature table share).

    Raises ValueError if the activities share no signature with the signature
    table, or if ``samples_df`` shares no context with it; either case would
    otherwise yield an empty reconstruction and all-zero cosine scores.
    """
    os.makedirs(outdir, exist_ok=True)

    # SPA takes a file path as input, so the catalogue is staged in a scratch folder
    with tempfile.TemporaryDirectory() as tmpd:
        matrix_path = os.path.join(tmpd, "samples_matrix.tsv")
        samples_df.to_csv(matrix_path, sep="\t")

        Analyze.cosmic_fit(
            samples=matrix_path,
            input_type="matrix",
            output=outdir,
            genome_build=genome_build,
            signature_database=signature_db_path,
            make_plots=False,
            sample_reconstruction_plots=False,
            cpu=1,
        )

    act_path = os.path.join(
        outdir, "Assignment_Solution", "Activities",
        "Assignment_Solution_Activities.txt"
    )
    # Read with "Samples" as index, then transpose to signatures x samples
    activities = pd.read_csv(act_path, sep="\t", index_col="Samples").T
    sig_mat = pd.read_csv(signature_db_path, sep="\t", index_col=0)  # contexts x signatures

    # Align signatures
    common_sigs = activities.index.intersection(sig_mat.columns)
    if common_sigs.empty:
        raise ValueError(
            f"No signature in {act_path} matches a column of {signature_db_path}"
        )
    activities = activities.loc[common_sigs]
    sig_mat = sig_mat[common_sigs]

    # Align contexts: keep only the rows the input catalogue also contains
    common_ctx = samples_df.index.intersection(sig_mat.index)
    if common_ctx.empty:
        raise ValueError(
            f"Input catalogue shares no context label with {signature_db_path}"
        )
    sig_mat = sig_mat.loc[common_ctx]

    # reconstructed: (contexts x signatures) @ (signatures x samples)
    reconstructed = pd.DataFrame(
        sig_mat.values @ activities.values,
        index=common_ctx,
        columns=activities.columns
    )

    return {
        "contribution": activities,      # signatures x samples
        "reconstructed": reconstructed,  # contexts x samples
    }


# -------------------- Orchestration --------------------
def main():
    """Fit every catalogue against every COSMIC file and tabulate cosine scores.

    For each entry of COSMIC_FILES and each dataset, SPA output goes to
    OUT_BASE/<cosmic version>/<dataset>. One row per sample is collected and
    the full table is written to "CosineResults_SPA_ALL.tsv" in the current
    directory; its first rows are printed.
    """
    dataset_labels = ["WGS", "WES", "TSO-500"]

    # Load input catalogues once
    catalogues = {label: load_crc_data(label) for label in dataset_labels}

    records = []
    for cosmic_file in COSMIC_FILES:
        # File name without its extension doubles as the version label
        cosmic_version, _ext = os.path.splitext(cosmic_file)
        cosmic_path = os.path.join(BASE_DIR, COSMIC_DIR, cosmic_file)
        version_dir = os.path.join(OUT_BASE, cosmic_version)
        os.makedirs(version_dir, exist_ok=True)

        print(f"\n=== {TOOL_NAME} with {cosmic_version} ===")

        for label in dataset_labels:
            print(f"[{TOOL_NAME}] Fitting {label}")
            observed = catalogues[label]
            fit = fit_with_SigProfilerAssignment(
                observed,
                signature_db_path=cosmic_path,
                outdir=os.path.join(version_dir, label),
                genome_build="GRCh38"
            )

            scores = cosine_per_sample(fit["reconstructed"], observed)
            records.extend(
                {
                    "Sample": sample,
                    "Cosine_Similarity": float(score),
                    "Data_Type": label,
                    "Tool": TOOL_NAME,
                    "COSMIC_Version": cosmic_version,
                }
                for sample, score in scores.items()
            )

    out_tsv = "CosineResults_SPA_ALL.tsv"
    results = pd.DataFrame(records)
    results.to_csv(out_tsv, sep="\t", index=False)
    print(f"\n[Saved] {out_tsv}")
    print(results.head())


if __name__ == "__main__":
    main()



=== SigProfilerAssignment with COSMIC_v2_SBS_GRCh38 ===
[SigProfilerAssignment] Fitting WGS
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:    2.3s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting WES
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    2.6s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    5.7s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:    7.9s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting TSO-500
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    2.9s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    6.4s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:    8.8s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 

=== SigProfilerAssignment with COSMIC_v3.2_SBS_GRCh38 ===
[SigProfilerAssignment] Fitting WGS
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    3.2s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:   12.2s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting WES
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    3.0s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:   12.2s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   26.1s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:   37.4s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting TSO-500
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    2.5s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:   11.2s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   24.2s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:   33.4s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 

[Saved] CosineResults_SPA_ALL.tsv
   Sample  Cosine_Similarity Data_Type                   Tool  \
0     C10           0.952420       WGS  SigProfilerAssignment   
1  C125PM           0.927599       WGS  SigProfilerAssignment   
2     C80           0.925640       WGS  SigProfilerAssignment   
3     C84           0.928805       WGS  SigProfilerAssignment   
4   CACO2           0.930211       WGS  SigProfilerAssignment   

         COSMIC_Version  
0  COSMIC_v2_SBS_GRCh38  
1  COSMIC_v2_SBS_GRCh38  
2  COSMIC_v2_SBS_GRCh38  
3  COSMIC_v2_SBS_GRCh38  
4  COSMIC_v2_SBS_GRCh38  


In [2]:
#spa_save_all.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import tempfile
import numpy as np
import pandas as pd
from SigProfilerAssignment import Analyzer as Analyze  # pip install SigProfilerAssignment

# -------------------- Config --------------------
# Working directory captured once, when this cell/module is executed; input
# and signature paths are built relative to it.
BASE_DIR   = os.getcwd()
# Folder (under BASE_DIR) that holds the COSMIC signature tables.
COSMIC_DIR = "Cosmic files"
# Folder (under BASE_DIR) that holds the per-workflow mutation catalogues.
INPUT_DIR  = "Input files"
OUT_BASE   = "signature_results/SPA"   # parent; subdirs per COSMIC version
# Value written to the "Tool" column of the results table and used in log lines.
TOOL_NAME  = "SigProfilerAssignment"

# Signature tables to fit against, one full pass over all datasets per file.
COSMIC_FILES = [
    "COSMIC_v2_SBS_GRCh38.txt",
    "COSMIC_v3.2_SBS_GRCh38.txt",
]

# Dataset label accepted by load_crc_data() -> catalogue file name in INPUT_DIR.
FILE_MAP = {
    "WES":     "Preclinical_Dataset_WES.txt",
    "WGS":     "Preclinical_Dataset_WGS.txt",
    "TSO-500": "Preclinical_Dataset_TSO-500.txt",
}


# -------------------- Helpers --------------------
def ensure_dir(p):
    """Create folder ``p`` (including missing parents); do nothing if it exists."""
    os.makedirs(name=p, exist_ok=True)

def load_crc_data(dataset_type):
    """Read the catalogue that FILE_MAP registers under ``dataset_type``.

    The file is taken from BASE_DIR/INPUT_DIR and parsed as tab-separated text
    with the first column as row index (rows=contexts, cols=samples). An
    unknown label raises KeyError.
    """
    catalogue_name = FILE_MAP[dataset_type]
    catalogue_file = os.path.join(BASE_DIR, INPUT_DIR, catalogue_name)
    return pd.read_csv(catalogue_file, index_col=0, sep="\t")

def load_cosmic_signatures(cosmic_file):
    """Read signature table ``cosmic_file`` from BASE_DIR/COSMIC_DIR.

    Parsed as tab-separated text with the first column as row index
    (rows=contexts, cols=signatures).
    """
    signature_table = os.path.join(BASE_DIR, COSMIC_DIR, cosmic_file)
    return pd.read_csv(signature_table, index_col=0, sep="\t")

def normalize_contributions(contrib):
    """Divide each column of ``contrib`` by its own sum.

    Columns that sum to zero are divided by 1 instead, which leaves them
    all-zero rather than producing NaN.
    """
    column_totals = contrib.sum(axis=0)
    safe_totals = column_totals.where(column_totals != 0, 1.0)
    return contrib.div(safe_totals, axis="columns")

def cosine_per_sample(reconstructed: pd.DataFrame, observed: pd.DataFrame) -> pd.Series:
    """Return the cosine similarity of each shared sample as a Series.

    Both frames are restricted to the row and column labels they have in
    common before comparison. The result is indexed by the shared column
    labels; a column with zero norm scores 0.
    """
    shared_rows = reconstructed.index.intersection(observed.index)
    shared_cols = reconstructed.columns.intersection(observed.columns)

    unit_matrices = []
    for frame in (reconstructed, observed):
        values = frame.loc[shared_rows, shared_cols].values
        norms = np.linalg.norm(values, axis=0)
        # Divide zero-length columns by 1 so they stay zero instead of NaN
        unit_matrices.append(values / np.where(norms == 0, 1.0, norms))

    recon_unit, obs_unit = unit_matrices
    return pd.Series(np.einsum("ij,ij->j", recon_unit, obs_unit), index=shared_cols)

def save_matrix(df: pd.DataFrame, path: str):
    """Write ``df`` to ``path`` as tab-separated text, index included."""
    df.to_csv(path_or_buf=path, sep="\t")

# -------------------- SPA wrapper --------------------
def fit_with_SPA(samples_df: pd.DataFrame, signature_db_path: str, outdir: str, genome_build: str = "GRCh38"):
    """Run SigProfilerAssignment on ``samples_df`` and rebuild the catalogues.

    samples_df:        rows=contexts, cols=samples
    signature_db_path: tab-separated signature table (rows=contexts, cols=signatures)
    outdir:            folder passed to SPA as its output location (created if missing)
    genome_build:      forwarded unchanged to ``cosmic_fit``

    Returns {"contribution": signatures x samples,
             "reconstructed": contexts x samples}, where the contexts are the
    rows of the signature table.
    """
    ensure_dir(outdir)

    # SPA takes a file path as input, so the catalogue is staged in a scratch folder
    with tempfile.TemporaryDirectory() as scratch_dir:
        staged_matrix = os.path.join(scratch_dir, "samples_matrix.tsv")
        samples_df.to_csv(staged_matrix, sep="\t")

        # Single-CPU run with all plotting switched off
        Analyze.cosmic_fit(
            samples=staged_matrix,
            output=outdir,
            input_type="matrix",
            signature_database=signature_db_path,
            genome_build=genome_build,
            cpu=1,
            make_plots=False,
            sample_reconstruction_plots=False,
        )

    activities_file = os.path.join(
        outdir,
        "Assignment_Solution",
        "Activities",
        "Assignment_Solution_Activities.txt",
    )
    # Read with "Samples" as index, then transpose to signatures x samples
    exposures = pd.read_csv(activities_file, sep="\t", index_col="Samples").T
    profiles = pd.read_csv(signature_db_path, sep="\t", index_col=0)  # contexts x signatures

    # Keep only signatures present in both tables, in a shared order
    shared_signatures = exposures.index.intersection(profiles.columns)
    exposures = exposures.loc[shared_signatures]
    profiles = profiles[shared_signatures]

    # (contexts x signatures) @ (signatures x samples) = (contexts x samples)
    rebuilt = pd.DataFrame(
        profiles.values @ exposures.values,
        index=profiles.index,
        columns=exposures.columns,
    )

    return {"contribution": exposures, "reconstructed": rebuilt}

# -------------------- Orchestration --------------------
def main():
    """Fit every catalogue against every COSMIC file and save all results.

    For each COSMIC version and dataset this writes, into
    OUT_BASE/<cosmic version>/: raw and normalised contributions, the
    reconstructed catalogue, and the lists of signatures and contexts used.
    SPA's own output goes to the <dataset> sub-folder. Per-sample cosine
    scores from all runs are written to
    "CosineResults_SigProfilerAssignment_ALL.tsv" in the current directory.
    """
    dataset_labels = ["WGS", "WES", "TSO-500"]

    # Preload catalogues
    catalogues = {label: load_crc_data(label) for label in dataset_labels}

    records = []

    for cosmic_file in COSMIC_FILES:
        # File name without its extension doubles as the version label
        cosmic_version, _ext = os.path.splitext(cosmic_file)
        signature_db_path = os.path.join(BASE_DIR, COSMIC_DIR, cosmic_file)

        version_dir = os.path.join(OUT_BASE, cosmic_version)
        ensure_dir(version_dir)

        def in_version_dir(filename):
            # All per-dataset tables of one COSMIC version share a folder
            return os.path.join(version_dir, filename)

        print(f"\n=== {TOOL_NAME} | {cosmic_version} ===")

        for dt in dataset_labels:
            print(f"[{TOOL_NAME}] Fitting {dt}")
            dataset_dir = os.path.join(version_dir, dt)
            ensure_dir(dataset_dir)

            observed = catalogues[dt]
            fit = fit_with_SPA(observed, signature_db_path, outdir=dataset_dir)

            # Contributions (signatures x samples), raw and column-normalised
            raw_contrib = fit["contribution"].copy()
            scaled_contrib = normalize_contributions(raw_contrib)
            save_matrix(raw_contrib, in_version_dir(f"{dt}_contribution.tsv"))
            save_matrix(scaled_contrib, in_version_dir(f"{dt}_contribution_normalized.tsv"))

            # Reconstructed catalogue (contexts x samples)
            rebuilt = fit["reconstructed"]
            save_matrix(rebuilt, in_version_dir(f"{dt}_reconstructed.tsv"))

            # Metadata: which signatures and contexts went into the tables above
            signature_list = pd.DataFrame({"Signature": raw_contrib.index})
            signature_list.to_csv(in_version_dir(f"{dt}_signatures_used.tsv"), sep="\t", index=False)
            context_list = pd.DataFrame({"Context": rebuilt.index})
            context_list.to_csv(in_version_dir(f"{dt}_contexts_used.tsv"), sep="\t", index=False)

            # One result row per sample
            scores = cosine_per_sample(rebuilt, observed)
            records.extend(
                {
                    "Sample": sample,
                    "Cosine_Similarity": float(score),
                    "Data_Type": dt,
                    "Tool": TOOL_NAME,
                    "COSMIC_Version": cosmic_version,
                }
                for sample, score in scores.items()
            )

    results = pd.DataFrame(records)
    results.to_csv("CosineResults_SigProfilerAssignment_ALL.tsv", sep="\t", index=False)
    print("\n[Saved] CosineResults_SigProfilerAssignment_ALL.tsv")


if __name__ == "__main__":
    main()



=== SigProfilerAssignment | COSMIC_v2_SBS_GRCh38 ===
[SigProfilerAssignment] Fitting WGS
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:    1.5s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting WES
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    1.8s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    4.0s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:    5.8s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting TSO-500
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    1.8s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    3.9s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:    5.6s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 

=== SigProfilerAssignment | COSMIC_v3.2_SBS_GRCh38 ===
[SigProfilerAssignment] Fitting WGS
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    2.0s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:    8.1s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting WES
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    2.0s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    8.8s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   19.9s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:   28.0s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 
[SigProfilerAssignment] Fitting TSO-500
Assigning COSMIC sigs or Signature Database ...... 


[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    1.7s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    6.9s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   16.4s
[Parallel(n_jobs=1)]: Done 230 out of 230 | elapsed:   24.3s finished




 
Your Job Is Successfully Completed! Thank You For Using SigProfilerAssignment.
 

[Saved] CosineResults_SigProfilerAssignment_ALL.tsv
