In [1]:
import matplotlib.pyplot as plt
import numpy as np
from pickle_serialize import save, load
import os
from gene_centric_acceptor_donor_agnostic_gencode_evaluator import GencodeSpliceSiteEvaluator 

In [None]:
# End-to-end evaluation cell: build the evaluator, derive ground truth from
# the GENCODE annotation, generate predictions, persist them, reload them,
# and compute the metrics consumed by the plotting cell below.

print("Initializing evaluator...")

# Both reference inputs live in the sibling ../data directory.
data_dir = os.path.join("..", "data")
gtf_path = os.path.join(data_dir, "gencode.v47.basic.annotation.gtf")
fasta_path = os.path.join(data_dir, "GRCh38.primary_assembly.genome.fa")
evaluator = GencodeSpliceSiteEvaluator(gencode_gtf=gtf_path, fasta_file=fasta_path)

print("Parsing GENCODE GTF...")

# Ground truth comes from the evaluator's GENCODE parser.
ground_truth = evaluator.parse_gencode()

print("Generating predictions...")

# Predictions are generated against the parsed ground truth.
pangolin_predictions = evaluator.generate_predictions(ground_truth)

print("Serializing predictions...")

# Persist the raw predictions under a bare name.
# NOTE(review): presumably `save` writes results/pangolin_splice_predictions.pkl,
# which is the path reloaded below — confirm against pickle_serialize.
save(pangolin_predictions, "pangolin_splice_predictions")

print("Loading predictions...")

# Reload the predictions from disk; per the original note this step also
# trims them to gene regions.
trimmed_pangolin_predictions = evaluator.load_predictions(
    "results/pangolin_splice_predictions.pkl"
)

print("Calculating metrics...")

# calculate_metrics yields four values, unpacked in the order
# (precision, recall, auprc, top_k).
metrics = evaluator.calculate_metrics(ground_truth, trimmed_pangolin_predictions)
precision, recall, auprc, top_k = metrics

In [None]:
# Plot the precision-recall curve.
# Reads `recall`, `precision`, `auprc` and `top_k` produced by the metrics cell.
fig, ax = plt.subplots(figsize=(10, 6))

# Label the curve so the legend has an entry to show; calling legend() with no
# labelled artists only produces a warning and an empty legend.
ax.plot(recall, precision, label=f'Pangolin (AUPRC = {auprc:.3f})')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title(f'Precision-Recall Curves\nAUPRC: {auprc:.3f}, Top-k: {top_k:.3f}')
ax.legend()
ax.grid(True)

# savefig does not create missing directories: results/ must already exist.
fig.savefig("results/auprc_topk_pangolin.png", dpi=300)
plt.show()

In [None]:
# truth_sites = ground_truth['chr1'][962470-20:962470+20]
# pred_sites = trimmed_pangolin_predictions['chr1'][962470-20:962470+20]

# print(truth_sites)
# print(pred_sites)