In [1]:
import validator
import alignment_utilities as au
import alignment_visualization_utilities as avu
from validator import check_interval, align_flanking_sequences

In [2]:
import warnings
warnings.filterwarnings( "ignore", module = "seaborn\..*" )
warnings.filterwarnings( "ignore", module = "matplotlib\..*" )

In [3]:
class ReferenceLocations:
    """Filesystem paths of the FASTA files used by this notebook.

    ``whole_genome`` is read once, while the class body executes, to choose
    between the whole-genome file and its ``chr1``-only counterpart for every
    attribute. Edit the flag and re-run this cell to switch.
    """

    # NOTE(review): every path below is an absolute path on one machine;
    # consider deriving them from a configurable base directory.
    whole_genome = True

    hg002t2t = (
        "/Users/fleharty/resources/hg002v0.9.fasta.gz"
        if whole_genome
        else "/Users/fleharty/resources/hg002v0.9.chr1.fasta.gz"
    )
    chm13 = (
        "/Users/fleharty/resources/chm13v2.0.fa.gz"
        if whole_genome
        else "/Users/fleharty/resources/chm13v2.0.chr1.fa.gz"
    )
    hg38 = (
        "/Users/fleharty/resources/Homo_sapiens_assembly38.fasta"
        if whole_genome
        else "/Users/fleharty/resources/Homo_sapiens_assembly38.chr1.fasta.gz"
    )
    variant_call_fasta = (
        "/Users/fleharty/AlignmentValidator/hg002_cnv_passing_variants_seq.fasta"
        if whole_genome
        else "/Users/fleharty/AlignmentValidator/hg002_cnv_passing_variants_seq.chr1.fasta"
    )

In [4]:
# Load the hg002t2t reference from the path configured in ReferenceLocations.
hg002t2t = au.load_reference(
    ReferenceLocations.hg002t2t,
)

INFO:root:Loading reference from: /Users/fleharty/resources/hg002v0.9.fasta.gz


In [None]:
# Load the hg38 reference from the path configured in ReferenceLocations.
hg38 = au.load_reference(
    ReferenceLocations.hg38,
)

In [None]:
import importlib

# Pick up edits made to the validator module without restarting the kernel.
importlib.reload(validator)
# reload() does not rebind names that were copied out of the module with
# `from validator import ...`, so refresh them here; otherwise cells that call
# check_interval() directly would keep running the pre-reload code.
from validator import check_interval, align_flanking_sequences

# Align the flanking sequences of the interval chr1:103900576-103901103.
flanking_alignments = validator.align_flanking_sequences(
    "chr1:103900576-103901103",
    ReferenceLocations.hg38,
    hg38,
    hg002t2t,
)
# Bare last expression so the notebook displays the result.
flanking_alignments

In [None]:
# Show the left and right truth-flank entries with one blank line between them.
print(flanking_alignments["left_truth_flank"], end="\n\n")
print(flanking_alignments["right_truth_flank"])

In [None]:
# Evaluate the interval chr1:103900576-103901103 as a deletion.
# NOTE(review): the next cell repeats this exact call; one of the two can
# probably be dropped. The meaning of the trailing positional True is defined
# by validator.evaluate_deletion and is not visible here.
ev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:103900576-103901103",
    hg38,
    hg002t2t,
    True,
)


In [None]:
# Example: a simple, small heterozygous (HET) deletion.
ev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:103900576-103901103",
    hg38,
    hg002t2t,
    True,
)

In [None]:
# Check the same interval that the deletion examples above evaluate.
# Called through the module (like the other cells) so that an earlier
# importlib.reload(validator) is honored; the bare `check_interval` name bound
# by `from validator import ...` would still point at the pre-reload function.
validator.check_interval(
    "chr1:103900576-103901103",
    ReferenceLocations.hg38,
    hg38,
    hg002t2t,
)

In [None]:
# Example: a simple, small homozygous (HOM) deletion.
ev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:176509082-176509637",
    hg38,
    hg002t2t,
    True,
)

In [None]:
# Example: a complex deletion, chr1:247687159-247693213.
# TODO: this interval has 6 matches to hg38 chr1 -- find out why.
ev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:247687159-247693213",
    hg38,
    hg002t2t,
)

In [None]:
# This interval is a DUP call; evaluate it here as if it were a deletion.
ev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:248407554-248446847",
    hg38,
    hg002t2t,
)

In [None]:
# A Dragen DEL call that is filtered by MinQUAL.
# NOTE(review): the result is bound to `cev`, while every other cell uses
# `ev` -- possibly a typo; confirm before relying on either name.
cev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:3643704-3644335",
    hg38,
    hg002t2t,
)

In [None]:
# A Dragen DEL call that is filtered by cnvLength.
# Observation: on hg002 the joined sequence matches only (0-50) on PATERNAL;
# there is no (51-100) match on PATERNAL.
# Hypothesis: the Dragen caller may be underestimating the size of the event.
ev = validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr1:14109814-14112366",
    hg38,
    hg002t2t,
)

In [None]:
# Interval lists passed to the plotting cell below.
# NOTE(review): `stuff` / `other_stuff` are placeholder names; more descriptive
# names would need the cell that consumes them to be renamed in step.
stuff = [
    'chr10:39521557-39533371',
]
other_stuff = [
    'chr10_PATERNAL:39521557-39533371',
    'chr10_MATERNAL:39475935-39487740',
    'chr10_MATERNAL:39538510-39550326',
    'chr10_MATERNAL:39486365-39498169',
    'chr10_MATERNAL:39496794-39508598',
    'chr10_MATERNAL:39507223-39519027',
    'chr10_MATERNAL:39517652-39529456',
    'chr10_MATERNAL:39528081-39539885',
]

In [None]:
# Build a PlotIntervals object from the two interval lists and call its
# plot_interval_on_chromo() method.
avu.PlotIntervals(
    stuff,
    other_stuff,
).plot_interval_on_chromo()

In [None]:
# Run check_interval on every interval listed (one per line) in the chr1
# deletions file and print each interval next to its result.
with open('../resources/chr1_deletions.txt', 'r') as intervals_file:
    for raw_line in intervals_file:
        # Drop leading/trailing whitespace and the newline character.
        interval = raw_line.strip()
        if not interval:
            # Blank lines (e.g. a trailing empty line) are not intervals.
            continue
        # Call through the module so an earlier importlib.reload(validator)
        # is honored; the bare imported name would be the pre-reload function.
        print(interval, validator.check_interval(interval, ReferenceLocations.hg38, hg38, hg002t2t))


In [None]:
# Evaluate the interval chr17:46135410-46292238 as a deletion; the result is
# not assigned, so the notebook displays it as the cell output.
# NOTE(review): the following cell repeats this exact call.
validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr17:46135410-46292238",
    hg38,
    hg002t2t,
    True,
)

In [None]:
# Evaluate the interval chr17:46135410-46292238 as a deletion; the result is
# not assigned, so the notebook displays it as the cell output.
# NOTE(review): this repeats the call made in the previous cell.
validator.evaluate_deletion(
    ReferenceLocations.hg38,
    ReferenceLocations.hg002t2t,
    "chr17:46135410-46292238",
    hg38,
    hg002t2t,
    True,
)