Skip to content

Commit

Permalink
Add a tool to make IGV-compat. BEDPE files from AggregateSvPileup
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Apr 10, 2024
1 parent 836bce0 commit eaff9f9
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 7 deletions.
14 changes: 10 additions & 4 deletions src/main/scala/com/fulcrumgenomics/sv/cmdline/ClpGroups.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@ import com.fulcrumgenomics.sopt.cmdline.ClpGroup
/** Groups for organizing command line programs for display. */
object ClpGroups {

// NOTE(review): this text comes from a diff view that interleaves the `_All` group with the groups
// that appear to replace it; as written `_All` is not closed before `_BreakpointAndSv` opens.
// Confirm the intended structure against the real file before relying on it.
/** A single catch-all group displayed as "All tools". */
class _All extends ClpGroup {
override val name: String = "All tools"
override val description: String = "All tools."
/** The group displayed as "Breakpoint and SV Tools". */
class _BreakpointAndSv extends ClpGroup {
override val name: String = "Breakpoint and SV Tools"
override val description: String = "Primary tools for calling and transforming breakpoints and SVs."
}

// Class handle for the catch-all group, for use as `group=ClpGroups.All` in `@clp` annotations.
final val All = classOf[_All]
/** The group displayed as "Utility Tools". */
class _Utilities extends ClpGroup {
override val name: String = "Utility Tools"
override val description: String = "Helper tools for working with breakpoint or SV data."
}

// Class handles for the two groups above, for use as the `group` argument of `@clp` annotations.
final val BreakpointAndSv = classOf[_BreakpointAndSv]
final val Utilities = classOf[_Utilities]
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import com.fulcrumgenomics.FgBioDef._
import scala.collection.mutable


@clp(group=ClpGroups.All, description=
@clp(group=ClpGroups.BreakpointAndSv, description=
"""
|Merges nearby pileups of reads supporting putative breakpoints.
|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package com.fulcrumgenomics.sv.tools

import com.fulcrumgenomics.commons.CommonsDef._
import com.fulcrumgenomics.commons.io.Writer
import com.fulcrumgenomics.commons.util.LazyLogging
import com.fulcrumgenomics.sopt.{arg, clp}
import com.fulcrumgenomics.sv.cmdline.{ClpGroups, SvTool}
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter
import com.fulcrumgenomics.util.{Io, Metric}
import htsjdk.tribble.annotation.Strand

import java.io.BufferedWriter


/** Command line tool that converts the aggregate pileups written by `AggregateSvPileup` into BEDPE.
  *
  * Every [[AggregatedBreakpointPileup]] row read from `input` is converted to one [[BedPE]] record
  * and written as one line of `output`.
  *
  * @param input  the text file of aggregate pileups to read
  * @param output the BEDPE file to write
  */
@clp(group=ClpGroups.Utilities, description= "Convert the output of AggregateSvPileup to BEDPE.")
class AggregateSvPileupToBedPE(
  @arg(flag='i', doc="Input text file of aggregate pileups generated by AggregateSvPileup") input: FilePath,
  @arg(flag='o', doc="Output text file of the aggregate pileups in BEDPE format.") output: FilePath,
) extends SvTool with LazyLogging {

  // Check both paths at construction time, before any work is attempted in execute().
  Io.assertReadable(input)
  Io.assertCanWriteFile(output)

  /** Reads every aggregate pileup from `input` and writes its BEDPE equivalent to `output`. */
  override def execute(): Unit = {
    val reader = Metric.iterator[AggregatedBreakpointPileup](input)
    val writer = BedPEWriter(output)

    // Close the writer even when reading or converting a record fails, so that the output handle
    // is never leaked and anything already buffered is flushed to disk.
    try {
      reader.map(BedPE.apply).foreach(writer.write)
    } finally {
      writer.close()
    }
  }
}

/** Companion object for [[AggregateSvPileupToBedPE]] holding the BEDPE record type and its writer. */
object AggregateSvPileupToBedPE {

  /** The BEDPE file extension that IGV supports. */
  val BedPEExtension: String = ".bedpe"

  /** A minimal BEDPE record following the `bedtools` definition:
    *
    *   - https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format
    *
    * The 10x flavor of BEDPE is not modelled here, though support could be added in the future:
    *
    *   - https://github.com/igvteam/igv/wiki/BedPE-Support
    *
    * The `score` field is a string because bedtools permits a string in that column.
    */
  case class BedPE(
    chrom1: String,
    start1: Int,
    end1: Int,
    chrom2: String,
    start2: Int,
    end2: Int,
    name: String,
    score: String,
    strand1: Strand,
    strand2: Strand,
  ) extends Metric

  /** Companion object for [[BedPE]] with a converter from pileups and a record writer. */
  object BedPE {

    /** Converts an [[AggregatedBreakpointPileup]] into a [[BedPE]] record.
      *
      * On each side the start is the minimum position and the end is the maximum position plus one.
      * The pileup identifier becomes the name and the total support becomes the score.
      */
    def apply(pileup: AggregatedBreakpointPileup): BedPE = {
      // NOTE(review): starts are copied unchanged while ends are shifted by one; confirm this matches
      // the coordinate convention of the pileup positions and of the BEDPE consumer.
      val leftStrand  = Strand.decode(pileup.left_strand)
      val rightStrand = Strand.decode(pileup.right_strand)

      new BedPE(
        chrom1  = pileup.left_contig,
        start1  = pileup.left_min_pos,
        end1    = pileup.left_max_pos + 1,
        chrom2  = pileup.right_contig,
        start2  = pileup.right_min_pos,
        end2    = pileup.right_max_pos + 1,
        name    = pileup.id,
        score   = pileup.total.toString,
        strand1 = leftStrand,
        strand2 = rightStrand,
      )
    }

    /** Writes [[BedPE]] records as tab-separated lines to a buffered writer. */
    class BedPEWriter(val out: BufferedWriter) extends Writer[BedPE] {

      /** Writes one [[BedPE]] record as a single tab-delimited line. */
      override def write(record: BedPE): Unit = {
        // The columns in the order the BEDPE format lays them out.
        val columns = Seq(
          record.chrom1,
          Integer.toString(record.start1),
          Integer.toString(record.end1),
          record.chrom2,
          Integer.toString(record.start2),
          Integer.toString(record.end2),
          record.name,
          record.score,
          record.strand1.toString,
          record.strand2.toString,
        )

        // A tab goes before every column except the first, so the line has no trailing delimiter.
        columns.iterator.zipWithIndex.foreach { case (column, index) =>
          if (index > 0) out.write('\t')
          out.write(column)
        }
        out.newLine()
      }

      /** Closes the wrapped writer. */
      override def close(): Unit = out.close()
    }

    /** Companion object to [[BedPEWriter]] providing factory methods. */
    object BedPEWriter {

      /** Builds a [[BedPEWriter]] that writes to the file at the given path. */
      def apply(path: PathToIntervals): BedPEWriter = apply(Io.toWriter(path))

      /** Builds a [[BedPEWriter]] on top of a [[java.io.Writer]], buffering it only if it is not already buffered. */
      def apply(writer: java.io.Writer): BedPEWriter = {
        val buffered = writer match {
          case alreadyBuffered: BufferedWriter => alreadyBuffered
          case unbuffered                      => new BufferedWriter(unbuffered)
        }
        new BedPEWriter(buffered)
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import com.fulcrumgenomics.util.{Io, Metric}

import scala.collection.immutable.IndexedSeq

@clp(group=ClpGroups.All, description=
@clp(group=ClpGroups.BreakpointAndSv, description=
"""
|Filters and merges SVPileup output.
""")
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/com/fulcrumgenomics/sv/tools/SvPileup.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ object TargetBedRequirement extends FgBioEnum[TargetBedRequirement] {
}


@clp(group=ClpGroups.All, description=
@clp(group=ClpGroups.BreakpointAndSv, description=
"""
|Collates a pileup of putative structural variant supporting reads.
|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package com.fulcrumgenomics.sv.tools

import com.fulcrumgenomics.commons.io.Io
import com.fulcrumgenomics.commons.util.DelimitedDataParser
import com.fulcrumgenomics.sv.UnitSpec
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.BedPE.BedPEWriter
import com.fulcrumgenomics.sv.tools.AggregateSvPileupToBedPE.{BedPE, BedPEExtension}
import com.fulcrumgenomics.util.Metric
import htsjdk.tribble.annotation.Strand


/** Unit tests for [[AggregateSvPileupToBedPE]], its [[BedPE]] record, and the [[BedPEWriter]]. */
class AggregateSvPileupToBedPETest extends UnitSpec {

  /** An aggregated pileup fixture describing a single inter-contig breakpoint. */
  private val pileupFixture = AggregatedBreakpointPileup(
    id            = "112",
    category      = "Inter-contig rearrangement",
    left_contig   = "chr1",
    left_min_pos  = 100,
    left_max_pos  = 100,
    left_strand   = '+',
    right_contig  = "chr3",
    right_min_pos = 200,
    right_max_pos = 200,
    right_strand  = '-',
    split_reads   = 1,
    read_pairs    = 1,
    total         = 2,
    left_pileups  = PositionList(100),
    right_pileups = PositionList(200),
  )

  /** The BEDPE record that the pileup fixture is expected to convert into. */
  private val bedPEFixture = BedPE(
    chrom1  = "chr1",
    start1  = 100,
    end1    = 101,
    chrom2  = "chr3",
    start2  = 200,
    end2    = 201,
    name    = "112",
    score   = "2",
    strand1 = Strand.POSITIVE,
    strand2 = Strand.NEGATIVE,
  )

  /** The column values expected on the output line written for the given record, in column order. */
  private def expectedColumns(record: BedPE): IndexedSeq[String] = IndexedSeq(
    record.chrom1,
    Integer.toString(record.start1),
    Integer.toString(record.end1),
    record.chrom2,
    Integer.toString(record.start2),
    Integer.toString(record.end2),
    record.name,
    record.score,
    record.strand1.toString,
    record.strand2.toString,
  )

  /** Parses the file at `path` as tab-delimited data and asserts that it holds exactly one row
    * whose columns equal `expected`, in order.
    *
    * The written file has no header line, so the declared field names of [[BedPE]] are supplied
    * to the parser as the header.
    */
  private def assertSingleRow(path: java.nio.file.Path, expected: Seq[String]) = {
    val header: Seq[String] = classOf[BedPE].getDeclaredFields.map(_.getName).toIndexedSeq
    val rows = DelimitedDataParser(path, delimiter = '\t', header = header).toSeq
    rows.length shouldBe 1
    val found = header.map(column => rows.head.get[String](column).value)
    found should contain theSameElementsInOrderAs expected
  }

  "AggregateSvPileupToBedPE.BedPE" should "be instantiated from an AggregateBreakpointPileup" in {
    BedPE(pileupFixture) shouldBe bedPEFixture
  }

  "AggregateSvPileupToBedPE.BedPEWriter" should "write a BedPE record" in {
    val record = BedPE(
      chrom1  = "chr1",
      start1  = 100,
      end1    = 101,
      chrom2  = "chr3",
      start2  = 200,
      end2    = 201,
      name    = "112",
      score   = "2",
      strand1 = Strand.POSITIVE,
      strand2 = Strand.NEGATIVE,
    )

    val bedPEPath = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    val bedPEWriter = BedPEWriter(bedPEPath)
    bedPEWriter.write(record)
    bedPEWriter.close()

    assertSingleRow(bedPEPath, expectedColumns(record))
  }

  "AggregateSvPileupToBedPE" should "convert an AggregateSvPileup output to a BEDPE file" in {
    val pileupPath = Io.makeTempFile(this.getClass.getSimpleName, ".txt")
    val bedPEPath  = Io.makeTempFile(this.getClass.getSimpleName, BedPEExtension)
    Metric.write[AggregatedBreakpointPileup](pileupPath, pileupFixture)

    new AggregateSvPileupToBedPE(input = pileupPath, output = bedPEPath).execute()

    assertSingleRow(bedPEPath, expectedColumns(bedPEFixture))
  }
}

0 comments on commit eaff9f9

Please sign in to comment.