Commit

clean up

akmorrow13 committed May 10, 2019
1 parent 4a27435 commit 9ea49cf
Showing 18 changed files with 90 additions and 196 deletions.
mango-cli/pom.xml (31 changes: 0 additions & 31 deletions)
@@ -123,21 +123,6 @@
</build>

<dependencies>
<!--<dependency>-->
<!--<groupId>com.esotericsoftware.kryo</groupId>-->
<!--<artifactId>kryo</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>com.github.samtools</groupId>-->
<!--<artifactId>htsjdk</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>it.unimi.dsi</groupId>-->
<!--<artifactId>fastutil</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>net.liftweb</groupId>
<artifactId>lift-json_${scala.version.prefix}</artifactId>
@@ -181,12 +166,6 @@
<artifactId>adam-core${spark.version.prefix}${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>org.bdgenomics.adam</groupId>-->
<!--<artifactId>adam-core${spark.version.prefix}${scala.version.prefix}</artifactId>-->
<!--<type>test-jar</type>-->
<!--<scope>test</scope>-->
<!--</dependency>-->
<dependency>
<groupId>org.bdgenomics.bdg-formats</groupId>
<artifactId>bdg-formats</artifactId>
@@ -202,11 +181,6 @@
<artifactId>utils-intervalrdd${spark.version.prefix}${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>org.bdgenomics.utils</groupId>-->
<!--<artifactId>utils-io${spark.version.prefix}${scala.version.prefix}</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-metrics${spark.version.prefix}${scala.version.prefix}</artifactId>
@@ -258,10 +232,5 @@
<artifactId>mango-core</artifactId>
<version>${project.version}</version>
</dependency>
<!--<dependency>-->
<!--<groupId>org.seqdoop</groupId>-->
<!--<artifactId>hadoop-bam</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
</dependencies>
</project>
mango-cli/src/main/webapp/WEB-INF/layouts/browser.ssp (2 changes: 1 addition & 1 deletion)
@@ -25,6 +25,7 @@
<script>
<!-- Define Variables for Pileup.js -->
var contig = '${region.referenceName}';
// convert start and end to valid int
var start = parseInt('${region.start}'.replace(/,/g, ""));
var end = parseInt('${region.end}'.replace(/,/g, ""));

@@ -35,7 +36,6 @@

xhr.onreadystatechange = function() {
if (xhr.readyState == 4 && xhr.status == 200) {
var contigList = JSON.parse(xhr.response); <!-- TODO what is this for? -->

// add reference and genes
var tracks = [
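The added parseInt lines in this file strip locale commas before parsing the region coordinates. A minimal Scala sketch of the same idea, assuming coordinates can arrive formatted like "1,234,567" (helper name hypothetical):

  // Hypothetical helper mirroring the comma-stripping above:
  // "1,234,567" fails numeric parsing, so drop the commas first.
  def parseCoordinate(s: String): Long =
    s.replace(",", "").trim.toLong

  parseCoordinate("1,234,567") // 1234567L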
@@ -368,7 +368,7 @@ class VizReadsSuite extends MangoFunSuite with ScalatraSuite {
val json = GA4GHutil.stringToVariantServiceResponse(response.getContent())
.getVariantsList

assert(json.size > 0)
assert(json.size == 61)
}
}

mango-core/pom.xml (45 changes: 0 additions & 45 deletions)
@@ -50,21 +50,11 @@
</plugins>
</build>
<dependencies>
<!--<dependency>-->
<!--<groupId>com.esotericsoftware.kryo</groupId>-->
<!--<artifactId>kryo</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>com.github.samtools</groupId>
<artifactId>htsjdk</artifactId>
<scope>compile</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>it.unimi.dsi</groupId>-->
<!--<artifactId>fastutil</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>net.liftweb</groupId>
<artifactId>lift-json_${scala.version.prefix}</artifactId>
@@ -98,22 +88,11 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version.prefix}</artifactId>
</dependency>
<!--<dependency>-->
<!--<groupId>org.bdgenomics.adam</groupId>-->
<!--<artifactId>adam-cli${spark.version.prefix}${scala.version.prefix}</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>org.bdgenomics.adam</groupId>
<artifactId>adam-core${spark.version.prefix}${scala.version.prefix}</artifactId>
<scope>compile</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>org.bdgenomics.adam</groupId>-->
<!--<artifactId>adam-core${spark.version.prefix}${scala.version.prefix}</artifactId>-->
<!--<type>test-jar</type>-->
<!--<scope>test</scope>-->
<!--</dependency>-->
<dependency>
<groupId>org.bdgenomics.bdg-formats</groupId>
<artifactId>bdg-formats</artifactId>
@@ -145,35 +124,11 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>org.eclipse.jetty</groupId>-->
<!--<artifactId>jetty-server</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>org.eclipse.jetty</groupId>-->
<!--<artifactId>jetty-webapp</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.version.prefix}</artifactId>
<scope>test</scope>
</dependency>
<!--<dependency>-->
<!--<groupId>org.scalatra</groupId>-->
<!--<artifactId>scalatra_${scala.version.prefix}</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>org.scalatra.scalate</groupId>-->
<!--<artifactId>scalate-core_${scala.version.prefix}</artifactId>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>org.seqdoop</groupId>-->
<!--<artifactId>hadoop-bam</artifactId>-->
<!--<scope>compile</scope>-->
<!--</dependency>-->
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
@@ -220,4 +220,4 @@ class SAMRecordConverter extends Serializable with Logging {
}
}
}
}
}
@@ -68,11 +68,13 @@ object BamReader extends GenomicReader[SAMFileHeader, AlignmentRecord, Alignment
SamInputResource.of(new java.net.URL(fp)).index(new java.net.URL(indexUrl)))
}

val dictionary = reader.getFileHeader.getSequenceDictionary
val header = reader.getFileHeader
val dictionary = header.getSequenceDictionary

// no valid dictionary. Cannot query or filter data.
if (dictionary.getSequences.isEmpty) {
return (reader.getFileHeader, Array[AlignmentRecord]())
reader.close()
return (header, Array[AlignmentRecord]())
}

// modify chr prefix, if this file uses chr prefixes.
@@ -105,7 +107,7 @@

reader.close()

(reader.getFileHeader, results)
(header, results)

} else {
val iter: Iterator[SAMRecord] = reader.iterator()
@@ -123,13 +125,13 @@ object BamReader extends GenomicReader[SAMFileHeader, AlignmentRecord, Alignment
reader.close()

// map SamRecords to ADAM
(reader.getFileHeader, samRecords.map(p => samRecordConverter.convert(p)))
(header, samRecords.map(p => samRecordConverter.convert(p)))
}
} else {
val samRecords = reader.iterator().toArray
reader.close

(reader.getFileHeader, samRecords.map(p => samRecordConverter.convert(p)))
(header, samRecords.map(p => samRecordConverter.convert(p)))
}

}
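The BamReader edits above all apply one pattern: capture the file header in a val while the reader is open, drain any iterators, then close the reader on every exit path instead of calling reader methods after close(). A minimal sketch of that pattern against the htsjdk SamReader API used in this diff (function name hypothetical):

  import htsjdk.samtools.{ SAMFileHeader, SAMRecord, SamReader }
  import scala.collection.JavaConversions._

  // Hypothetical sketch: cache the header and materialize the records
  // before close(), so nothing touches the reader after it is closed.
  def headerAndRecords(reader: SamReader): (SAMFileHeader, Array[SAMRecord]) = {
    val header = reader.getFileHeader       // cached while the reader is open
    val records = reader.iterator().toArray // drained before close
    reader.close()
    (header, records)                       // no live reader reference escapes
  }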
@@ -57,6 +57,7 @@ object BedReader extends GenomicReader[FeatureCodecHeader, Feature, FeatureDatas
createIndex(fp, codec)
}

val header = reader.getHeader().asInstanceOf[FeatureCodecHeader]
val dictionary = reader.getSequenceNames()

val results =
@@ -106,7 +107,7 @@
.setStart(r.getStart.toLong - 1) // move to 0 indexed to match HDFS results
.setEnd(r.getEnd.toLong).build()).toArray

(reader.getHeader().asInstanceOf[FeatureCodecHeader], features)
(header, features)
}

/**
Expand Down Expand Up @@ -171,4 +172,4 @@ object BedReader extends GenomicReader[FeatureCodecHeader, Feature, FeatureDatas
idxFile.deleteOnExit()
}
}
}
}
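The .setStart(r.getStart.toLong - 1) call above shifts htsjdk's 1-based, inclusive Tribble coordinates to the 0-based, half-open convention of the HDFS-backed results. A small sketch of that shift (function name hypothetical):

  // Hypothetical sketch: Tribble features are 1-based inclusive, so
  // subtracting 1 from start (and keeping end) gives 0-based half-open.
  def toZeroBasedHalfOpen(start1based: Int, end1based: Int): (Long, Long) =
    (start1based.toLong - 1, end1based.toLong)

  toZeroBasedHalfOpen(1, 100) // (0L, 100L): the first 100 bases of a contig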
@@ -26,6 +26,9 @@ import org.bdgenomics.adam.models.ReferenceRegion
import org.bdgenomics.adam.rdd.GenomicDataset

/**
* Trait for specifying functions for loading data from remote or local
* genomic file formats (vcf, bed, bam, narrowPeak)
*
* @tparam H File header
* @tparam R SpecificRecord
* @tparam D GenomicDataset
@@ -75,7 +78,11 @@

def loadHDFS(sc: SparkContext, path: String, regions: Option[Iterable[ReferenceRegion]]): Tuple2[D, Array[R]]

// helper functions
/**
* Helper function for generating URL from string
* @param urlString URL string
* @return URL
*/
def createURL(urlString: String): URL = {

return new URL(urlString.trim())
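Because the concrete readers in this commit extend this trait, createURL is available on each of them. A hypothetical call, assuming only the trimming behavior shown above:

  // Hypothetical usage: createURL trims stray whitespace before parsing.
  val url = BamReader.createURL(" http://example.com/sample.bam ")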
@@ -36,7 +36,6 @@ import org.bdgenomics.mango.models.LazyMaterialization
import org.bdgenomics.utils.misc.Logging
import org.bdgenomics.formats.avro.Sample
import scala.collection.JavaConversions._
import htsjdk.tribble.index.Index
import htsjdk.tribble.AbstractFeatureReader

object VcfReader extends GenomicReader[VCFHeader, ADAMVariantContext, VariantContextDataset] with Logging {
@@ -241,10 +240,10 @@
}

// TODO already defined in ADAM in VariantContextConverter line 266
private def getHeaderLines(header: VCFHeader): Seq[VCFHeaderLine] = {
def getHeaderLines(header: VCFHeader): Seq[VCFHeaderLine] = {
(header.getFilterLines ++
header.getFormatHeaderLines ++
header.getInfoHeaderLines ++
header.getOtherHeaderLines)
}
}
}
@@ -17,40 +17,23 @@
*/
package org.bdgenomics.mango.models

import java.io.{ File, PrintWriter, StringWriter }
import java.net.URL

import htsjdk.samtools.util.{ HttpUtils, SamRecordIntervalIteratorFactory }
import htsjdk.samtools._
import net.liftweb.json.Extraction._
import net.liftweb.json._
import org.apache.hadoop.io.LongWritable
import org.apache.parquet.filter2.dsl.Dsl._
import org.apache.parquet.filter2.predicate.FilterPredicate
import org.apache.parquet.hadoop.util.ContextUtil
import org.apache.parquet.io.api.Binary
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.{ SequenceDictionary, ReferenceRegion }
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.mango.converters.SAMRecordConverter
import org.bdgenomics.mango.core.util.ResourceUtils
import org.bdgenomics.mango.io.BamReader
import org.bdgenomics.utils.misc.{ HadoopUtil, Logging }
import org.bdgenomics.utils.misc.Logging
import org.bdgenomics.utils.instrumentation.Metrics
import ga4gh.Reads.ReadAlignment
import net.liftweb.json.Extraction._
import net.liftweb.json.Serialization._
import org.seqdoop.hadoop_bam.{ SAMRecordWritable, BAMInputFormat }
import org.seqdoop.hadoop_bam.util.SAMHeaderReader
import scala.collection.JavaConversions._
import scala.reflect._
import org.bdgenomics.mango.converters.GA4GHutil._

import scala.reflect.ClassTag

// metric variables
object AlignmentTimers extends Metrics {
val loadADAMData = timer("load alignments from parquet")
@@ -19,23 +19,17 @@ package org.bdgenomics.mango.models

import net.liftweb.json.Serialization.write
import org.apache.parquet.filter2.dsl.Dsl._
import org.apache.parquet.filter2.predicate.FilterPredicate
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary }
import org.bdgenomics.adam.projections.{ FeatureField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.feature.FeatureDataset
import org.bdgenomics.formats.avro.Feature
import org.bdgenomics.mango.converters.GA4GHutil
import org.bdgenomics.mango.core.util.{ ResourceUtils, VizUtils }
import org.bdgenomics.mango.core.util.ResourceUtils
import org.bdgenomics.mango.io.BedReader
import org.bdgenomics.utils.misc.Logging
import java.io.{ StringWriter, PrintWriter }
import scala.collection.JavaConversions._

import org.slf4j.LoggerFactory

/**
* Handles loading and tracking of data from persistent storage into memory for Feature data.
*
@@ -17,14 +17,9 @@
*/
package org.bdgenomics.mango.models

import java.io.File

import org.apache.hadoop.fs.Path
import org.apache.spark.{ HashPartitioner, SparkContext }
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary }
import org.bdgenomics.mango.core.util.ResourceUtils
import org.bdgenomics.mango.util.Bookkeep
import org.bdgenomics.utils.instrumentation.Metrics
import org.bdgenomics.utils.interval.array.IntervalArray
@@ -195,19 +190,19 @@
def put(regions: Iterable[ReferenceRegion]) = {
checkMemory()

// LazyMaterializationTimers.put.time {
LazyMaterializationTimers.put.time {

// filter out regions that are not found in the sequence dictionary
val filteredRegions = regions.filter(r => sd(r.referenceName).isDefined)
// filter out regions that are not found in the sequence dictionary
val filteredRegions = regions.filter(r => sd(r.referenceName).isDefined)

val data = loadAllFiles(Some(regions))
val data = loadAllFiles(Some(regions))

// tag regions as found, even if there is no data
filteredRegions.foreach(r => bookkeep.rememberValues(r, getFiles))
// tag regions as found, even if there is no data
filteredRegions.foreach(r => bookkeep.rememberValues(r, getFiles))

intArray = intArray.insert(data.toIterator)
intArray = intArray.insert(data.toIterator)

// }
}
}

/**
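This change restores the timer block that had been commented out, so put is again measured through bdg-utils instrumentation. A minimal sketch of that pattern, following the AlignmentTimers definition earlier in this diff (object and timer names hypothetical):

  import org.bdgenomics.utils.instrumentation.Metrics

  // Hypothetical timers object, mirroring AlignmentTimers above.
  object ExampleTimers extends Metrics {
    val put = timer("put new regions into the interval array")
  }

  // time { ... } runs the block and records its duration under the timer.
  def timedPut[T](work: => T): T = ExampleTimers.put.time {
    work
  }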