New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert .fastq to .adam #1718

Closed
Rokshan2016 opened this Issue Sep 12, 2017 · 3 comments

Comments

Projects
2 participants
@Rokshan2016

Rokshan2016 commented Sep 12, 2017

Is there any way I can convert a .fastq file directly to .adam?

@heuermh

This comment has been minimized.

Show comment
Hide comment
@heuermh

heuermh Sep 12, 2017

Member

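Yes, there are several load methods on ADAMContext that can read FASTQ files. Once loaded, the resulting RDD can be written out in ADAM format by calling saveAsParquet on it. For example: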

$ adam-shell
...

scala> import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMContext._

scala> val unpaired = sc.loadUnpairedFastq("adam-core/src/test/resources/fastq_sample1.fq")
unpaired: org.bdgenomics.adam.rdd.read.AlignmentRecordRDD = RDDBoundAlignmentRecordRDD(MapPartitionsRDD[5] at map at ADAMContext.scala:1950,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> unpaired.rdd.count()
res2: Long = 6

scala> val paired = sc.loadPairedFastq("adam-core/src/test/resources/proper_pairs_1.fq", "adam-core/src/test/resources/proper_pairs_2.fq")
paired: org.bdgenomics.adam.rdd.read.AlignmentRecordRDD = RDDBoundAlignmentRecordRDD(UnionRDD[10] at $plus$plus at ADAMContext.scala:1908,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> paired.rdd.count()
res3: Long = 6

scala> val interleaved = sc.loadInterleavedFastq("adam-core/src/test/resources/interleaved_fastq_sample1.ifq")
interleaved: org.bdgenomics.adam.rdd.read.AlignmentRecordRDD = RDDBoundAlignmentRecordRDD(MapPartitionsRDD[12] at flatMap at ADAMContext.scala:1822,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> interleaved.rdd.count()
res4: Long = 6

scala> val interleavedFragments = sc.loadInterleavedFastqAsFragments("adam-core/src/test/resources/interleaved_fastq_sample1.ifq")
interleavedFragments: org.bdgenomics.adam.rdd.fragment.FragmentRDD = RDDBoundFragmentRDD(MapPartitionsRDD[14] at map at ADAMContext.scala:2206,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> interleavedFragments.rdd.count()
res5: Long = 3

scala> interleavedFragments.rdd.first()
res6: org.bdgenomics.formats.avro.Fragment = {"readName": "H06HDADXX130110:2:2116:3345:91806", "instrument": null, "runId": null, "fragmentSize": null, "alignments": [{"readInFragment": 0, "contigName": null, "start": null, "oldPosition": null, "end": null, "mapq": null, "readName": "H06HDADXX130110:2:2116:3345:91806", "sequence": "GTTAGGGTTAGGGTTGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGCTAGGGTTAAGGGTAGGGTTAGCGAAAGGGCTGGGGTTAGGGGTGCGGGTACGCGTAGCATTAGGGCTAGAAGTAGGATCTGCAGTGCCTGACCGCGTCTGCGCGGCGACTGCCCAAAGCCTGGGGCCGACTCCAGGCTGAAGCTCAT", "qual": ">=<=???>?>???=??>>8<?><=2=<===1194<?;:?>>?#3==>##########################################################################################################################################################...
Member

heuermh commented Sep 12, 2017

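Yes, there are several load methods on ADAMContext that can read FASTQ files. Once loaded, the resulting RDD can be written out in ADAM format by calling saveAsParquet on it. For example: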

$ adam-shell
...

scala> import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMContext._

scala> val unpaired = sc.loadUnpairedFastq("adam-core/src/test/resources/fastq_sample1.fq")
unpaired: org.bdgenomics.adam.rdd.read.AlignmentRecordRDD = RDDBoundAlignmentRecordRDD(MapPartitionsRDD[5] at map at ADAMContext.scala:1950,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> unpaired.rdd.count()
res2: Long = 6

scala> val paired = sc.loadPairedFastq("adam-core/src/test/resources/proper_pairs_1.fq", "adam-core/src/test/resources/proper_pairs_2.fq")
paired: org.bdgenomics.adam.rdd.read.AlignmentRecordRDD = RDDBoundAlignmentRecordRDD(UnionRDD[10] at $plus$plus at ADAMContext.scala:1908,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> paired.rdd.count()
res3: Long = 6

scala> val interleaved = sc.loadInterleavedFastq("adam-core/src/test/resources/interleaved_fastq_sample1.ifq")
interleaved: org.bdgenomics.adam.rdd.read.AlignmentRecordRDD = RDDBoundAlignmentRecordRDD(MapPartitionsRDD[12] at flatMap at ADAMContext.scala:1822,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> interleaved.rdd.count()
res4: Long = 6

scala> val interleavedFragments = sc.loadInterleavedFastqAsFragments("adam-core/src/test/resources/interleaved_fastq_sample1.ifq")
interleavedFragments: org.bdgenomics.adam.rdd.fragment.FragmentRDD = RDDBoundFragmentRDD(MapPartitionsRDD[14] at map at ADAMContext.scala:2206,SequenceDictionary{},RecordGroupDictionary(),List(),None)

scala> interleavedFragments.rdd.count()
res5: Long = 3

scala> interleavedFragments.rdd.first()
res6: org.bdgenomics.formats.avro.Fragment = {"readName": "H06HDADXX130110:2:2116:3345:91806", "instrument": null, "runId": null, "fragmentSize": null, "alignments": [{"readInFragment": 0, "contigName": null, "start": null, "oldPosition": null, "end": null, "mapq": null, "readName": "H06HDADXX130110:2:2116:3345:91806", "sequence": "GTTAGGGTTAGGGTTGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGGTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTAGGGCTAGGGTTAAGGGTAGGGTTAGCGAAAGGGCTGGGGTTAGGGGTGCGGGTACGCGTAGCATTAGGGCTAGAAGTAGGATCTGCAGTGCCTGACCGCGTCTGCGCGGCGACTGCCCAAAGCCTGGGGCCGACTCCAGGCTGAAGCTCAT", "qual": ">=<=???>?>???=??>>8<?><=2=<===1194<?;:?>>?#3==>##########################################################################################################################################################...
@Rokshan2016

This comment has been minimized.

Show comment
Hide comment
@Rokshan2016

Rokshan2016 Sep 12, 2017

ok, let me try that

Thanks

Rokshan2016 commented Sep 12, 2017

ok, let me try that

Thanks

@Rokshan2016

This comment has been minimized.

Show comment
Hide comment
@Rokshan2016

Rokshan2016 Sep 13, 2017

It worked!

Thanks

Rokshan2016 commented Sep 13, 2017

It worked!

Thanks

@heuermh heuermh added this to the 0.23.0 milestone Dec 7, 2017

@heuermh heuermh added this to Completed in Release 0.23.0 Jan 4, 2018

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment