In [4]:
:dep bio = { version = "*"}

In [3]:
// Import some modules
use bio::alphabets;
use bio::data_structures::bwt::{bwt, less, Occ};
use bio::data_structures::fmindex::{FMIndex, FMIndexable};
use bio::data_structures::suffix_array::suffix_array;
use bio::io::fastq;
use bio::io::fastq::FastqRead;
use std::io;

// a given text
let text = b"ACAGCTCGATCGGTA";
let pattern = b"ATCG";

// Create an FM-Index for the given text.

// instantiate an alphabet
let alphabet = alphabets::dna::iupac_alphabet();
// calculate a suffix array
let sa = suffix_array(text);
// calculate the Burrows-Wheeler-transform
let bwt = bwt(text, &sa);
// calculate the vectors less and Occ (occurrences)
let less = less(&bwt, &alphabet);
let occ = Occ::new(&bwt, 3, &alphabet);
// set up FMIndex
let fmindex = FMIndex::new(&bwt, &less, &occ);
// do a backwards search for the pattern
let interval = fmindex.backward_search(pattern.iter());
let positions = interval.occ(&sa);

// Iterate over a FASTQ file, use the alphabet to validate read
// sequences and search for exact matches in the FM-Index.

// create FASTQ reader
let mut reader = fastq::Reader::new(io::stdin());
let mut record = fastq::Record::new();
reader.read(&mut record).expect("Failed to parse record");

while !record.is_empty() {
    let check = record.check();
    if check.is_err() {
        panic!("I got a rubbish record!")
    }
    // obtain sequence
    let seq = record.seq();
    // check, whether seq is in the expected alphabet
    if alphabet.is_word(seq) {
        let interval = fmindex.backward_search(seq.iter());
        let positions = interval.occ(&positions);
    }
    reader.read(&mut record).expect("Failed to parse record");
}

Error: unused variable: `positions`

Error: `bwt` does not live long enough

Error: `less` does not live long enough

Error: `occ` does not live long enough

Error: cannot move out of `occ` because it is borrowed

Error: cannot move out of `bwt` because it is borrowed

Error: cannot move out of `less` because it is borrowed

In [6]:
use bio::alphabets;
use bio::data_structures::bwt::{bwt, less, Occ};
use bio::data_structures::fmindex::{FMIndex, FMIndexable};
use bio::data_structures::suffix_array::suffix_array;
use std::sync::Arc;
use std::thread;

let text = b"ACGGATGCTGGATCGGATCGCGCTAGCTA$";
let patterns = vec![b"ACCG", b"TGCT"];

// Create an FM-Index for a given text.
let alphabet = alphabets::dna::iupac_alphabet();
let sa = suffix_array(text);
let bwt = Arc::new(bwt(text, &sa));
let less = Arc::new(less(bwt.as_ref(), &alphabet));
let occ = Arc::new(Occ::new(bwt.as_ref(), 3, &alphabet));
let fmindex = Arc::new(FMIndex::new(bwt, less, occ));

// Spawn threads to perform backward searches for each interval
let interval_calculators = patterns
    .into_iter()
    .map(|pattern| {
        let fmindex = fmindex.clone();
        thread::spawn(move || fmindex.backward_search(pattern.iter()))
    })
    .collect::<Vec<_>>();

// Loop through the results, extracting the positions array for each pattern
for interval_calculator in interval_calculators {
    let positions = interval_calculator.join().unwrap().occ(&sa);
    println!("{}", positions);
}

Error: invalid format string: expected `'}'`, found `'?'`

In [None]:
# :dep evcxr_input

//let mut reader = fastq::Reader::new(io::stdin());
let mut fastq = evcxr_input::get_string("Fastq:");

println!("FASTQ: {}", fastq);