# Finding a Spliced Motif

Link: https://rosalind.info/problems/sseq/

In [169]:
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::collections::{HashMap, HashSet};

In [170]:
/// A DNA sequence stored as a plain string of bases.
#[derive(Debug, Clone)]
struct DNASeq {
    seq: String,
}

impl DNASeq {
    /// Returns the length of the sequence in bytes.
    fn len(&self) -> usize {
        self.seq.len()
    }

    /// Appends `s` to the end of the sequence.
    fn push_str(&mut self, s: &str) {
        self.seq.push_str(s);
    }

    /// Finds occurrences of `other` as a (not necessarily contiguous)
    /// subsequence of `self`.
    ///
    /// For every position in `self` holding the first base of `other`, the
    /// remaining bases are matched greedily: each one is taken at the earliest
    /// position strictly after the previously matched one. Every start
    /// position that yields a complete match contributes one entry to the
    /// result, in ascending order of start position.
    ///
    /// Only the bases `A`, `C`, `G` and `T` are indexed. An empty `other`, or
    /// one containing any other character, produces an empty result.
    ///
    /// Returned positions are 1-based.
    fn subsequence(&self, other: &Self) -> Vec<Vec<usize>> {
        // Ascending 0-based positions of each base in `self`.
        let mut positions: HashMap<char, Vec<usize>> = HashMap::new();
        for base in ['A', 'C', 'T', 'G'] {
            let hits = self.seq.match_indices(base).map(|(idx, _)| idx).collect();
            positions.insert(base, hits);
        }

        let mut motif = other.seq.chars();
        let first = match motif.next() {
            Some(base) => base,
            // An empty motif has no start position to anchor a match on.
            None => return Vec::new(),
        };
        let tail: Vec<char> = motif.collect();

        let starts = match positions.get(&first) {
            Some(hits) => hits,
            None => return Vec::new(),
        };

        let mut result = Vec::new();
        for &start in starts {
            // Positions are stored 1-based as they are found.
            let mut sub = Vec::with_capacity(tail.len() + 1);
            sub.push(start + 1);
            let mut last = start;
            let mut complete = true;

            for base in &tail {
                // The position lists are sorted, so the earliest hit strictly
                // after `last` can be located by binary search.
                let next = positions.get(base).and_then(|hits| {
                    hits.get(hits.partition_point(|&p| p <= last)).copied()
                });
                match next {
                    Some(p) => {
                        sub.push(p + 1);
                        last = p;
                    }
                    None => {
                        complete = false;
                        break;
                    }
                }
            }

            if !complete {
                // A later start only has a shorter suffix of `self` to match
                // against, so once one start fails every later one fails too.
                break;
            }
            result.push(sub);
        }

        result
    }
}

In [171]:
/// Reads a FASTA file into a map from record id to its sequence.
///
/// A line starting with `>` begins a new record; the rest of that line (with
/// the leading `>` removed) is the record id. Every other non-blank line is
/// appended to the sequence of the most recent record. Trailing whitespace,
/// including the `\r` of CRLF line endings, is stripped from every line, and
/// blank lines are skipped, so neither ends up in ids or sequence data.
///
/// Sequence lines that appear before any header are collected under the
/// empty-string id.
///
/// # Panics
///
/// Panics if the file cannot be opened or a line cannot be read.
fn read_fasta(file_path: &str) -> HashMap<String, DNASeq> {
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);

    let mut data: HashMap<String, DNASeq> = HashMap::new();
    let mut seq_id = String::new();
    for line in reader.lines() {
        let line = line.expect("Failed to read line from FASTA file");
        let line = line.trim_end();
        if line.is_empty() {
            // Blank lines carry no data and must not create an empty record.
            continue;
        }

        if line.starts_with('>') {
            seq_id = line.trim_start_matches('>').to_string();
        } else {
            data.entry(seq_id.clone())
                .or_insert_with(|| DNASeq { seq: String::new() })
                .push_str(line);
        }
    }

    data
}

In [179]:
// Only the sequences are needed from here on, so the record ids are dropped.
// The map's iteration order is unspecified, so the order of `data` is too.
let data: Vec<DNASeq> = read_fasta("data/rosalind_sseq.txt")
    .into_values()
    .collect();

In [180]:
let seq_1: DNASeq = data[0].clone();
let seq_2: DNASeq = data[1].clone();

// The longer record is searched; the other one is the motif. On equal
// lengths `seq_2` is searched for `seq_1`.
let output = if seq_1.len() > seq_2.len() {
    seq_1.subsequence(&seq_2)
} else {
    seq_2.subsequence(&seq_1)
};

()

In [181]:
for x in output[0].iter() {
    println!{"{}", x};
}

4
11
15
17
19
22
26
34
39
42
43
45
47
56
64
66


()