# Finding a Shared Spliced Motif

Link: https://rosalind.info/problems/lcsq/

In [3]:
:dep ndarray = { version = "0.15.6" }

In [4]:
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::collections::HashMap;
use ndarray::prelude::*

In [33]:
/// A nucleotide sequence stored as a plain string of base characters.
///
/// The type itself performs no validation of its contents.
#[derive(Clone, Debug)]
struct DNASeq {
    /// The bases of the sequence, in order.
    seq: String,
}

/// One step of a traceback path: a score, the pair of bases compared at that
/// step, and an owned link to the step it was derived from.
///
/// Because the predecessor is owned through a `Box`, cloning a `Trace` copies
/// the entire chain behind it.
#[derive(Clone, Debug)]
struct Trace {
    /// Score recorded for this step.
    value: i32,
    /// The two characters that were compared to produce this step.
    bases: (char, char),
    /// The step this one was derived from, or `None` for a starting step.
    from: Option<Box<Trace>>,
}

impl Trace {
    /// Builds a step from its score, its compared bases and its optional predecessor.
    fn new(value: i32, bases: (char, char), from: Option<Box<Trace>>) -> Self {
        Self { value, bases, from }
    }

    /// Returns the score stored in this step.
    fn get_value(&self) -> i32 {
        self.value
    }

    /// Returns an owned copy of the predecessor step, if there is one.
    ///
    /// This clones the whole chain behind the predecessor, not just one node.
    fn get_from(&self) -> Option<Box<Trace>> {
        Clone::clone(&self.from)
    }

    /// Returns the pair of characters compared at this step.
    fn get_bases(&self) -> (char, char) {
        self.bases
    }
}

impl DNASeq {
    
    fn len(&self) -> usize {
        self.seq.len()
    }
    
    fn push_str(&mut self, s: &str) {
        self.seq.push_str(s);
    }
    
    fn prepend_str(&mut self, s: &str) {
        self.seq.insert_str(0, s);
    }
    
    fn longest_subsequence(&self, other: &Self) -> Self {
        let matrix = self.subsequence_matrix(other);
        let last = self.last_element(other, matrix);
        let subsequence = Self::traceback(last);
        subsequence
    }
    
    fn traceback(last_element: Box<Trace>) -> Self {
        let mut subsequence = Self{ seq: "".to_string() };
        let mut trace_object = last_element;
        let (base_1, base_2) = trace_object.get_bases();
        if base_1 == base_2 {
            subsequence.prepend_str(&base_1.to_string());
        }
        while Option::is_some(&trace_object.get_from()) {
            trace_object = trace_object.get_from().unwrap();
            let (base_1, base_2) = trace_object.get_bases();
            if (base_1 == base_2) & (base_1 != '0') {
                subsequence.prepend_str(&base_1.to_string());
            }
        }
        subsequence
    }
    
    fn last_element(&self, other: &Self, matrix: Array2<Trace>) -> Box<Trace> {
        Box::new(matrix[(self.len(), other.len())].clone())
    }
    
    fn subsequence_matrix(&self, other: &Self) -> Array2<Trace> {
        let mut seq_1 = self.seq.clone();
        seq_1.insert(0, '0');
        let mut seq_2 = other.seq.clone();
        seq_2.insert(0, '0');
        let start = Trace {value: 0, bases: ('0', '0'), from: None};
        let vec_for_array_initialization = vec![start; seq_1.len() * seq_2.len()];
        let mut matrix = Array::from_shape_vec((seq_1.len(), seq_2.len()), vec_for_array_initialization).unwrap();
        for (i, x) in seq_1.chars().enumerate() {
            for (j, y) in seq_2.chars().enumerate() {
                if (x != '0') & (y != '0') {
                    if x == y {
                        let trace_object = Trace::new(matrix[(i-1, j-1)].get_value()+1, (x, y), Some(Box::new(matrix[(i-1, j-1)].clone())));
                        matrix[(i, j)] = trace_object;
                    } else {
                        if matrix[(i-1, j)].get_value() > matrix[(i, j-1)].get_value() {
                            let trace_object = Trace::new(matrix[(i-1, j)].get_value(), (x, y), Some(Box::new(matrix[(i-1, j)].clone())));
                            matrix[(i, j)] = trace_object;
                        } else {
                            let trace_object = Trace::new(matrix[(i, j-1)].get_value(), (x, y), Some(Box::new(matrix[(i, j-1)].clone())));
                            matrix[(i, j)] = trace_object;
                        }
                    }
                }
            }
        }
       matrix 
    }
    
}

In [34]:
/// Reads a FASTA file into a map from record id to sequence.
///
/// A line starting with `>` begins a new record; its id is the line with all
/// leading `>` characters removed. Every other line is appended to the record
/// named by the most recent header. Lines seen before any header are stored
/// under the empty-string id.
///
/// # Panics
///
/// Panics if the file cannot be opened or if a line cannot be read.
fn read_fasta(file_path: &str) -> HashMap<String, DNASeq> {
    let handle = File::open(file_path).expect("Invalid filepath");
    let mut records: HashMap<String, DNASeq> = HashMap::new();
    let mut current_id = String::new();

    for entry in BufReader::new(handle).lines() {
        let text = entry.unwrap();
        if text.starts_with('>') {
            // Header line: switch to a new record id.
            current_id = text.trim_start_matches('>').to_string();
            continue;
        }
        // Sequence line: extend the current record, creating it on first use.
        records
            .entry(current_id.clone())
            .or_insert_with(|| DNASeq { seq: String::new() })
            .push_str(&text);
    }

    records
}

In [37]:
// Load the records and order them by id. A HashMap yields its entries in an
// arbitrary order, so without the sort the roles of data[0] and data[1] (and
// therefore which of several equally long subsequences is reported) could
// change from run to run.
let data: Vec<DNASeq> = {
    let mut records: Vec<(String, DNASeq)> =
        read_fasta("data/rosalind_lcsq.txt").into_iter().collect();
    records.sort_by(|a, b| a.0.cmp(&b.0));
    records.into_iter().map(|(_, seq)| seq).collect()
};

In [38]:
data[0].longest_subsequence(&data[1])

DNASeq { seq: "ATCAGGATCGGTTTAGACGTGGACGTGTTGGCATAGTTCACGGCCTCCGCCCCGTTCGTTCCCGGCGGTCAAATAGTTCTCTGAGTGAGAGAAAGACTCCCACCATGCGTCCTGGGGTGCAAGGTGAGAAATACACGACATCCATTGCACCCCCCCCAGACAAATTGGAATCCAGAGCCAGATTGGCGCAAAGATCGGGCGTGCCCAATGGTCGTACCATCCTTTCAGGTTTCGTGCCCGAAATGAAGCGGTGCATTATGACGAACTATCCAGTTTTCTTCGGGGGCTCGTAGTAAAATATTACTGGCCGCAAATCGAATCGGAGAGTGACTAAGAGATCAGCACTGGGGGATCGTTGAGTGTCATGGCGACCAGTCCAGTTTGTACCTTCAACGGTGAGATTAGTTTCGGAGCTGGACCCCTCCGGAAACAAACCTCCTTCTCGGTTGGTTACGATAGGACGGACCTCGGACCAGAAAATGAGTATTCAGATTTTTGAAGGGTACGCGGTATGTTTACCGTACTTCAACCCCGTCTACAAACGGTCGCTGCCAGAAACTA" }