# Finding a Shared Motif

Link: https://rosalind.info/problems/lcsm/

In [2]:
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::collections::{HashMap, HashSet};

In [3]:
/// A DNA sequence stored as a plain string of nucleotide characters.
///
/// Equality is derived and compares the underlying sequence strings.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DNASeq {
    seq: String,
}

impl DNASeq {
    /// Returns `true` if `other`'s sequence occurs as a contiguous substring
    /// of this sequence.
    fn contains(&self, other: &Self) -> bool {
        self.seq.contains(other.seq.as_str())
    }

    /// Appends `s` to the end of this sequence.
    fn push_str(&mut self, s: &str) {
        self.seq.push_str(s);
    }

    /// Returns the length of the sequence in bytes.
    fn len(&self) -> usize {
        self.seq.len()
    }

    /// Returns every distinct non-empty contiguous substring of this sequence,
    /// from single characters up to and including the whole sequence.
    ///
    /// The order of the returned vector is unspecified. An empty sequence
    /// yields an empty vector.
    fn substrings(&self) -> Vec<Self> {
        // Collect the characters once; windows are taken over this slice.
        let chars: Vec<char> = self.seq.chars().collect();

        // Widths run from 1 to the full length *inclusive*: a shared motif may
        // be a single nucleotide, or the entire sequence when it is contained
        // in every other one. Width 0 is never requested (`windows(0)` panics).
        let unique: HashSet<String> = (1..=chars.len())
            .flat_map(|width| chars.windows(width))
            .map(|window| window.iter().collect::<String>())
            .collect();

        unique.into_iter().map(|seq| Self { seq }).collect()
    }
}

In [4]:
/// Reads a FASTA file into a map from record header to its sequence.
///
/// A line starting with `>` begins a new record; the header text after the
/// leading `>` becomes the map key. Every following non-header line is
/// appended to that record's sequence. Surrounding whitespace is trimmed from
/// each line and blank lines are skipped, so stray empty lines never create
/// an empty record. Sequence lines that appear before any header are stored
/// under the empty-string key.
///
/// # Panics
///
/// Panics if the file cannot be opened or a line cannot be read.
fn read_fasta(file_path: &str) -> HashMap<String, DNASeq> {
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);

    let mut data: HashMap<String, DNASeq> = HashMap::new();
    let mut seq_id = String::new();

    for line in reader.lines() {
        let line = line.expect("Failed to read line from FASTA file");
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        if line.starts_with('>') {
            seq_id = line.trim_start_matches('>').to_string();
        } else {
            data.entry(seq_id.clone())
                .or_insert_with(|| DNASeq { seq: String::new() })
                .push_str(line);
        }
    }

    data
}

In [None]:
// Load every FASTA record, keyed by its header.
let data: HashMap<String, DNASeq> = read_fasta("data/rosalind_lcsm.txt");

// A motif shared by all sequences must occur in the shortest one, so its
// substrings are the only candidates. With no records there are none.
let mut candidates: Vec<DNASeq> = data
    .values()
    .min_by_key(|seq| seq.len())
    .map(|shortest| shortest.substrings())
    .unwrap_or_default();

// Longest candidates first, so the first one found in every sequence is the
// answer and shorter candidates are never checked. Equal lengths are ordered
// alphabetically to make the reported motif reproducible between runs.
candidates.sort_unstable_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.seq.cmp(&b.seq)));

// Empty string when the sequences share no candidate motif at all.
let longest_motif: String = candidates
    .iter()
    .find(|motif| data.values().all(|dna_seq| dna_seq.contains(motif)))
    .map(|motif| motif.seq.clone())
    .unwrap_or_default();

longest_motif