# RNA Splicing

Link: https://rosalind.info/problems/splc/

In [100]:
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::collections::HashMap;

In [101]:
/// Loads a codon-to-amino-acid lookup table from a whitespace-separated text file.
///
/// Each line is expected to start with a codon followed by its translation
/// (for example `AUG M`); only the first two fields of a line are used.
/// Blank lines and lines with fewer than two fields are skipped rather than
/// aborting the whole load.
///
/// # Panics
///
/// Panics if `file_path` cannot be opened or if a line cannot be read.
fn read_codon_table(file_path: &str) -> HashMap<String, String> {
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);
    let mut codon_to_protein_map = HashMap::new();

    for line in reader.lines() {
        let line = line.expect("Failed to read line from codon table");
        let mut fields = line.split_whitespace();
        // A usable entry needs both a codon and its translation; anything
        // shorter (e.g. an empty trailing line) is ignored.
        if let (Some(codon), Some(protein)) = (fields.next(), fields.next()) {
            codon_to_protein_map.insert(codon.to_string(), protein.to_string());
        }
    }
    codon_to_protein_map
}

In [102]:
/// An RNA sequence held as text.
///
/// Values of this type are produced by `DNASeq::transcribe` and consumed by
/// `RNASeq::translate`.
#[derive(Debug)] 
pub struct RNASeq {
    // Raw sequence text; private to this module.
    seq: String,
}

/// A protein string, as returned by `RNASeq::translate`.
#[derive(Debug)] 
pub struct Protein {
    // Concatenation of the codon-table translations of each codon.
    seq: String,
}

/// A DNA sequence held as text.
///
/// It can be edited in place with `splice` and converted to RNA with
/// `transcribe`.
#[derive(Debug)] 
pub struct DNASeq {
    // Raw sequence text; private to this module.
    seq: String,
}

impl DNASeq {
    /// Produces the RNA transcript of this DNA sequence.
    ///
    /// Every `T` is rewritten as `U`; all other characters are copied
    /// through unchanged. The DNA sequence itself is not modified.
    pub fn transcribe(&self) -> RNASeq {
        RNASeq {
            seq: self.seq.replace('T', "U"),
        }
    }

    /// Deletes every occurrence of each intron from the sequence, in place.
    ///
    /// Introns are removed one after another in the order given, so each
    /// removal operates on the result of the previous one.
    fn splice(&mut self, introns: Vec<String>) -> () {
        // Move the current text out, thread it through one removal per
        // intron, and store the final result back.
        let current = std::mem::take(&mut self.seq);
        self.seq = introns
            .iter()
            .fold(current, |acc, intron| acc.replace(intron.as_str(), ""));
    }
}


impl RNASeq {
    /// Translates this RNA sequence into a protein string.
    ///
    /// The sequence is read from its first character in consecutive
    /// three-character codons. Each codon is looked up in the table loaded
    /// from `data/rna_codon_table.txt`. Translation ends at the first stop
    /// codon (a table entry of `*` or `Stop`) or at the end of the sequence.
    /// A trailing fragment of one or two characters cannot form a codon and
    /// is ignored.
    ///
    /// # Panics
    ///
    /// Panics if the codon table cannot be loaded, or if a complete codon of
    /// the sequence has no entry in the table.
    pub fn translate(&self) -> Protein {
        let codon_to_protein_map = read_codon_table("data/rna_codon_table.txt");
        let bases: Vec<char> = self.seq.chars().collect();
        let mut protein = String::with_capacity(bases.len() / 3);

        // `chunks_exact` never yields a short final chunk, so a sequence
        // whose length is not a multiple of three no longer triggers a
        // failed lookup on the leftover characters.
        for triplet in bases.chunks_exact(3) {
            let codon: String = triplet.iter().collect();
            let amino_acid = codon_to_protein_map
                .get(&codon)
                .unwrap_or_else(|| panic!("Unknown codon: {}", codon));
            if amino_acid == "*" || amino_acid == "Stop" {
                break;
            }
            protein.push_str(amino_acid);
        }
        Protein { seq: protein }
    }
}

In [103]:
fn read_fasta(file_path: &str) -> (DNASeq, Vec<String>) {
    let mut data = HashMap::new();
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);
    let mut seq_id_list = vec![];
    let mut seq_id = String::new();
    for line in reader.lines() {
        let line = line.unwrap();
        if line.starts_with('>') {
            seq_id = line.trim_start_matches('>').to_string();
            seq_id_list.push(seq_id.clone());
        } else {
            data.entry(seq_id.clone()).or_insert(String::new()).push_str(&line);
        }
    }
    let (_, first_seq) = data.remove_entry(&seq_id_list[0]).unwrap();
    let mut introns = vec![];
    for intron in data.values() {
        introns.push(intron.to_string());
    }
    (DNASeq{seq: first_seq}, introns)
}

In [107]:
// Load the input: the first FASTA record is the DNA string, the remaining records are the introns.
let (mut seq, introns) = read_fasta("data/rosalind_splc.txt");

In [108]:
// Delete each intron substring from the DNA sequence in place (consumes `introns`).
seq.splice(introns)

()

In [109]:
// Transcribe the spliced DNA to RNA, then translate it into the protein string shown below.
seq.transcribe().translate()

Protein { seq: "MPRLDEINCLSMIKTSTPITLRSRPSPVKVLEPNLSCVWSTLVVSPARIGTDTRRNYPVFTRFWPLILGLENRDNHLRPLQNSLGARGLSAAHQETPGFFSHRALSGWKATHSQSELAKLDADIQALLRWVRLAQDQMAVLGWTIREVTLTLAAMNKHVKETTARGFSGVTRPDFRRLKVRSSENEWMLKV" }