# Computing GC Content

Link: https://rosalind.info/problems/gc/

In [2]:
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::collections::HashMap;

In [3]:
/// A DNA sequence stored as a plain string of nucleotide symbols.
#[derive(Debug)]
pub struct DNASeq {
    seq: String,
}

impl DNASeq {
    /// Appends `s` to the end of the stored sequence.
    fn push_str(&mut self, s: &str) {
        self.seq.push_str(s);
    }

    /// Returns the GC content of the sequence as a percentage.
    ///
    /// The numerator is the number of `'C'` and `'G'` characters; the
    /// denominator is the byte length of the stored string, so any other
    /// characters present in the sequence still count towards the total.
    ///
    /// An empty sequence yields `0.0`, and a sequence that contains no `'C'`
    /// or no `'G'` is handled by treating the missing count as zero.
    fn gc_content(&self) -> f64 {
        // Guard against 0 / 0, which would otherwise produce NaN.
        if self.seq.is_empty() {
            return 0.0;
        }

        let counts = self.count_nucleotides();
        // A nucleotide that never occurs has no key in the map, so fall back
        // to zero instead of unwrapping.
        let gc: usize = ['C', 'G']
            .iter()
            .map(|base| counts.get(base).copied().unwrap_or(0))
            .sum();

        (gc as f64 / self.seq.len() as f64) * 100.0
    }

    /// Counts the occurrences of `'A'`, `'C'`, `'G'` and `'T'` in the sequence.
    ///
    /// Only nucleotides that actually occur get a key in the returned map;
    /// every other character is ignored.
    fn count_nucleotides(&self) -> HashMap<char, usize> {
        let mut counts: HashMap<char, usize> = HashMap::new();
        let nucleotides = self
            .seq
            .chars()
            .filter(|c| matches!(*c, 'A' | 'C' | 'G' | 'T'));
        for base in nucleotides {
            // `entry` yields a mutable reference to the (possibly fresh) count.
            *counts.entry(base).or_insert(0) += 1;
        }
        counts
    }
}

In [4]:
/// Reads a FASTA file and returns its records keyed by sequence id.
///
/// A line starting with `>` begins a new record; its id is the rest of the
/// line with the leading `>` characters and surrounding whitespace removed.
/// Every following non-header line is appended to that record's sequence.
/// Blank lines are skipped, and a header with no sequence data produces no
/// entry. Sequence lines that appear before any header are collected under
/// the empty id `""`. Records sharing the same id are concatenated.
///
/// # Panics
///
/// Panics if the file cannot be opened or a line cannot be read.
fn read_fasta(file_path: &str) -> HashMap<String, DNASeq> {
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);

    let mut data: HashMap<String, DNASeq> = HashMap::new();
    let mut seq_id = String::new();
    // Sequence text of the record currently being read; it is moved into the
    // map once per record rather than looking the id up for every line.
    let mut pending = String::new();

    let mut flush = |id: &str, buf: &mut String| {
        // Nothing buffered means a header without sequence lines: skip it so
        // no empty record ends up in the map.
        if buf.is_empty() {
            return;
        }
        data.entry(id.to_string())
            .or_insert_with(|| DNASeq { seq: String::new() })
            .push_str(buf.as_str());
        buf.clear();
    };

    for line in reader.lines() {
        let line = line.expect("Failed to read line from FASTA file");
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        if line.starts_with('>') {
            // A new header closes the previous record.
            flush(&seq_id, &mut pending);
            seq_id = line.trim_start_matches('>').to_string();
        } else {
            pending.push_str(line);
        }
    }
    // The last record has no following header to trigger its flush.
    flush(&seq_id, &mut pending);

    data
}

In [20]:
// Load every FASTA record of the Rosalind dataset, keyed by sequence id.
let data = read_fasta("data/rosalind_gc.txt");

{
    // Walk all records and keep the (id, GC%) pair with the strictly highest
    // GC content; the search starts from ("", 0.0), which is also the result
    // when no record exceeds zero.
    let (id_of_max_value, max_value) = data.iter().fold(
        ("", 0.0_f64),
        |best, (seq_id, seq)| {
            let gc = seq.gc_content();
            if gc > best.1 {
                (seq_id.as_str(), gc)
            } else {
                best
            }
        },
    );

    (id_of_max_value, max_value)
}

("Rosalind_5158", 51.09489051094891)