In [2]:
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

In [3]:
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

/// Reads a FASTA file and maps each record header to its concatenated sequence.
///
/// A line beginning with `>` starts a new record; the text after that single
/// leading `>` (trailing whitespace removed) becomes the map key. Every
/// following non-blank line is appended, without separators, to that record's
/// sequence until the next header is seen.
///
/// Details worth knowing:
/// - Blank and whitespace-only lines are ignored and never create a record.
/// - A header with no sequence lines still yields an entry with an empty sequence.
/// - If the same header occurs more than once, the sequences are concatenated
///   under the one key.
/// - Sequence data that appears before any header is stored under the empty
///   key `""`.
///
/// # Panics
///
/// Panics if `file_path` cannot be opened, or if a line cannot be read
/// (for example because of an I/O error or invalid UTF-8).
fn read_fasta(file_path: &str) -> HashMap<String, String> {
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);

    let mut data: HashMap<String, String> = HashMap::new();
    // Key of the record currently being filled; empty until the first header.
    let mut seq_id = String::new();

    for line in reader.lines() {
        let line = line.expect("Failed to read line from FASTA file");
        // `lines()` already removes the line terminator; this also drops
        // trailing spaces/tabs so they do not leak into keys or sequences.
        let line = line.trim_end();
        if line.is_empty() {
            continue;
        }

        if let Some(header) = line.strip_prefix('>') {
            seq_id = header.to_string();
            // Register the record right away so header-only records are kept.
            // `or_default` leaves an existing sequence intact for repeated headers.
            data.entry(seq_id.clone()).or_default();
        } else if let Some(sequence) = data.get_mut(&seq_id) {
            // Common path: append in place without allocating a new key.
            sequence.push_str(line);
        } else {
            // Only reached for sequence data that precedes the first header.
            data.insert(seq_id.clone(), line.to_string());
        }
    }

    data
}

In [4]:
// Parse `sample_file.fa` (path is relative to the working directory). With no
// trailing semicolon, the returned HashMap is the cell's value.
read_fasta("sample_file.fa")

{"Rosalind_1629": "CACCGATCGTCTGACTAGACTACGTTTGGGTACATCTCGGATACCGATTAGTTGAACGCGAAGTTCTGCGCGATTCTCATTGTCCCTGAGTCAGCACTGACCGATCAATCTGATCGTTACCTATTTGGGACAAAAAGTTAAAATACATGGATCGGCGTGGTAACGCACAACAGGAATTGCCACCAGCTAAAGATCCGCGAGCCCTCACATAACGCTGTCTGGACGATATGGGCTCGTCAACAGTCGCATCAAGGTGACCTTGTAGAACAACTAATATACTCGGCTACGTATGTGTCAATAATCCCCCTAACAGGCCAGGCACGTTTGTCCAGACTTTTACTATCTGACTAATTGAAATCATGTCTACCTTGTAGACGCAACTGGCAAAGCGGACCGCCAACGTACTGTGTGGTTTTGCACCGCTTGCAATGGTAGGAAGATAATAAATGAATGTGTGTTGAGGGTAGTCAGACCCGGTTATTGGGTTACGACTAAGGAGCTCGGGTACGCCCACGTCCCAACCCTCCTTATTTGGTAACCAAAACACCGGACCTACTCGAGCCTCAATCAGTCGGGCGACGATGTTCTCCTGTGTAATATCCTGAATGAACTGGCATGAGACAGCAAATTGACTCTGAGAACGATCCGATACGATAAGCTCCAGACGCCTGGAAAAAATTCCTTATCTCCCAATACCGGCCAGGGGACATTGAGTGTTATGCCAAAATTTCTGGAGGTAGAGCAAGACTTCAGCTGCGCTCACTGCTCCTTAGGATAGTTATAAGAGCTGTGTTATGCTGACACATCCTTATCCCGGTAGGTCCGGACAATTCGCCAGGAAAGATGACGCGTTACTGGTCACCAGAGTCCCAAACCAGAGGATAGCGCATGAGATTTCTCGATGGCGTTATCTG", "Rosalind_9749": "ATCTCTCATCACTGGCTGAGCCTGCGGCGCCAGAATTGCAGAAATA