In [2]:
:dep ndarray = { version = "0.15.6" }

In [3]:
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::collections::HashMap;
use ndarray::prelude::*;
use std::cmp;

In [8]:
/// A protein sequence stored as plain text, one character per residue.
///
/// Besides `Debug`, the type derives `Clone`, `Default` (an empty sequence),
/// and `PartialEq`/`Eq` so that values can be copied, created empty and
/// compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Protein {
    /// The sequence text; fragments are appended as they are read.
    seq: String,
}

impl Protein {
    
    fn len(&self) -> usize {
        self.seq.len()
    }
    
    fn push_str(&mut self, s: &str) {
        self.seq.push_str(s);
    }
    
    fn edit_distance(&self, other: &Self) -> usize {
        let matrix = self.edit_distance_matrix(other);
        let distance = self.last_element(other, matrix);
        distance
    }
    
    fn last_element(&self, other: &Self, matrix: Array2<usize>) -> usize {
        matrix[(self.len(), other.len())]
    }
    
    fn edit_distance_matrix(&self, other: &Self) -> Array2<usize> {
        let mut seq_1 = self.seq.clone();
        seq_1.insert(0, ' ');
        let mut seq_2 = other.seq.clone();
        seq_2.insert(0, ' ');
        let vec_for_array_initialization = vec![0; seq_1.len() * seq_2.len()];
        let mut matrix = Array::from_shape_vec((seq_1.len(), seq_2.len()), vec_for_array_initialization).unwrap();
        for (i, x) in seq_1.chars().enumerate() {
            for (j, y) in seq_2.chars().enumerate() {
                if (x == ' ') & (y != ' ') {
                    matrix[(i, j)] = j;
                } else if (y == ' ') & (x != ' ') {
                    matrix[(i, j)] = i;
                } else {
                    if (x == ' ') & (y == ' ') { continue; }
                    else {
                        let left = matrix[(i, j-1)];
                        let top = matrix[(i-1, j)];
                        let diagonal = matrix[(i-1, j-1)];
                        let min_value = cmp::min(cmp::min(left, top), diagonal);
                        if x == y {
                            matrix[(i, j)] = diagonal;
                        } else {
                            matrix[(i, j)] = min_value + 1;
                        } 
                    }
                }
            }
        }
        matrix 
    }
    
    
}

In [9]:
/// Reads a FASTA-style file into a map from record identifier to [`Protein`].
///
/// A line starting with `>` begins a new record; the text after the leading
/// `>` characters becomes its identifier. Every following non-header line is
/// appended to that record's sequence. Surrounding whitespace is stripped
/// from each line and blank lines are ignored, so they neither pollute a
/// sequence nor create empty records. Sequence lines that appear before any
/// header are collected under the empty identifier `""`. A header with no
/// sequence lines produces no entry.
///
/// # Panics
///
/// Panics if the file cannot be opened or if a line cannot be read.
fn read_fasta(file_path: &str) -> HashMap<String, Protein> {
    let mut data = HashMap::new();
    let file = File::open(file_path).expect("Invalid filepath");
    let reader = BufReader::new(file);

    let mut seq_id = String::new();
    for line in reader.lines() {
        let line = line.unwrap();
        let line = line.trim();
        if line.is_empty() {
            // Blank lines carry no sequence data.
            continue;
        }
        if line.starts_with('>') {
            seq_id = line.trim_start_matches('>').to_string();
        } else {
            data.entry(seq_id.clone())
                .or_insert_with(|| Protein { seq: String::new() })
                .push_str(line);
        }
    }

    data
}

In [12]:
// Parse the input file and keep only the sequences; the record identifiers
// are dropped. The order of the resulting vector follows the map's iteration
// order.
let data: Vec<Protein> = read_fasta("data/rosalind_edit.txt")
    .into_values()
    .collect();

In [13]:
data[0].edit_distance(&data[1])

330