Skip to content

Commit

Permalink
method name layout
Browse files Browse the repository at this point in the history
  • Loading branch information
blueshen committed Jun 26, 2023
1 parent 4d41648 commit 1d20762
Show file tree
Hide file tree
Showing 13 changed files with 113 additions and 127 deletions.
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ mod test {
}

```

# Usage for Tantivy

use [tantivy-ik](https://github.com/blueshen/tantivy-ik) project


# Benchmark

High performance
Expand All @@ -70,6 +64,11 @@ ik_tokenize_benchmark time: [19.366 µs 19.572 µs 19.850 µs]
change: [-1.5364% -0.4029% +0.7357%] (p = 0.51 > 0.05)

```
# Usage for Tantivy

use [tantivy-ik](https://github.com/blueshen/tantivy-ik) project


---
Rust developers and search engine developers are welcome to join us and help maintain this project together!

Expand Down
22 changes: 10 additions & 12 deletions src/core/char_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,19 @@ pub enum CharType {
OtherCjk,
}

impl TryFrom<char> for CharType {
type Error = ();

fn try_from(input: char) -> Result<Self, Self::Error> {
impl From<char> for CharType {
fn from(input: char) -> Self {
if input >= '0' && input <= '9' {
return Ok(CharType::ARABIC);
return CharType::ARABIC;
} else if (input >= 'a' && input <= 'z') || (input >= 'A' && input <= 'Z') {
return Ok(CharType::ENGLISH);
return CharType::ENGLISH;
} else {
if let Some(ub) = unicode_blocks::find_unicode_block(input) {
if ub == unicode_blocks::CJK_UNIFIED_IDEOGRAPHS
|| ub == unicode_blocks::CJK_COMPATIBILITY_IDEOGRAPHS
|| ub == unicode_blocks::CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
{
return Ok(CharType::CHINESE);
return CharType::CHINESE;
} else if ub == unicode_blocks::HALFWIDTH_AND_FULLWIDTH_FORMS
|| ub == unicode_blocks::HANGUL_SYLLABLES
|| ub == unicode_blocks::HANGUL_JAMO
Expand All @@ -32,18 +30,18 @@ impl TryFrom<char> for CharType {
|| ub == unicode_blocks::KATAKANA
|| ub == unicode_blocks::KATAKANA_PHONETIC_EXTENSIONS
{
return Ok(CharType::OtherCjk);
return CharType::OtherCjk;
}
}
}
return Ok(CharType::USELESS);
return CharType::USELESS;
}
}

// identify CharType Of char
pub fn char_type_of(input: char) -> CharType {
CharType::try_from(input).unwrap()
}
// pub fn char_type_of(input: char) -> CharType {
// CharType::from(input)
// }

// full char -> half char && lowercase
pub fn regularize(input: char) -> char {
Expand Down
4 changes: 2 additions & 2 deletions src/core/cjk_segmenter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ impl Segmenter for CJKSegmenter {
CharType::USELESS => {}
_ => {
let char_count = utf8_len(input);
let hit_options = GLOBAL_DICT.lock().unwrap().match_in_main_dict_with_offset(
let hits = GLOBAL_DICT.lock().unwrap().match_in_main_dict_with_offset(
input,
cursor,
char_count - cursor,
);
for hit in hit_options.iter() {
for hit in hits.iter() {
if hit.is_match() {
let new_lexeme = Lexeme::new(hit.pos.clone(), LexemeType::CNWORD);
origin_lexemes.insert(new_lexeme);
Expand Down
18 changes: 3 additions & 15 deletions src/core/cn_quantifier_segmenter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,15 @@ impl CnQuantifierSegmenter {
if self.start.is_some() && self.end.is_some() {
return true;
}
if origin_lexemes.is_empty() {
if origin_lexemes.empty() {
return false;
} else {
let last = origin_lexemes.peek_back();
if let Some(lexeme) = last {
if lexeme.lexeme_type == LexemeType::ARABIC
|| lexeme.lexeme_type == LexemeType::CNUM
{
if lexeme.get_end_position() == cursor {
if lexeme.end_position() == cursor {
return true;
}
}
Expand All @@ -143,19 +143,7 @@ impl CnQuantifierSegmenter {
}

fn initial_state(&self) -> bool {
return match self.start {
None => match self.end {
None => {
true
}
Some(_) => {
false
}
},
Some(_) => {
false
}
}
self.start.is_none() && self.end.is_none()
}

fn reset_state(&mut self) {
Expand Down
12 changes: 6 additions & 6 deletions src/core/ik_arbitrator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,23 @@ impl IKArbitrator {
for org_lexeme in org_lexemes.iter() {
if !cross_path.add_cross_lexeme(org_lexeme) {
if self.need_add_path(&cross_path, mode) {
path_map.insert(cross_path.get_path_begin() as usize, cross_path);
path_map.insert(cross_path.path_begin() as usize, cross_path);
} else {
let judge_result = self.judge(cross_path.get_head());
let judge_result = self.judge(cross_path.head_node());
if let Some(path) = judge_result {
path_map.insert(path.get_path_begin() as usize, path);
path_map.insert(path.path_begin() as usize, path);
}
}
cross_path = LexemePath::new();
cross_path.add_cross_lexeme(org_lexeme);
}
}
if self.need_add_path(&cross_path, mode) {
path_map.insert(cross_path.get_path_begin() as usize, cross_path);
path_map.insert(cross_path.path_begin() as usize, cross_path);
} else {
let judge_result = self.judge(cross_path.get_head());
let judge_result = self.judge(cross_path.head_node());
if let Some(path) = judge_result {
path_map.insert(path.get_path_begin() as usize, path);
path_map.insert(path.path_begin() as usize, path);
}
}
path_map
Expand Down
15 changes: 7 additions & 8 deletions src/core/ik_segmenter.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use crate::core::char_util::char_type_of;
use crate::core::char_util::regularize_str;
use crate::core::char_util::CharType;
use crate::core::cjk_segmenter::CJKSegmenter;
Expand Down Expand Up @@ -44,7 +43,7 @@ impl IKSegmenter {
let input = regular_str.as_str();
let mut origin_lexemes = OrderedLinkedList::<Lexeme>::new();
for (cursor, curr_char) in input.chars().enumerate() {
let curr_char_type = char_type_of(curr_char);
let curr_char_type = CharType::from(curr_char);
for segmenter in self.segmenters.iter_mut() {
segmenter.analyze(input, cursor, curr_char_type, &mut origin_lexemes);
}
Expand All @@ -61,8 +60,8 @@ impl IKSegmenter {
}
if !GLOBAL_DICT.lock().unwrap().is_stop_word(
input,
result_value.get_begin_position(),
result_value.get_length(),
result_value.begin_position(),
result_value.length(),
) {
result_value.parse_lexeme_text(input);
final_results.push(result_value.clone())
Expand All @@ -82,7 +81,7 @@ impl IKSegmenter {
let char_count = input.chars().count();
while index < char_count {
let curr_char = input.chars().nth(index).unwrap();
let cur_char_type = char_type_of(curr_char);
let cur_char_type = CharType::from(curr_char);
if CharType::USELESS == cur_char_type {
index += 1;
continue;
Expand All @@ -92,12 +91,12 @@ impl IKSegmenter {
let mut cur_lexeme = p.poll_first();
while let Some(ref lexeme) = cur_lexeme {
results.push_back(lexeme.clone());
index = lexeme.get_end_position();
index = lexeme.end_position();
cur_lexeme = p.poll_first();
if let Some(ref lexeme) = cur_lexeme {
while index < lexeme.get_begin_position() {
while index < lexeme.begin_position() {
let curr_char = input.chars().nth(index).unwrap();
let cur_char_type = char_type_of(curr_char);
let cur_char_type = CharType::from(curr_char);
self.add_single_lexeme(&mut results, cur_char_type, index);
index += 1;
}
Expand Down
16 changes: 8 additions & 8 deletions src/core/lexeme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ impl PartialEq for Lexeme {

impl PartialOrd for Lexeme {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
return if self.get_begin_position() < other.get_begin_position() {
return if self.begin_position() < other.begin_position() {
Some(Ordering::Less)
} else if self.get_begin_position() == other.get_begin_position() {
} else if self.begin_position() == other.begin_position() {
if self.pos.len() > other.pos.len() {
Some(Ordering::Less)
} else if self.pos.len() == other.pos.len() {
Expand All @@ -86,29 +86,29 @@ impl Lexeme {
}
}

pub fn get_begin_position(&self) -> usize {
pub fn begin_position(&self) -> usize {
self.offset + self.pos.start
}

pub fn get_end_position(&self) -> usize {
pub fn end_position(&self) -> usize {
self.offset + self.pos.end
}

pub fn get_length(&self) -> usize {
pub fn length(&self) -> usize {
self.pos.len()
}

pub fn get_lexeme_text(&self) -> &str {
pub fn lexeme_text(&self) -> &str {
&self.lexeme_text
}

pub fn parse_lexeme_text(&mut self, input: &str) {
let sub_text = utf8_slice(input, self.get_begin_position(), self.get_end_position());
let sub_text = utf8_slice(input, self.begin_position(), self.end_position());
self.lexeme_text = sub_text.to_string();
}

pub fn append(&mut self, l: &Lexeme, lexeme_type: LexemeType) -> bool {
if self.get_end_position() == l.get_begin_position() {
if self.end_position() == l.begin_position() {
self.pos.end = l.pos.end;
self.lexeme_type = lexeme_type;
return true;
Expand Down
Loading

0 comments on commit 1d20762

Please sign in to comment.