Skip to content

dimmy82/search-query-parser

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

69 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

search-query-parser

crates.io docs.rs build

what is this library for

search-query-parser parses a complex search query into layered search conditions, making it easy to construct an Elasticsearch query DSL or a query for any other search backend.

a complex search query like this: ↓↓↓

(word1 and -word2) or (("phrase word 1" or -"phrase word 2") and -(" a long phrase word " or word3))

will be parsed into layered search conditions like this: ↓↓↓

Condition::Operator(
    Operator::Or,
    vec![
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Keyword("word1".into()),
                Condition::Not(Box::new(Condition::Keyword("word2".into()))),
            ]
        ),
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Operator(
                    Operator::Or,
                    vec![
                        Condition::PhraseKeyword("phrase word 1".into()),
                        Condition::Not(Box::new(Condition::PhraseKeyword(
                            "phrase word 2".into()
                        )))
                    ]
                ),
                Condition::Not(Box::new(Condition::Operator(
                    Operator::Or,
                    vec![
                        Condition::PhraseKeyword(" a long phrase word ".into()),
                        Condition::Keyword("word3".into())
                    ]
                )))
            ]
        ),
    ]
)

the conditions are represented by the enums Condition and Operator.

#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Condition {
    None,
    Keyword(String),
    PhraseKeyword(String),
    Not(Box<Condition>),
    Operator(Operator, Vec<Condition>),
}

#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Operator {
    And,
    Or,
}

usage

1. for Rust project

[dependencies]
search-query-parser = "0.1.4"

use search_query_parser::parse_query_to_condition;

let condition = parse_query_to_condition("any query string you like")?;

2. for REST API

refer to search-query-parser-api repository

3. for JVM language via JNI

refer to search-query-parser-cdylib repository

parse rules

1. a space {\u0020} or a full-width space {\u3000} is identified as the AND operator

fn test_keywords_concat_with_spaces() {
    let actual = parse_query_to_condition("word1 word2").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Keyword("word1".into()),
                Condition::Keyword("word2".into())
            ]
        )
    )
}

2. AND operator has higher priority than OR operator

fn test_keywords_concat_with_and_or() {
    let actual =
        parse_query_to_condition("word1 OR word2 AND word3").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::Or,
            vec![
                Condition::Keyword("word1".into()),
                Condition::Operator(
                    Operator::And,
                    vec![
                        Condition::Keyword("word2".into()),
                        Condition::Keyword("word3".into()),
                    ]
                )
            ]
        )
    )
}

3. conditions in brackets have higher priority

fn test_brackets() {
    let actual =
        parse_query_to_condition("word1 AND (word2 OR word3)")
            .unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Keyword("word1".into()),
                Condition::Operator(
                    Operator::Or,
                    vec![
                        Condition::Keyword("word2".into()),
                        Condition::Keyword("word3".into()),
                    ]
                )
            ]
        )
    )
}

4. text enclosed in double quotes is parsed as a phrase keyword

fn test_double_quote() {
    let actual = parse_query_to_condition(
        "\"word1 AND (word2 OR word3)\" word4",
    )
    .unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::PhraseKeyword(
                    "word1 AND (word2 OR word3)".into()
                ),
                Condition::Keyword("word4".into()),
            ]
        )
    )
}

5. a minus sign (hyphen) is parsed as a negative condition

※ it can be used before a keyword, a phrase keyword, or brackets

fn test_minus() {
    let actual = parse_query_to_condition(
        "-word1 -\"word2\" -(word3 OR word4)",
    )
    .unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Not(Box::new(Condition::Keyword("word1".into()))),
                Condition::Not(Box::new(Condition::PhraseKeyword("word2".into()))),
                Condition::Not(Box::new(Condition::Operator(
                    Operator::Or,
                    vec![
                        Condition::Keyword("word3".into()),
                        Condition::Keyword("word4".into())
                    ]
                ))),
            ]
        )
    )
}

6. correction of malformed search queries

  1. empty brackets
fn test_empty_brackets() {
    let actual = parse_query_to_condition("A AND () AND B").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Keyword("A".into()),
                Condition::Keyword("B".into()),
            ]
        )
    )
}
  1. reversed brackets
fn test_reverse_brackets() {
    let actual = parse_query_to_condition("A OR B) AND (C OR D").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::Or,
            vec![
                Condition::Keyword("A".into()),
                Condition::Operator(
                    Operator::And,
                    vec![
                        Condition::Keyword("B".into()),
                        Condition::Keyword("C".into()),
                    ]
                ),
                Condition::Keyword("D".into()),
            ]
        )
    )
}
  1. wrong number of brackets
fn test_missing_brackets() {
    let actual = parse_query_to_condition("(A OR B) AND (C").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Operator(
                    Operator::Or,
                    vec![
                        Condition::Keyword("A".into()),
                        Condition::Keyword("B".into()),
                    ]
                ),
                Condition::Keyword("C".into()),
            ]
        )
    )
}
  1. empty phrase keyword
fn test_empty_phrase_keywords() {
    let actual = parse_query_to_condition("A AND \"\" AND B").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Keyword("A".into()),
                Condition::Keyword("B".into()),
            ]
        )
    )
}
  1. wrong number of double quotes
fn test_invalid_double_quote() {
    let actual = parse_query_to_condition("\"A\" OR \"B OR C").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::Or,
            vec![
                Condition::PhraseKeyword("A".into()),
                Condition::Keyword("B".into()),
                Condition::Keyword("C".into()),
            ]
        )
    )
}
  1. AND and OR operators are next to each other
fn test_invalid_and_or() {
    let actual = parse_query_to_condition("A AND OR B").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::Or,
            vec![
                Condition::Keyword("A".into()),
                Condition::Keyword("B".into()),
            ]
        )
    )
}

7. search query optimization

fn test_unnecessary_nest_brackets() {
    let actual = parse_query_to_condition("(A OR (B OR C)) AND D").unwrap();
    assert_eq!(
        actual,
        Condition::Operator(
            Operator::And,
            vec![
                Condition::Operator(
                    Operator::Or,
                    vec![
                        Condition::Keyword("A".into()),
                        Condition::Keyword("B".into()),
                        Condition::Keyword("C".into()),
                    ]
                ),
                Condition::Keyword("D".into()),
            ]
        )
    )
}

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published