Skip to content
This repository has been archived by the owner on Sep 21, 2021. It is now read-only.

Multiword Synonyms and Phrase Queries: Is there a mistake in the Elastic documentation? #581

Closed
pengqiuyuan opened this issue Aug 5, 2016 · 0 comments

Comments

@pengqiuyuan
Copy link

pengqiuyuan commented Aug 5, 2016

Version: Elasticsearch 2.2.1

https://www.elastic.co/guide/en/elasticsearch/guide/current/multi-word-synonyms.html
The documentation states:

These phrases would not match:
The usa is wealthy
The united states of america is wealthy
The U.S.A. is wealthy

However, these phrases would:
United states is wealthy
Usa states of wealthy
The U.S. of wealthy
U.S. is america

But my actual results were:
These phrases would not match:
The united states of america is wealthy
The U.S.A. is wealthy
The U.S. of wealthy
U.S. is america

However, these phrases would:
The usa is wealthy
United states is wealthy
Usa states of wealthy

mapping:

{
    "state": "open", 
    "settings": {
        "index": {
            "creation_date": "1470363942117", 
            "analysis": {
                "filter": {
                    "my_synonym_filter": {
                        "type": "synonym", 
                        "synonyms": [
                            "usa,united states,u s a,united states of america"
                        ]
                    }
                }, 
                "analyzer": {
                    "my_synonyms": {
                        "filter": [
                            "lowercase", 
                            "my_synonym_filter"
                        ], 
                        "tokenizer": "standard"
                    }
                }
            }, 
            "number_of_shards": "3", 
            "number_of_replicas": "1", 
            "uuid": "3Soh83J8T9e_VdU4WUhiTg", 
            "version": {
                "created": "2020199"
            }
        }
    }, 
    "mappings": {
        "country": {
            "properties": {
                "title": {
                    "analyzer": "my_synonyms", 
                    "type": "string"
                }
            }
        }
    }, 
    "aliases": [ ]
}

analyzer test:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/_analyze?pretty&analyzer=my_synonyms' -d 'The United States is wealthy'
{
  "tokens" : [ {
    "token" : "the",
    "start_offset" : 0,
    "end_offset" : 3,
    "type" : "<ALPHANUM>",
    "position" : 0
  }, {
    "token" : "united",
    "start_offset" : 4,
    "end_offset" : 10,
    "type" : "<ALPHANUM>",
    "position" : 1
  }, {
    "token" : "usa",
    "start_offset" : 4,
    "end_offset" : 17,
    "type" : "SYNONYM",
    "position" : 1
  }, {
    "token" : "u",
    "start_offset" : 4,
    "end_offset" : 10,
    "type" : "SYNONYM",
    "position" : 1
  }, {
    "token" : "united",
    "start_offset" : 4,
    "end_offset" : 10,
    "type" : "SYNONYM",
    "position" : 1
  }, {
    "token" : "states",
    "start_offset" : 11,
    "end_offset" : 17,
    "type" : "<ALPHANUM>",
    "position" : 2
  }, {
    "token" : "s",
    "start_offset" : 11,
    "end_offset" : 17,
    "type" : "SYNONYM",
    "position" : 2
  }, {
    "token" : "states",
    "start_offset" : 11,
    "end_offset" : 17,
    "type" : "SYNONYM",
    "position" : 2
  }, {
    "token" : "is",
    "start_offset" : 18,
    "end_offset" : 20,
    "type" : "<ALPHANUM>",
    "position" : 3
  }, {
    "token" : "a",
    "start_offset" : 18,
    "end_offset" : 20,
    "type" : "SYNONYM",
    "position" : 3
  }, {
    "token" : "of",
    "start_offset" : 18,
    "end_offset" : 20,
    "type" : "SYNONYM",
    "position" : 3
  }, {
    "token" : "wealthy",
    "start_offset" : 21,
    "end_offset" : 28,
    "type" : "<ALPHANUM>",
    "position" : 4
  }, {
    "token" : "america",
    "start_offset" : 21,
    "end_offset" : 28,
    "type" : "SYNONYM",
    "position" : 4
  } ]
}

My steps:

curl -XPUT 'http://10.0.29.111:9200/my_index/' -d '
{
  "settings": {
    "analysis": {
      "filter": {
        "my_synonym_filter": {
          "type": "synonym",
          "synonyms": [
            "usa,united states,u s a,united states of america"
          ]
        }
      },
      "analyzer": {
        "my_synonyms": {
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "my_synonym_filter"
          ]
        }
      }
    }
  }
}
'

curl -XPUT 'http://10.0.29.111:9200/my_index/country/_mapping' -d '
{
    "properties" : {
        "title" : {"type" : "string","analyzer" : "my_synonyms"}
    }
}
'

curl -XPUT 'http://10.0.29.111:9200/my_index/country/1' -d '
{
  "title" : "The United States is wealthy"
}
'

curl -XGET 'http://10.0.29.111:9200/my_index/_analyze?pretty&analyzer=my_synonyms' -d 'The United States is wealthy'

test1:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "The usa is wealthy"
>         }
>     }
> }'
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 1.7452254,
    "hits" : [ {
      "_index" : "my_index",
      "_type" : "country",
      "_id" : "1",
      "_score" : 1.7452254,
      "_source" : {
        "title" : "The United States is wealthy"
      }
    } ]
  }
}

test2:

pqy:~ apple$ 
pqy:~ apple$ 
pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "The united states of america is wealthy"
>         }
>     }
> }'
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 0,
    "max_score" : null,
    "hits" : [ ]
  }
}

test3:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "The U.S.A. is wealthy"
>         }
>     }
> }'
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 0,
    "max_score" : null,
    "hits" : [ ]
  }
}

test4:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "United states is wealthy"
>         }
>     }
> }'
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 3.9460726,
    "hits" : [ {
      "_index" : "my_index",
      "_type" : "country",
      "_id" : "1",
      "_score" : 3.9460726,
      "_source" : {
        "title" : "The United States is wealthy"
      }
    } ]
  }
}

test5:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "Usa states of wealthy"
>         }
>     }
> }'
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 4.274912,
    "hits" : [ {
      "_index" : "my_index",
      "_type" : "country",
      "_id" : "1",
      "_score" : 4.274912,
      "_source" : {
        "title" : "The United States is wealthy"
      }
    } ]
  }
}

test6:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "The U.S. of wealthy"
>         }
>     }
> }'
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 0,
    "max_score" : null,
    "hits" : [ ]
  }
}

test7:

pqy:~ apple$ curl -XGET 'http://10.0.29.111:9200/my_index/country/_search?pretty' -d '{
>     "query": {
>         "match_phrase": {
>             "title": "U.S. is america"
>         }
>     }
> }'
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "hits" : {
    "total" : 0,
    "max_score" : null,
    "hits" : [ ]
  }
}
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant