Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

String sorting incorrect after reindex #3078

Closed
clintongormley opened this Issue May 23, 2013 · 13 comments

Comments

Projects
None yet
4 participants
@clintongormley
Copy link
Member

commented May 23, 2013

After reindexing a doc, it is not being returned in the correct sort order (when sorting on a string field)

First, index docs 1..100 with a string field user:

curl -XPOST 'http://127.0.0.1:9200/test/test/_bulk?pretty=1'  -d '
{"index" : {"_id" : "1"}}
{"user" : "1"}
{"index" : {"_id" : "2"}}
{"user" : "2"}
{"index" : {"_id" : "3"}}
{"user" : "3"}
{"index" : {"_id" : "4"}}
{"user" : "4"}
{"index" : {"_id" : "5"}}
{"user" : "5"}
{"index" : {"_id" : "6"}}
{"user" : "6"}
{"index" : {"_id" : "7"}}
{"user" : "7"}
{"index" : {"_id" : "8"}}
{"user" : "8"}
{"index" : {"_id" : "9"}}
{"user" : "9"}
{"index" : {"_id" : "10"}}
{"user" : "10"}
{"index" : {"_id" : "11"}}
{"user" : "11"}
{"index" : {"_id" : "12"}}
{"user" : "12"}
{"index" : {"_id" : "13"}}
{"user" : "13"}
{"index" : {"_id" : "14"}}
{"user" : "14"}
{"index" : {"_id" : "15"}}
{"user" : "15"}
{"index" : {"_id" : "16"}}
{"user" : "16"}
{"index" : {"_id" : "17"}}
{"user" : "17"}
{"index" : {"_id" : "18"}}
{"user" : "18"}
{"index" : {"_id" : "19"}}
{"user" : "19"}
{"index" : {"_id" : "20"}}
{"user" : "20"}
{"index" : {"_id" : "21"}}
{"user" : "21"}
{"index" : {"_id" : "22"}}
{"user" : "22"}
{"index" : {"_id" : "23"}}
{"user" : "23"}
{"index" : {"_id" : "24"}}
{"user" : "24"}
{"index" : {"_id" : "25"}}
{"user" : "25"}
{"index" : {"_id" : "26"}}
{"user" : "26"}
{"index" : {"_id" : "27"}}
{"user" : "27"}
{"index" : {"_id" : "28"}}
{"user" : "28"}
{"index" : {"_id" : "29"}}
{"user" : "29"}
{"index" : {"_id" : "30"}}
{"user" : "30"}
{"index" : {"_id" : "31"}}
{"user" : "31"}
{"index" : {"_id" : "32"}}
{"user" : "32"}
{"index" : {"_id" : "33"}}
{"user" : "33"}
{"index" : {"_id" : "34"}}
{"user" : "34"}
{"index" : {"_id" : "35"}}
{"user" : "35"}
{"index" : {"_id" : "36"}}
{"user" : "36"}
{"index" : {"_id" : "37"}}
{"user" : "37"}
{"index" : {"_id" : "38"}}
{"user" : "38"}
{"index" : {"_id" : "39"}}
{"user" : "39"}
{"index" : {"_id" : "40"}}
{"user" : "40"}
{"index" : {"_id" : "41"}}
{"user" : "41"}
{"index" : {"_id" : "42"}}
{"user" : "42"}
{"index" : {"_id" : "43"}}
{"user" : "43"}
{"index" : {"_id" : "44"}}
{"user" : "44"}
{"index" : {"_id" : "45"}}
{"user" : "45"}
{"index" : {"_id" : "46"}}
{"user" : "46"}
{"index" : {"_id" : "47"}}
{"user" : "47"}
{"index" : {"_id" : "48"}}
{"user" : "48"}
{"index" : {"_id" : "49"}}
{"user" : "49"}
{"index" : {"_id" : "50"}}
{"user" : "50"}
{"index" : {"_id" : "51"}}
{"user" : "51"}
{"index" : {"_id" : "52"}}
{"user" : "52"}
{"index" : {"_id" : "53"}}
{"user" : "53"}
{"index" : {"_id" : "54"}}
{"user" : "54"}
{"index" : {"_id" : "55"}}
{"user" : "55"}
{"index" : {"_id" : "56"}}
{"user" : "56"}
{"index" : {"_id" : "57"}}
{"user" : "57"}
{"index" : {"_id" : "58"}}
{"user" : "58"}
{"index" : {"_id" : "59"}}
{"user" : "59"}
{"index" : {"_id" : "60"}}
{"user" : "60"}
{"index" : {"_id" : "61"}}
{"user" : "61"}
{"index" : {"_id" : "62"}}
{"user" : "62"}
{"index" : {"_id" : "63"}}
{"user" : "63"}
{"index" : {"_id" : "64"}}
{"user" : "64"}
{"index" : {"_id" : "65"}}
{"user" : "65"}
{"index" : {"_id" : "66"}}
{"user" : "66"}
{"index" : {"_id" : "67"}}
{"user" : "67"}
{"index" : {"_id" : "68"}}
{"user" : "68"}
{"index" : {"_id" : "69"}}
{"user" : "69"}
{"index" : {"_id" : "70"}}
{"user" : "70"}
{"index" : {"_id" : "71"}}
{"user" : "71"}
{"index" : {"_id" : "72"}}
{"user" : "72"}
{"index" : {"_id" : "73"}}
{"user" : "73"}
{"index" : {"_id" : "74"}}
{"user" : "74"}
{"index" : {"_id" : "75"}}
{"user" : "75"}
{"index" : {"_id" : "76"}}
{"user" : "76"}
{"index" : {"_id" : "77"}}
{"user" : "77"}
{"index" : {"_id" : "78"}}
{"user" : "78"}
{"index" : {"_id" : "79"}}
{"user" : "79"}
{"index" : {"_id" : "80"}}
{"user" : "80"}
{"index" : {"_id" : "81"}}
{"user" : "81"}
{"index" : {"_id" : "82"}}
{"user" : "82"}
{"index" : {"_id" : "83"}}
{"user" : "83"}
{"index" : {"_id" : "84"}}
{"user" : "84"}
{"index" : {"_id" : "85"}}
{"user" : "85"}
{"index" : {"_id" : "86"}}
{"user" : "86"}
{"index" : {"_id" : "87"}}
{"user" : "87"}
{"index" : {"_id" : "88"}}
{"user" : "88"}
{"index" : {"_id" : "89"}}
{"user" : "89"}
{"index" : {"_id" : "90"}}
{"user" : "90"}
{"index" : {"_id" : "91"}}
{"user" : "91"}
{"index" : {"_id" : "92"}}
{"user" : "92"}
{"index" : {"_id" : "93"}}
{"user" : "93"}
{"index" : {"_id" : "94"}}
{"user" : "94"}
{"index" : {"_id" : "95"}}
{"user" : "95"}
{"index" : {"_id" : "96"}}
{"user" : "96"}
{"index" : {"_id" : "97"}}
{"user" : "97"}
{"index" : {"_id" : "98"}}
{"user" : "98"}
{"index" : {"_id" : "99"}}
{"user" : "99"}
{"index" : {"_id" : "100"}}
{"user" : "100"}
'

Search, sorting on user:

curl -XGET 'http://127.0.0.1:9200/test/_search?pretty=1'  -d '
{
   "sort" : {
      "user" : "asc"
   },
   "fields" : [],
   "size" : 10
}
'

Results show that user:1 is in first position:

# {
#    "hits" : {
#       "hits" : [
#          {
#             "sort" : [
#                "1"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "1",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "10"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "10",
#             "_type" : "test"
#          },
# ....

Now reindex the first doc, with the same values:

curl -XPUT 'http://127.0.0.1:9200/test/test/1?pretty=1'  -d '
{
   "user" : "1"
}
'

And search again:

curl -XGET 'http://127.0.0.1:9200/test/_search?pretty=1'  -d '
{
   "sort" : {
      "user" : "asc"
   },
   "fields" : [],
   "size" : 10
}
'

The doc with user:1 no longer appears in the correct position; in fact, it doesn't appear anywhere in the first 10 results:

# {
#    "hits" : {
#       "hits" : [
#          {
#             "sort" : [
#                "10"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "10",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "100"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "100",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "11"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "11",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "12"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "12",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "13"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "13",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "14"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "14",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "15"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "15",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "16"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "16",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "17"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "17",
#             "_type" : "test"
#          },
#          {
#             "sort" : [
#                "18"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "18",
#             "_type" : "test"
#          }
#       ],
#       "max_score" : null,
#       "total" : 100
#    },
#    "timed_out" : false,
#    "_shards" : {
#       "failed" : 0,
#       "successful" : 5,
#       "total" : 5
#    },
#    "took" : 3
# }

However, if you return all 100 docs, then it appears in the first position again (correctly):

curl -XGET 'http://127.0.0.1:9200/test/_search?pretty=1'  -d '
{
   "sort" : {
      "user" : "asc"
   },
   "fields" : [],
   "size" : 100
}
'

# {
#    "hits" : {
#       "hits" : [
#          {
#             "sort" : [
#                "1"
#             ],
#             "_score" : null,
#             "_index" : "test",
#             "_id" : "1",
#             "_type" : "test"
#          },

This leads me to think that it is the shard-level sorting that is incorrect.

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

Thanks for the ticket, @clintongormley . Sadly this bug is causing many people to yell at me :( They update a record and it's removed from their UI.

@ghost ghost assigned martijnvg May 23, 2013

@s1monw

This comment has been minimized.

Copy link
Contributor

commented May 23, 2013

What version does this reproduce on? Does this still happen on master?

@s1monw

This comment has been minimized.

Copy link
Contributor

commented May 23, 2013

@martijnvg this seems to be fixed. It was caused by #2991 and is fixed in master and 0.90.

@s1monw s1monw closed this May 23, 2013

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

@s1monw this issue is definitely happening on 0.90.0 release. I also just pulled down the repo and ran it against v 1.0.0 beta1. The issue still exists there as well.

Please verify and reopen.

@clintongormley can you confirm?

@clintongormley

This comment has been minimized.

Copy link
Member Author

commented May 23, 2013

@sarmiena For me it is broken in 0.90.0, but fixed in master and in the 0.90.1 branch.

Unless you have a different test to show otherwise?

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

@clintongormley I'm building from master using:

mvn clean package -DskipTests

However, this is building elasticsearch-1.0.0.Beta1-SNAPSHOT.

I'm not sure how to build 0.90.1 since there is no tag or branch in the repo that I can see.

Let me know if you want me to show you (live) how to reproduce it using 1.0.0.Beta1

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

@clintongormley ok I just ran the same scenario on 0.90.1 branch and it's definitely still happening. Not sure why yours isn't showing the same issue.

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

@clintongormley Sorry to keep bothering :) However I have good news and bad news:

Good news: Your test case does work in 0.90.1
Bad news: An alternative test case produces same problem

You used bulk upload, while I simply added 1 record at a time (100 times).

https://gist.github.com/sarmiena/d945848fd683f39d212c

I used Ruby to iterate 100 POST requests in that gist, but you can use whatever you'd like.

The issue doesn't appear to be resolved. Can we reopen the ticket?

@clintongormley

This comment has been minimized.

Copy link
Member Author

commented May 23, 2013

@s1monw

This comment has been minimized.

Copy link
Contributor

commented May 23, 2013

I added a test case that mirrors your Ruby test in Java and it doesn't fail. I can't reproduce your problem, I am sorry. Are you sure you built 0.90.1?

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

@s1monw I'm sure I can reproduce this in 0.90.1. Perhaps the test isn't reproducing the problem because my case goes through the JSON API, while the test uses the interfaces directly?

I can do a TeamViewer session if you'd like. Otherwise you can just pop open irb and copy/paste the Ruby code in there.

Gchat me at sarmiena@gmail.com if you want to get hold of me. Otherwise I'm on IRC in #elasticsearch as sarmiena_ (notice the underscore).

@sarmiena

This comment has been minimized.

Copy link

commented May 23, 2013

Ok looks like 0.90.1 does fix this issue. The formatting was a little off and I missed the record:

Please close.

# ruby code (irb)
(1).upto(100) do |i|
    `curl -XPUT 'http://localhost:9200/twitter/tweet/#{i}' -d '{ "user" : "#{i}"}'`
end

# command line
$ curl -X POST "http://localhost:9200/twitter/tweet/1" -d '
> {"user":"1"}
> '

$ curl -X GET 'http://localhost:9200/twitter/tweet/_search?pretty' -d '
{
  "sort": [
    {
      "user": "asc"
    }
  ],
  "size": 10,
  "from": 0
}
'
{
  "took" : 8,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 100,
    "max_score" : null,
    "hits" : [ {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "1",
      "_score" : null, "_source" :
{"user":"1"}
,
      "sort" : [ "1" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "10",
      "_score" : null, "_source" : { "user" : "10"},
      "sort" : [ "10" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "100",
      "_score" : null, "_source" : { "user" : "100"},
      "sort" : [ "100" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "11",
      "_score" : null, "_source" : { "user" : "11"},
      "sort" : [ "11" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "12",
      "_score" : null, "_source" : { "user" : "12"},
      "sort" : [ "12" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "13",
      "_score" : null, "_source" : { "user" : "13"},
      "sort" : [ "13" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "14",
      "_score" : null, "_source" : { "user" : "14"},
      "sort" : [ "14" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "15",
      "_score" : null, "_source" : { "user" : "15"},
      "sort" : [ "15" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "16",
      "_score" : null, "_source" : { "user" : "16"},
      "sort" : [ "16" ]
    }, {
      "_index" : "twitter",
      "_type" : "tweet",
      "_id" : "17",
      "_score" : null, "_source" : { "user" : "17"},
      "sort" : [ "17" ]
    } ]
  }
}
@s1monw

This comment has been minimized.

Copy link
Contributor

commented May 24, 2013

Thanks for the clarification! Good to work with you last night!

@s1monw s1monw closed this May 24, 2013

mute pushed a commit to mute/elasticsearch that referenced this issue Jul 29, 2015

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.