# Weighting the search results with human feedback

This notebook shows how to introduce and use a new field to integrate a human feedback (like/dislike button) into the search relevance score.

Links:
* updating field values https://www.elastic.co/guide/en/elasticsearch/reference/current/_updating_documents.html
* updating field values https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update-by-query.html
* weighting the search result scores https://www.elastic.co/guide/en/elasticsearch/guide/current/boosting-by-popularity.html
* sorting https://www.elastic.co/guide/en/elasticsearch/guide/current/_sorting.html

In [2]:
import subprocess
import json

In [17]:
query = """
curl -s -X GET "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds/article/_search" -H 'Content-Type: application/json' -d'
{
    "_source": ["title", "published"],
    "query": {
        "match_all": {}
    }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res



In [19]:
query = """
curl -s -X GET "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds/article/_search" -H 'Content-Type: application/json' -d'
{
    "_source": ["title", "published"],
    "query": {
        "range" : {
            "published" : {
                "gte" : "now-7d/d"
            }
        }
    }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res

'{"took":79,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":2700,"max_score":1.0,"hits":[{"_index":"rssfeeds","_type":"article","_id":"48e8f996340e03115b762df86ef930f4db494cb0","_score":1.0,"_source":{"published":"2018-05-25T00:47:00","title":"All Source Counter-IED Intelligence Analyst in Charlottesville, VA"}},{"_index":"rssfeeds","_type":"article","_id":"7f41081416587d7cfe992c60590d06cad6c840e2","_score":1.0,"_source":{"published":"2018-05-25T00:47:00","title":"Healthcare cybersecurity spending to exceed $65B over the next 5 years"}},{"_index":"rssfeeds","_type":"article","_id":"718de0af2ea4da5b82c6e592eb143cdd4b777164","_score":1.0,"_source":{"published":"2018-05-25T00:47:00","title":"C-SCoRE Program Teaches Cadets Cybersecurity Skills | Institute for Information Security & Privacy"}},{"_index":"rssfeeds","_type":"article","_id":"07bf6551e841a5d438b235190db6bda19d17e581","_score":1.0,"_source":{"published":"2018-05-25T00:47:00","title":

In [3]:
query = """
curl -s -X DELETE "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback" -u guest:teradata
"""

res = subprocess.getoutput(query)
res = json.loads(res)
res

{'error': {'index': 'rssfeeds_feedback',
  'index_uuid': '_na_',
  'reason': 'no such index',
  'resource.id': 'rssfeeds_feedback',
  'resource.type': 'index_or_alias',
  'root_cause': [{'index': 'rssfeeds_feedback',
    'index_uuid': '_na_',
    'reason': 'no such index',
    'resource.id': 'rssfeeds_feedback',
    'resource.type': 'index_or_alias',
    'type': 'index_not_found_exception'}],
  'type': 'index_not_found_exception'},
 'status': 404}

In [4]:
query = """
curl -s -X POST "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/_reindex" -H 'Content-Type: application/json' -d'
{
  "source": {
    "index": "rssfeeds"
  },
  "dest": {
    "index": "rssfeeds_feedback"
  }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)

In [25]:
query = """
curl -s -X GET "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/_count" -u guest:teradata
"""

res = subprocess.getoutput(query)
res = json.loads(res)
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'count': 7282}

In [22]:
query = """
curl -s -X PUT "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/_mapping/article" -H 'Content-Type: application/json' -d'
{
    "article" : {
        "properties" : {
            "feedback": {
               "type": "integer"
            }
        }
    }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res

'{"acknowledged":true}'

In [24]:
query = """
curl -s -X POST "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/_update_by_query" -H 'Content-Type: application/json' -d'
{
    "query" : {
        "range" : {
            "published" : {
                "gte" : "now-7d/d"
            }
        }
    },
    "script" : "ctx._source.feedback = 1"
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res

'{"took":1715,"timed_out":false,"total":2692,"updated":2692,"deleted":0,"batches":3,"version_conflicts":0,"noops":0,"retries":{"bulk":0,"search":0},"throttled_millis":0,"requests_per_second":-1.0,"throttled_until_millis":0,"failures":[]}'

In [44]:
query = """
curl -s -X GET "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/article/_search" -H 'Content-Type: application/json' -d'
{
    "_source": ["title", "feedback"],
    "query": {
        "range" : {
            "published" : {
                "gte" : "now-7d/d"
            }
        }
    },
    "sort": {
        "published": { "order": "desc" },
        "_score": { "order": "desc" }
    }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res = json.loads(res, strict=False)
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_id'], hit['_source']['feedback'], hit['_source']['title'])
    print('-'*80)

1.0 76b4bd9c72b2cd11c2dd90c4df3930584d125484 1 State-sponsored cyber attacks deserve tougher responses: ASPI report
--------------------------------------------------------------------------------
1.0 72bc82d51c4f85b54e88179a69f72fc02009d89c 1 Yes, Germany BND foreign intelligence service can spy on the world’s biggest internet exchange
--------------------------------------------------------------------------------
1.0 eb46e918212cfbe001888854528e1fafcaa8a6f2 1 Nocturnal Stealer Lets Low-Skilled Cybercrooks Harvest Sensitive Info
--------------------------------------------------------------------------------
1.0 c2937ebdab5d0606419399539779b15731b9f3ab 1 Leveraging GRC Technology to Avert Cyber Attacks
--------------------------------------------------------------------------------
1.0 a9a7fb2777652779a06fc97a04e73a7e595088ba 1 PM Modi interacts with students in Singapore
--------------------------------------------------------------------------------
1.0 3b357956e7c5d302d3a2a41c57a3

In [45]:
query = """
curl -s -X POST "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/article/cfa04d834b44508b406ab294fb089424788cbe5c/_update?pretty" -H 'Content-Type: application/json' -d'
{
  "script" : "ctx._source.feedback += 1"
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res

'{\n  "_index" : "rssfeeds_feedback",\n  "_type" : "article",\n  "_id" : "cfa04d834b44508b406ab294fb089424788cbe5c",\n  "_version" : 4,\n  "result" : "updated",\n  "_shards" : {\n    "total" : 2,\n    "successful" : 2,\n    "failed" : 0\n  },\n  "_seq_no" : 1932,\n  "_primary_term" : 1\n}'

In [49]:
query = """
curl -s -X GET "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/article/_search" -H 'Content-Type: application/json' -d'
{
    "_source": ["title", "feedback"],
    "query": {
        "range" : {
            "published" : {
                "gte" : "now-7d/d"
            }
        }
    },
    "sort": {
        "published": { "order": "desc" },
        "_score": { "order": "desc" }
    }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res = json.loads(res, strict=False)
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_id'], hit['_source']['feedback'], hit['_source']['title'])
    print('-'*80)

1.0 76b4bd9c72b2cd11c2dd90c4df3930584d125484 1 State-sponsored cyber attacks deserve tougher responses: ASPI report
--------------------------------------------------------------------------------
1.0 72bc82d51c4f85b54e88179a69f72fc02009d89c 1 Yes, Germany BND foreign intelligence service can spy on the world’s biggest internet exchange
--------------------------------------------------------------------------------
1.0 eb46e918212cfbe001888854528e1fafcaa8a6f2 1 Nocturnal Stealer Lets Low-Skilled Cybercrooks Harvest Sensitive Info
--------------------------------------------------------------------------------
1.0 c2937ebdab5d0606419399539779b15731b9f3ab 1 Leveraging GRC Technology to Avert Cyber Attacks
--------------------------------------------------------------------------------
1.0 a9a7fb2777652779a06fc97a04e73a7e595088ba 1 PM Modi interacts with students in Singapore
--------------------------------------------------------------------------------
1.0 3b357956e7c5d302d3a2a41c57a3

In [48]:
query = """
curl -s -X GET "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_feedback/article/_search" -H 'Content-Type: application/json' -d'
{
    "_source": ["title", "feedback"],
    "query": {
        "function_score": {
            "query": {
                "range" : {
                    "published" : {
                        "gte" : "now-7d/d"
                    }
                }
            },
            "field_value_factor": { 
                "field": "feedback"
            }
        }
    }
    },
    "sort": {
        "published": { "order": "desc" },
        "_score": { "order": "desc" }
    }
}
' -u guest:teradata
"""

res = subprocess.getoutput(query)
res = json.loads(res, strict=False)
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_id'], hit['_source']['feedback'], hit['_source']['title'])
    print('-'*80)

4.0 f2293a753a5dcddaba7a3c1a060b9672beec8324 4 Breakdown of the EFAIL Email Vulnerabilities
--------------------------------------------------------------------------------
3.0 cfa04d834b44508b406ab294fb089424788cbe5c 3 National team competition is vital for preserving European sports model
--------------------------------------------------------------------------------
1.0 48e8f996340e03115b762df86ef930f4db494cb0 1 All Source Counter-IED Intelligence Analyst in Charlottesville, VA
--------------------------------------------------------------------------------
1.0 7f41081416587d7cfe992c60590d06cad6c840e2 1 Healthcare cybersecurity spending to exceed $65B over the next 5 years
--------------------------------------------------------------------------------
1.0 718de0af2ea4da5b82c6e592eb143cdd4b777164 1 C-SCoRE Program Teaches Cadets Cybersecurity Skills | Institute for Information Security & Privacy
--------------------------------------------------------------------------------
1.0 07