# Decay function

This notebook demonstrates using Elasticsearch `function_score` decay functions to rank news articles. The aim is to impose time decay, i.e. older articles get a lower score than recent ones.

Relevant links:
* https://www.elastic.co/guide/en/elasticsearch/reference/5.4/query-dsl-function-score-query.html

In [1]:
import subprocess
import json

In [2]:
# Call the `_count` endpoint of the `rssfeeds` index through curl.
# The command text (including its leading/trailing newline) is assembled from
# adjacent string literals so the long URL sits on a line of its own.
query = (
    "\n"
    "curl -s -X GET "
    '"http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds/_count"'
    " -u guest:teradata"
    "\n"
)

# Run the command through the shell and decode the JSON reply in one step.
res = json.loads(subprocess.getoutput(query))
res

{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},
 'count': 4718}

In [3]:
# Fetch the field mapping of the `rssfeeds` index through curl, to see which
# fields exist and how they are typed (e.g. `published` is a date field in the
# recorded output).
query = (
    "\n"
    "curl -s -X GET "
    '"http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds/_mapping"'
    " -u guest:teradata"
    "\n"
)

# Run the command through the shell and decode the JSON reply in one step.
res = json.loads(subprocess.getoutput(query))
res

{'rssfeeds': {'mappings': {'article': {'properties': {'categories': {'fields': {'keyword': {'ignore_above': 256,
        'type': 'keyword'}},
      'type': 'text'},
     'content': {'fields': {'processed': {'analyzer': 'custom_text_analyzer',
        'type': 'text'},
       'tagged': {'analyzer': 'custom_text_analyzer',
        'fielddata': True,
        'fielddata_frequency_filter': {'max': 0.1,
         'min': 0.001,
         'min_segment_size': 10},
        'type': 'text'}},
      'type': 'text'},
     'description': {'fields': {'processed': {'analyzer': 'custom_text_analyzer',
        'type': 'text'},
       'tagged': {'analyzer': 'custom_text_analyzer',
        'fielddata': True,
        'fielddata_frequency_filter': {'max': 0.1,
         'min': 0.001,
         'min_segment_size': 10},
        'type': 'text'}},
      'type': 'text'},
     'link': {'ignore_above': 256, 'type': 'keyword'},
     'published': {'type': 'date'},
     'resource_label': {'ignore_above': 256, 'type': 'keyw

In [4]:
# Search term matched against the article `content` field by the search cells.
term = 'spyware'

In [5]:
# Baseline search: rank articles purely on how well `content` matches `term`,
# with no time decay applied.
#
# NOTE(review): this searches the `rssfeeds_test` index, while the _count and
# _mapping cells inspect `rssfeeds` - confirm that the difference is intended.
search_url = "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_test/article/_search"

# Serialise the body with json.dumps so that quotes or backslashes in `term`
# are escaped properly instead of corrupting the JSON document.
# The previous string-built body also sent literal braces around the term
# ("{spyware}"), which looks like a leftover template placeholder; the bare
# term is sent now.
request_body = json.dumps({
    "_source": ["title", "content", "published"],
    "query": {
        "match": {
            "content": term
        }
    }
})

# Pass curl an argument list rather than one shell string: no shell is involved,
# so nothing in `term` can terminate the quoting or inject extra commands.
query = [
    "curl", "-s", "-X", "GET", search_url,
    "-H", "Content-Type: application/json",
    "-d", request_body,
    "-u", "guest:teradata",
]

# check_output returns stdout only (stderr is not mixed into the JSON text) and
# raises CalledProcessError if curl exits with a non-zero status.
res = subprocess.check_output(query, universal_newlines=True)
# strict=False tolerates raw control characters inside JSON strings.
res = json.loads(res, strict=False)
print(res['hits']['total'])
print()

# One line per hit: relevance score, publication timestamp, title.
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_source']['published'], hit['_source']['title'])
    print('-'*80)

3

5.3452134 2018-05-08T13:00:14 Maikspy Spyware Poses as Adult Game, Targets Windows and Android Users
--------------------------------------------------------------------------------
4.414116 2018-05-14T14:01:55 North Korean Hackers Are Now Developing iPhone Spy Tools
--------------------------------------------------------------------------------
3.110219 2018-04-20T01:06:43 XLoader Android Spyware and Banking Trojan Distributed via DNS Spoofing
--------------------------------------------------------------------------------


In [6]:
# Same text match as the baseline search, wrapped in a `function_score` query
# with a Gaussian decay on the `published` date.
search_url = "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_test/article/_search"

# Decay parameters (see the Elasticsearch function_score documentation):
#   origin - the date from which the distance is measured ("now")
#   offset - articles at most this far from origin are not decayed
#   scale  - at a distance of offset + scale the decay factor equals `decay`
#   decay  - the factor reached at that distance (0.5)
decay_function = {
    "gauss": {
        "published": {
            "origin": "now",
            "scale": "7d",
            "offset": "7d",
            "decay": 0.5
        }
    }
}

# Serialise the body with json.dumps so that quotes or backslashes in `term`
# are escaped properly instead of corrupting the JSON document.
# The previous string-built body also sent literal braces around the term
# ("{spyware}"), which looks like a leftover template placeholder; the bare
# term is sent now.
request_body = json.dumps({
    "_source": ["title", "content", "published"],
    "query": {
        "function_score": {
            "functions": [decay_function],
            "query": {
                "match": {
                    "content": term
                }
            }
        }
    }
})

# Pass curl an argument list rather than one shell string: no shell is involved,
# so nothing in `term` can terminate the quoting or inject extra commands.
query = [
    "curl", "-s", "-X", "GET", search_url,
    "-H", "Content-Type: application/json",
    "-d", request_body,
    "-u", "guest:teradata",
]

# check_output returns stdout only (stderr is not mixed into the JSON text) and
# raises CalledProcessError if curl exits with a non-zero status.
res = subprocess.check_output(query, universal_newlines=True)
# strict=False tolerates raw control characters inside JSON strings.
res = json.loads(res, strict=False)
print(res['hits']['total'])
print()

# One line per hit: decayed score, publication timestamp, title.
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_source']['published'], hit['_source']['title'])
    print('-'*80)

3

3.5921986 2018-05-14T14:01:55 North Korean Hackers Are Now Developing iPhone Spy Tools
--------------------------------------------------------------------------------
1.3513906 2018-05-08T13:00:14 Maikspy Spyware Poses as Adult Game, Targets Windows and Android Users
--------------------------------------------------------------------------------
3.57656e-05 2018-04-20T01:06:43 XLoader Android Spyware and Banking Trojan Distributed via DNS Spoofing
--------------------------------------------------------------------------------


In [7]:
# Same text match as the baseline search, wrapped in a `function_score` query
# with an exponential decay on the `published` date. In the recorded run the
# oldest article keeps a noticeably higher score here than under the Gaussian
# decay with the same parameters.
search_url = "http://a3557701c4b3211e88f8a060fa4fdbf3-427558466.eu-west-3.elb.amazonaws.com/elasticsearch/rssfeeds_test/article/_search"

# Decay parameters (see the Elasticsearch function_score documentation):
#   origin - the date from which the distance is measured ("now")
#   offset - articles at most this far from origin are not decayed
#   scale  - at a distance of offset + scale the decay factor equals `decay`
#   decay  - the factor reached at that distance (0.5)
decay_function = {
    "exp": {
        "published": {
            "origin": "now",
            "scale": "7d",
            "offset": "7d",
            "decay": 0.5
        }
    }
}

# Serialise the body with json.dumps so that quotes or backslashes in `term`
# are escaped properly instead of corrupting the JSON document.
# The previous string-built body also sent literal braces around the term
# ("{spyware}"), which looks like a leftover template placeholder; the bare
# term is sent now.
request_body = json.dumps({
    "_source": ["title", "content", "published"],
    "query": {
        "function_score": {
            "functions": [decay_function],
            "query": {
                "match": {
                    "content": term
                }
            }
        }
    }
})

# Pass curl an argument list rather than one shell string: no shell is involved,
# so nothing in `term` can terminate the quoting or inject extra commands.
query = [
    "curl", "-s", "-X", "GET", search_url,
    "-H", "Content-Type: application/json",
    "-d", request_body,
    "-u", "guest:teradata",
]

# check_output returns stdout only (stderr is not mixed into the JSON text) and
# raises CalledProcessError if curl exits with a non-zero status.
res = subprocess.check_output(query, universal_newlines=True)
# strict=False tolerates raw control characters inside JSON strings.
res = json.loads(res, strict=False)
print(res['hits']['total'])
print()

# One line per hit: decayed score, publication timestamp, title.
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_source']['published'], hit['_source']['title'])
    print('-'*80)

3

3.0249496 2018-05-14T14:01:55 North Korean Hackers Are Now Developing iPhone Spy Tools
--------------------------------------------------------------------------------
2.0135927 2018-05-08T13:00:14 Maikspy Spyware Poses as Adult Game, Targets Windows and Android Users
--------------------------------------------------------------------------------
0.18767737 2018-04-20T01:06:43 XLoader Android Spyware and Banking Trojan Distributed via DNS Spoofing
--------------------------------------------------------------------------------
