In [None]:
import requests
import json

from ltr.client import ElasticClient
# Instantiate the project's Elasticsearch client wrapper.
client = ElasticClient()

# Host name reported by the client; the cells below splice it into raw REST URLs.
host = client.get_host()
# Bare expression so the notebook displays the resolved host.
host

In [None]:
# Rebuild the index if necessary
from ltr import download
from ltr.index import rebuild
from ltr.helpers.movies import indexable_movies

# Fetch the TMDB corpus into data/ ...
corpus = 'http://es-learn-to-rank.labs.o19s.com/tmdb.json'
download([corpus], dest='data/')

# ... then (re)build the 'tmdb' index from the downloaded file.
movies = indexable_movies(movies='data/tmdb.json')
rebuild(
    client,
    index='tmdb',
    doc_src=movies,
)

## Init Default Feature Store
The feature store can be removed by sending a DELETE request to the `_ltr` endpoint.

In [None]:
# Root `_ltr` endpoint on the Elasticsearch host.
url = 'http://{}:9200/_ltr/'.format(host)
print(url)
# DELETE removes the feature store; the response object is the cell's output.
requests.delete(url)

To initialize the LTR plugin, issue a PUT request to the `_ltr` endpoint.

In [None]:
# Same root `_ltr` endpoint as the DELETE above.
url = 'http://{}:9200/_ltr/'.format(host)
print(url)
# PUT initializes the LTR plugin's feature store; the response object is the cell's output.
requests.put(url)

## Create Feature Set

A feature set can be created by issuing a PUT to `_ltr/_featureset/[featureset_name]`.

In [None]:
# Feature set with two features, each a `match` query against a single field
# (title, overview) templated on the `keywords` parameter.
# The "validation" section names an index and sample parameters for the
# features (see the Elasticsearch LTR plugin docs on feature validation).
feature_set = {
    "featureset": {
        "features": [
            {
                "name": "title_bm25",
                "params": [
                    "keywords"
                ],
                "template": {
                    "match": {
                        "title": "{{keywords}}"
                    }
                }
            },
            {
                "name": "overview_bm25",
                "params": [
                    "keywords"
                ],
                "template": {
                    "match": {
                        "overview": "{{keywords}}"
                    }
                }
            }
        ]
    },
    "validation": {
        "index": "tmdb",
        "params": {
            "keywords": "rambo"
        }
    }
}

url = 'http://{}:9200/_ltr/_featureset/awesomefeatures'.format(host)
print(url)
# Remove any previous copy first so re-running this cell recreates the set.
# (This line used to read `request.delete`, which raised NameError: the
# imported module is `requests`.)
requests.delete(url)
resp = requests.put(url, json=feature_set)
# Last expression: show the plugin's raw response body.
resp.text

## Create Judgment List

In [None]:
from ltr.judgments import Judgment, judgments_to_file

# Hand-assigned grades (1 = relevant, 0 = not relevant) for two queries:
# qid 1 is 'rambo', qid 2 is 'star wars'.
judgment_list = [Judgment(docId=1368, grade=1, keywords='rambo', qid=1),
                 Judgment(docId=1369, grade=1, keywords='rambo', qid=1),
                 Judgment(docId=1370, grade=1, keywords='rambo', qid=1),
                 Judgment(docId=7555, grade=1, keywords='rambo', qid=1),
                 Judgment(docId=11,   grade=0, keywords='rambo', qid=1),
                 Judgment(docId=1368,  grade=0,  keywords='star wars', qid=2),
                 Judgment(docId=19404, grade=0,  keywords='star wars', qid=2),
                 Judgment(docId=1370,  grade=0,  keywords='star wars', qid=2),
                 Judgment(docId=1892,  grade=1,  keywords='star wars', qid=2),
                 Judgment(docId=11,    grade=1,  keywords='star wars', qid=2)]

# The context manager guarantees the file is flushed and closed before it is
# read back, instead of relying on garbage collection of an anonymous handle.
with open('data/judgments.txt', 'w') as judgments_file:
    judgments_to_file(judgments_file, judgment_list)
!cat data/judgments.txt

## Log Judged Query-Docs To Build Training Set

If we have 5 judged documents (1368, 1369, 1370, 7555, and 11) for the keywords rambo:

```
doc_id, relevant?, keywords
1368, 1, rambo
1369, 1, rambo
1370, 1, rambo
7555, 1, rambo
11,   0, rambo
```


We need to get the feature values for each row.

To do this, we utilize the logging extension to populate the judgment list with features for training.

In [None]:
# Log feature values for every judged (query, document) pair and attach them
# to the matching Judgment, then write the result out as the training set.

# The search endpoint does not change between queries, so build it once.
url = 'http://{}:9200/tmdb/_search'.format(host)

# Derive the query ids from the judgments themselves so that adding a query
# to judgment_list cannot be silently skipped here.
for qid in sorted({j.qid for j in judgment_list}):
    qid_judgments = [j for j in judgment_list if j.qid == qid]
    # All judgments sharing a qid carry the same keywords.
    keywords = qid_judgments[0].keywords

    my_ids = ["%s" % j.docId for j in qid_judgments]
    print("Logging %s - docs %s" % (keywords, my_ids))

    search_with_log = {
      # Ask for every judged doc; the default page size of 10 would drop
      # documents once a query has more judgments than that.
      "size": len(my_ids),
      "query": {
        "bool": {
          "filter": [
            {
              "sltr": {
                "_name": "logged_features",    #<-- Elasticsearch named queries
                "featureset": "awesomefeatures",  #<-- "Logging mode" relative to feature set
                "params": {      #<-- parameters for what we're logging
                  # Must be this query's keywords. A hard-coded "rambo" here
                  # logged rambo features for the 'star wars' judgments too.
                  "keywords": keywords
                }
              }
            },
            {
              "terms": {
                "_id": my_ids
              }
            }
          ]
        }
      },
      "ext": {
        "ltr_log": {
          "log_specs": {
            "name": "ltr_features",
            "named_query": "logged_features", #<-- refer back to query we want to config
            "missing_as_zero": True
          }
        }
      }
    }

    print(url)
    resp = requests.get(url, json=search_with_log).json()

    for doc in resp['hits']['hits']:
        doc_id = doc['_id']
        ltr_features = doc['fields']['_ltrlog'][0]['ltr_features']
        feature_values = [feature['value'] for feature in ltr_features]
        print(doc_id, feature_values)
        # Hit ids are strings while Judgment.docId is numeric, hence str().
        for judgment in qid_judgments:
            if str(judgment.docId) == doc_id:
                judgment.features = feature_values

# Close the file explicitly so the training set is fully on disk before any
# later cell (or the RankLib process) reads it.
with open('data/training.txt', 'w') as training_file:
    judgments_to_file(training_file, judgment_list)
!cat data/training.txt


In [None]:
# Fetch a ready-made judgment list and the RankLib jar. `dest` is spelled out
# (as in the corpus download earlier) because the training commands below
# expect both files under data/.
download(["http://es-learn-to-rank.labs.o19s.com/title_judgments.txt",
          "http://es-learn-to-rank.labs.o19s.com/RankyMcRankFace.jar"],
         dest='data/')

# Train a model

We won't do this here, but if you like you can try out training a model using Ranklib 

```
cd notebooks/elasticsearch/tmdb
java -jar data/RankyMcRankFace.jar -train data/title_judgments.txt -save data/model.txt

```

In [None]:
# Train on the feature-logged judgments in data/training.txt, save the model
# to data/model.txt, then print the saved model.
# NOTE(review): `-ranker 6` presumably selects LambdaMART (the inline model
# uploaded later in this notebook has a LambdaMART header) -- confirm against
# the RankLib usage docs.
!java -jar data/RankyMcRankFace.jar -ranker 6 -train data/training.txt -save data/model.txt
!cat data/model.txt

## Uploading a Model
Once features have been logged and training data has been generated, a model can be pushed into Elasticsearch.  The following shows what a request to PUT a new model looks like.

In [None]:
# Model definition pasted inline as text: a LambdaMART header followed by an
# <ensemble> that contains a single <tree>.
# NOTE(review): the <feature> numbers 1 and 2 presumably refer to the two
# features of the `awesomefeatures` set by position -- confirm.
# NOTE(review): the header says "No. of trees = 10" but only one tree is
# present below -- presumably truncated for the demo.
model = """## LambdaMART
## No. of trees = 10
## No. of leaves = 10
## No. of threshold candidates = 256
## Learning rate = 0.1
## Stop early = 100

<ensemble>
	<tree id="1" weight="0.1">
		<split>
			<feature> 2 </feature>
			<threshold> 10.664251 </threshold>
			<split pos="left">
				<feature> 1 </feature>
				<threshold> 0.0 </threshold>
				<split pos="left">
					<output> -1.8305741548538208 </output>
				</split>
				<split pos="right">
					<feature> 2 </feature>
					<threshold> 9.502127 </threshold>
					<split pos="left">
						<feature> 1 </feature>
						<threshold> 7.0849166 </threshold>
						<split pos="left">
							<output> 0.23645669221878052 </output>
						</split>
						<split pos="right">
							<output> 1.7593677043914795 </output>
						</split>
					</split>
					<split pos="right">
						<output> 1.9719607830047607 </output>
					</split>
				</split>
			</split>
			<split pos="right">
				<feature> 2 </feature>
				<threshold> 0.0 </threshold>
				<split pos="left">
					<output> 1.3728954792022705 </output>
				</split>
				<split pos="right">
					<feature> 2 </feature>
					<threshold> 8.602512 </threshold>
					<split pos="left">
						<feature> 1 </feature>
						<threshold> 0.0 </threshold>
						<split pos="left">
							<feature> 2 </feature>
							<threshold> 13.815164 </threshold>
							<split pos="left">
								<output> 1.9401178359985352 </output>
							</split>
							<split pos="right">
								<output> 1.99532949924469 </output>
							</split>
						</split>
						<split pos="right">
							<feature> 1 </feature>
							<threshold> 11.085816 </threshold>
							<split pos="left">
								<output> 2.0 </output>
							</split>
							<split pos="right">
								<output> 1.99308180809021 </output>
							</split>
						</split>
					</split>
					<split pos="right">
						<output> 1.9870178699493408 </output>
					</split>
				</split>
			</split>
		</split>
	</tree>
</ensemble>
"""


# Request body for the feature set's `_createmodel` endpoint: the model name
# plus the model text defined above, tagged with the "model/ranklib" type.
create_model = {
    "model": {
        "name": "bad_model",
        "model": {
            "type": "model/ranklib",
            "definition": model
        }
    }
}

url = 'http://{}:9200/_ltr/_featureset/awesomefeatures/_createmodel'.format(host)
print(url)
# Keep the response instead of discarding it, so a rejected upload is visible.
upload_resp = requests.post(url, json=create_model)
print(json.dumps(create_model, indent=2))
# Last expression: display what the plugin answered.
upload_resp.json()

## Searching with a Model
Now that a model has been uploaded to Elasticsearch we can use it to re-rank the results of a query.

In [None]:
# The only query clause is an `sltr` query that names the uploaded model and
# passes the keywords parameter its features are templated on.
sltr_query = {
    "params": {"keywords": "rambo"},
    "model": "bad_model",
}
search = {"query": {"sltr": sltr_query}}

url = 'http://{}:9200/tmdb/_search'.format(host)
# Parsed JSON body of the search response; the next cell reads `resp`.
search_response = requests.get(url, json=search)
resp = search_response.json()

In [None]:
print(url)
# Print the title of every returned hit, in the order returned.
ranked_hits = resp['hits']['hits']
for hit in ranked_hits:
    source = hit['_source']
    print(source['title'])
