In [None]:
import requests

In [None]:
# Index the TMDB corpus into the 'tmdb' index
from ltr.client.solr_client import SolrClient
from ltr.helpers.movies import indexable_movies
from ltr.index import rebuild

# Client pointed at the Solr instance on localhost; later cells reuse `client`.
client = SolrClient(host='http://localhost:8983/solr')

# Build the document source from the TMDB JSON file and hand it to rebuild().
movies = indexable_movies(movies='data/tmdb_ai_pow_search.json')
rebuild(client, index='tmdb', doc_src=movies)

In [None]:
def judg_csv(judgment):
    """Format one judgment as a ``grade,'title',keywords`` row.

    The title is taken from the movie that ``get_movie`` returns for
    ``judgment.doc_id`` in the TMDB JSON file; grade and keywords come
    straight from the judgment.
    """
    from ltr.helpers.movies import get_movie

    row_template = "{grade},'{title}',{keywords}"
    movie = get_movie(movies='data/tmdb_ai_pow_search.json', tmdb_id=judgment.doc_id)
    fields = {
        'grade': judgment.grade,
        'title': movie['title'],
        'keywords': judgment.keywords,
    }
    return row_template.format(**fields)



In [None]:
# Creates the Solr client again with the same host as the indexing cell;
# presumably so the cells below can run without re-indexing — confirm.
from ltr.client.solr_client import SolrClient

client = SolrClient(host='http://localhost:8983/solr')

In [None]:
from ltr.judgments import Judgment

# Example: build a single judgment from a grade, the query keywords and a document id.
# NOTE(review): doc_id is an int here, while mini_judg_list passes ids as strings — confirm which form is expected.
Judgment(grade=1, keywords='social network', doc_id=37799)

In [None]:

# Hand-written (grade, doc_id) pairs, grouped by the query keywords they belong to.
# Presumably grade 1 = relevant and 0 = not relevant for that query.
_GRADED_DOCS = [
    ('social network', [
        (1, '37799'),   # The Social Network
        (0, '267752'),  # #chicagoGirl
        (0, '38408'),   # Life As We Know It
        (0, '28303'),   # The Cheyenne Social Club
    ]),
    ('star wars', [
        (1, '11'),      # star wars
        (1, '1892'),    # return of jedi
        (0, '54138'),   # Star Trek Into Darkness
        (0, '85783'),   # The Star
        (0, '325553'),  # Battlestar Galactica
    ]),
]

# One Judgment per pair, in the order listed above.
mini_judg_list = [
    Judgment(grade=grade, keywords=keywords, doc_id=doc_id)
    for keywords, graded_docs in _GRADED_DOCS
    for grade, doc_id in graded_docs
]

# Print one CSV-style row per judgment.
for judgment in mini_judg_list:
    print(judg_csv(judgment))

In [None]:
from ltr.judgments import judgments_open

# Write every judgment in mini_judg_list to the dummy judgments file.
with judgments_open('data/dummy_judgments.txt', 'w') as judgment_writer:
    for j in mini_judg_list:
        judgment_writer.write(j)

# Show what was written. A context manager is used so the file handle is
# closed as soon as the contents have been read.
with open('data/dummy_judgments.txt') as judgments_file:
    print(judgments_file.read())

In [None]:
# Display the `features` attribute of the first judgment in the list.
mini_judg_list[0].features


In [None]:
# Feature definitions uploaded to the "movies" feature store. Each one is a
# SolrFeature whose "q" param is a Solr query; ${keywords} is a placeholder
# (the feature-logging request passes efi.keywords for it).
SOLR_FEATURE_CLASS = "org.apache.solr.ltr.feature.SolrFeature"
FEATURE_STORE = "movies"

# (feature name, Solr query) pairs, in upload order.
feature_queries = [
    ("title_bm25", "title:(${keywords})"),
    ("overview_bm25", "overview:(${keywords})"),
    ("vote_average", "{!func}vote_average"),
]

feature_set = [
    {
        "name": feature_name,
        "store": FEATURE_STORE,
        "class": SOLR_FEATURE_CLASS,
        "params": {"q": solr_query},
    }
    for feature_name, solr_query in feature_queries
]

# PUT the definitions to the tmdb core's feature-store endpoint; the response
# object is the cell's displayed value.
requests.put('http://localhost:8983/solr/tmdb/schema/feature-store',
             json=feature_set)


In [None]:
from itertools import groupby

from ltr.judgments import judgments_open
from ltr.log import FeatureLogger

# Log features for mini_judg_list: one log_for_qid call per run of
# consecutive judgments that share a qid.
ftr_logger = FeatureLogger(client, index='tmdb', feature_set='movies')

judgments_by_qid = groupby(mini_judg_list, key=lambda judgment: judgment.qid)
for qid, query_judgments in judgments_by_qid:
    ftr_logger.log_for_qid(judgments=query_judgments, qid=qid)

# Display what the logger has collected.
ftr_logger.logged

In [None]:
# Request feature values for the documents graded for 'social network'.
# The [features ...] entry in fl names the 'movies' feature store and passes
# the query keywords as efi.keywords.
logging_solr_query = {
    "fl": "id,title,[features store=movies efi.keywords=\"social network\"]",
    # Every id clause is joined with an explicit OR so the match does not
    # depend on the core's default query operator.
    'q': "id:37799 OR id:267752 OR id:38408 OR id:28303", #social network graded documents
    'rows': 10,
    'wt': 'json'
}

# `resp` is read again by the cell that saves the features onto the judgments.
resp = requests.post('http://localhost:8983/solr/tmdb/select',
                     data=logging_solr_query)

resp.json()

In [None]:
# Save off features for qid=1
CURR_QID=1

solr_json = resp.json()
doc_id_to_features = {}

# Map Doc Id => Features
for doc in solr_json['response']['docs']:
    # Parse the '[features]' string, ie
    # 'title_bm25=0.0,overview_bm25=13.237938,vote_average=7.0'
    # into a list of floats, keeping the order Solr returned them in.
    features = [float(ftr.split('=')[1]) for ftr in doc['[features]'].split(',')]

    doc_id_to_features[doc['id']] = features

# Save in correct judgment. Judgments for this qid whose document is not in
# the Solr response keep their current features; their ids are collected and
# reported below instead of being skipped silently.
missing_doc_ids = []
for judgment in mini_judg_list:
    if judgment.qid != CURR_QID:
        continue
    if judgment.doc_id in doc_id_to_features:
        judgment.features = doc_id_to_features[judgment.doc_id]
    else:
        missing_doc_ids.append(judgment.doc_id)

if missing_doc_ids:
    print("No features logged for qid {} doc ids: {}".format(CURR_QID, missing_doc_ids))
    

In [None]:
from itertools import groupby

from ltr.judgments import judgments_open
from ltr.log import FeatureLogger

# Same logging steps as the earlier FeatureLogger cell, with a fresh logger:
# group mini_judg_list by qid and log each group.
ftr_logger = FeatureLogger(client, index='tmdb', feature_set='movies')
for qid, query_judgments in groupby(mini_judg_list, key=lambda judged: judged.qid):
    ftr_logger.log_for_qid(qid=qid, judgments=query_judgments)

ftr_logger.logged

In [None]:
from ltr import download

judgments='http://es-learn-to-rank.labs.o19s.com/title_judgments_binary.txt'
download([judgments], dest='data/')

# Open the file that was just downloaded rather than a differently named one.
# NOTE(review): assumes download() saves into dest under the URL's last path
# segment — confirm against ltr.download.
judgments_path = 'data/' + judgments.rsplit('/', 1)[-1]

ftr_logger=FeatureLogger(client, index='tmdb', feature_set='movies')

# Log features per qid, passing the keywords that the judgment list reports
# for that qid.
with judgments_open(judgments_path) as judgment_list:
    for qid, query_judgments in groupby(judgment_list, key=lambda j: j.qid):
        ftr_logger.log_for_qid(judgments=query_judgments,
                               qid=qid,
                               keywords=judgment_list.keywords(qid))

ftr_logger.logged