# In [None]:
import json
import numpy as np

from flask import Flask, request


def tf(document, w):
    """Return the log-scaled, length-normalised term frequency of a posting.

    ``document`` is one posting of a term.  Three of its fields are read:

    * ``pos`` - positions of the term in the text; only their count is used.
    * ``pos_title`` - positions of the term in the title; only their count
      is used.
    * ``doc_length`` - divisor that normalises the weighted count.

    ``w`` is the weight given to every appearance in the title.

    The result is ``log(1 + (text_hits + w * title_hits) / doc_length)``.
    """
    text_hits = len(document['pos'])
    title_hits = len(document['pos_title'])
    weighted_hits = text_hits + w * title_hits
    return np.log(1 + weighted_hits / document['doc_length'])

# In [None]:
# Flask application object; the @app.route decorators below register the
# service's endpoints on it.
app = Flask(__name__)
'''
Service for ranking.
Input - list with relevant docIDs, list with tokens
from the query and a slice of the inverted index.
Output - list with ranked docIDs.

The cosine between the normalized tf_idf vector of the query
and the normalized tf_idf vector of the document is used for ranking.
If the query consists of a single word, pure tf_idf is used.

Before using this service for ranking you must create the idf
from the inverted index by using the ReverseIndex Service
(the idf is then loaded into this service via POST /rank/idf).
'''
    
def _l2_normalized(vector):
    """Return ``vector`` scaled to unit Euclidean length.

    An empty or all-zero vector has no direction, so it is returned as a
    plain copy instead of dividing by zero.
    """
    norm = sum(value ** 2 for value in vector) ** .5
    if not norm:
        return list(vector)
    return [value / norm for value in vector]


@app.route('/rank', methods=['POST'])
def ranking():
    """Rank the candidate documents of a query by tf-idf relevance.

    The JSON body of the request must contain:

    * ``data`` - slice of the inverted index, ``{term: [posting, ...]}``.
      Every posting carries a ``docID`` and is handed to ``tf``.
    * ``documents`` - list with the relevant docIDs.
    * ``words`` - list with the tokens of the query.

    Scoring:

    * more than one distinct query word - cosine between the normalized
      tf-idf vector of the query and the normalized tf-idf vector of each
      document;
    * otherwise - the plain tf-idf of the (first) query word.

    Every document vector has one slot per query token, in query order;
    a document without a posting for a token keeps ``0.0`` in that slot, so
    query and document components always line up.  Terms missing from the
    idf table or from the index slice contribute nothing rather than
    aborting the request.

    Returns the JSON string ``{"status": "ok", "ranked": [...]}`` with the
    docIDs sorted by descending score (ties keep the input order).  If the
    idf table has not been loaded yet through ``/rank/idf``, a JSON error
    payload is returned with HTTP status 503.
    """
    j = request.json
    inverted_index = j['data']
    # list with relevant docID:
    documents = j['documents']
    # list with tokens from the query:
    words = j['words']
    # weight for the term in the title:
    w = 5

    # The idf table only exists after the first POST to /rank/idf.
    try:
        idf_by_term = idf
    except NameError:
        return json.dumps({
            'status': 'error',
            'message': 'idf is not initialised; POST it to /rank/idf first',
        }), 503

    unique_terms = len(set(words))
    # An out-of-vocabulary term gets weight 0.
    term_idf = [idf_by_term.get(term, 0.0) for term in words]

    # normalized tf_idf vector for query (no division happens when `words`
    # is empty, because the comprehension is empty as well):
    tf_idf_query = _l2_normalized(
        [value / unique_terms for value in term_idf])
    app.logger.debug('tf_idf_query %s', tf_idf_query)

    # tf_idf for docs -> {docID1: [tf_idf_for_term1, tf_idf_for_term2], ...}
    # Vectors are pre-filled with zeros and written by position, which keeps
    # them aligned with the query vector even when a term is absent from a
    # document.
    tf_idf = {doc_id: [0.0] * len(words) for doc_id in documents}
    for position, term in enumerate(words):
        idf_for_term = term_idf[position]
        app.logger.debug('%s %s', term, idf_for_term)
        for posting in inverted_index.get(term, ()):
            # dict lookup instead of scanning the `documents` list
            vector = tf_idf.get(posting['docID'])
            if vector is not None:
                vector[position] = tf(posting, w) * idf_for_term
    app.logger.debug('tf_idf %s', tf_idf)

    if unique_terms > 1:
        # cos between tf_idf vector of query and normalized tf_idf vector
        # of document
        cos = {
            doc_id: sum(q * d for q, d
                        in zip(tf_idf_query, _l2_normalized(vector)))
            for doc_id, vector in tf_idf.items()
        }
    else:
        # single distinct query word (or empty query) -> pure tf_idf
        cos = {doc_id: (vector[0] if vector else 0.0)
               for doc_id, vector in tf_idf.items()}
    app.logger.debug('cos %s', cos)

    # sorting documents; sorted() is stable, so ties keep the input order
    ranked = sorted(cos, key=cos.get, reverse=True)
    app.logger.debug('ranked %s', ranked)

    return json.dumps({'status': 'ok', 'ranked': ranked})

@app.route("/rank/idf", methods=['POST'])
def refresh_idf():
    """Replace the module-level idf table with the one sent in the request.

    The ``data`` field of the JSON body is bound to the global ``idf``
    (noted in the original code as a ``{term: idf}`` mapping), which
    ``ranking`` reads when it scores a query.  Answers with the JSON string
    ``{"status": "ok"}``.
    """
    global idf
    payload = request.json
    # idf -> {term: idf}
    idf = payload['data']
    reply = {'status': 'ok'}
    return json.dumps(reply)

# Start the server only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    # host='0.0.0.0' makes the server listen on all network interfaces;
    # the service is exposed on port 13541.
    app.run(host='0.0.0.0', port=13541)