In [1]:
import pandas as pd
import requests
import numpy as np

In [2]:
# Base URL of the search service. The trailing slash matters: endpoint names
# are appended directly to this string when request URLs are built.
SERVER = "http://192.168.1.10:5000/"
# Table queried on the service; sent as the `table_name` query parameter.
TABLE = "up"

In [3]:
def search(query, en_mode, timeout=60):
    """Run one query against the search service and return the decoded JSON.

    Args:
        query: Query text, sent as the `query_sentence` parameter.
        en_mode: If truthy, call the `search_en` endpoint; otherwise `search`.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    action = 'search_en' if en_mode else 'search'

    # Let requests build the query string: it percent-encodes the values, so
    # characters such as '#', '&', '+' or '=' inside the query text can no
    # longer truncate or corrupt the `query_sentence` parameter.
    response = requests.get(
        f"{SERVER}{action}",
        params={'table_name': TABLE, 'query_sentence': query},
        headers={'accept': 'application/json'},
        timeout=timeout,
    )

    # Fail loudly on an error status instead of trying to parse an error page
    # as search results.
    response.raise_for_status()

    return response.json()

def get_reversed_rank(query, paper_id, parag_id, en_mode):
    """Return the reciprocal rank of the expected paragraph for one query.

    The expected hit is identified by the marker '<paper_id>#<parag_id>',
    which is looked up as a substring of each result's 'title' field.

    Args:
        query: Query text forwarded to `search`.
        paper_id: Paper identifier of the expected hit.
        parag_id: Paragraph identifier of the expected hit.
        en_mode: Forwarded to `search` to select the endpoint.

    Returns:
        1 / rank (1-based) of the first result whose title contains the
        marker, or 0 when no result matches.
    """
    target = f'{paper_id}#{parag_id}'
    hits = search(query, en_mode)
    # The generator is lazy, so scanning stops at the first matching result.
    return next(
        (1 / rank for rank, hit in enumerate(hits, start=1) if target in hit['title']),
        0,
    )

# Eval over Farsi queries

In [4]:
# Evaluation set: one row per paragraph with its ids and query texts.
df = pd.read_csv('./eval_parags_cleaned.csv')

# Reciprocal rank of every expected paragraph for its Farsi query
# (en_mode=False selects the non-English search endpoint).
results = np.array([
    get_reversed_rank(persian_clean, paper_id, parag_id, False)
    for paper_id, parag_id, persian_clean in zip(df['paper_id'], df['parag_id'], df['persian_clean'])
])

# The array is built once under its intended name and reused below; the
# previous version stored it in a misspelled, never-read variable.
mrr = np.average(results)
not_found = np.sum(results == 0)    # queries whose paragraph was not returned
first_found = np.sum(results == 1)  # queries whose paragraph was ranked first

In [5]:
# Summary of the most recent evaluation run, one metric per line.
print(
    f'MRR is: {mrr}',
    f'Not found count is: {not_found}',
    f'First found count is: {first_found}',
    sep='\n',
)

MRR is: 0.14282581453634086
Not found count is: 201
First found count is: 22


# Eval over English queries

In [6]:
# Reciprocal rank of every expected paragraph for its English query
# (en_mode=True selects the English search endpoint). Reuses the `df`
# loaded from the evaluation CSV earlier in the notebook.
# NOTE(review): 'contcated' looks like a misspelling of "concatenated", but it
# is the column key used at runtime; do not rename it here without also
# renaming the column in the CSV.
results = np.array([
    get_reversed_rank(en_con, paper_id, parag_id, True)
    for paper_id, parag_id, en_con in zip(df['paper_id'], df['parag_id'], df['contcated'])
])

# The array is built once under its intended name and reused below; the
# previous version stored it in a misspelled, never-read variable.
mrr = np.average(results)
not_found = np.sum(results == 0)    # queries whose paragraph was not returned
first_found = np.sum(results == 1)  # queries whose paragraph was ranked first

In [7]:
# Summary of the most recent evaluation run, one metric per line.
print(
    f'MRR is: {mrr}',
    f'Not found count is: {not_found}',
    f'First found count is: {first_found}',
    sep='\n',
)

MRR is: 0.45053519214703425
Not found count is: 70
First found count is: 98
