Skip to content

Commit

Permalink
Merge branch 'improve-recall' into hpc-experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
daoudclarke committed Mar 6, 2015
2 parents 639bdeb + 55c37df commit f60442a
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 9 deletions.
5 changes: 4 additions & 1 deletion fbsearch/connect.py
Expand Up @@ -15,6 +15,7 @@

import sys
import json
import re
import sexpdata

STOPWORDS = set("""
Expand All @@ -23,6 +24,7 @@
they this to was will with who what where
""".split())

# NOTE: despite the name, this pattern matches the characters that are NOT
# kept: any run of non-word characters (per Unicode rules) or underscores.
# A raw string is used so that `\W` reaches the regex engine verbatim instead
# of being treated as an (invalid) string escape sequence.
ALLOWED_CHARS_PATTERN = re.compile(r'[\W_]+', re.UNICODE)

class Connector(object):
def __init__(self):
Expand Down Expand Up @@ -52,7 +54,8 @@ def query_search(self, query):
if result is not None:
logger.debug("Found entities in query cache")
return result
query_terms = [term for term in query.split() if term not in STOPWORDS]
normalised_query = ALLOWED_CHARS_PATTERN.sub(' ', query)
query_terms = [term for term in normalised_query.split() if term not in STOPWORDS]
logger.debug("Getting query entities for query terms: %r", query_terms)
all_entities = []
subqueries = []
Expand Down
6 changes: 6 additions & 0 deletions tests/test_connect.py
Expand Up @@ -49,5 +49,11 @@ def test_long_query_search_finds_china():
ids = set(result[1]['id'] for result in results)
assert 'fb:en.china' in ids

def test_query_search_with_brackets():
    """Check that a query containing brackets and punctuation still yields results."""
    query = "which country has adopted the euro as its currency ( 1 point )?"
    results = connector.query_search(query)
    # Parenthesised form: prints the same value under Python 2 and is also
    # valid syntax under Python 3.
    print(results)
    assert len(results) > 0

def teardown_module():
    """Save the cache of the shared connector's `related` object."""
    related = connector.related
    related.save_cache()
15 changes: 7 additions & 8 deletions tests/test_oracle.py
Expand Up @@ -55,14 +55,13 @@ def test_oracle_finds_ishmael():
oracle.connector.related.save_cache()
assert len(set(result) & set(target)) > 0

def test_oracle_finds_chers_son():
    """Check that the oracle returns at least one of the expected answers."""
    query = "what is cher's son's name?"
    target = [u"Elijah Blue Allman", u"Chaz Bono"]
    dataset = [(query, target)]

    oracle = OracleSystem(dataset)
    result = oracle.execute(query)
    # Saved before the assertion, so the cache is written even if the check
    # below fails.
    oracle.connector.related.save_cache()
    assert len(set(result) & set(target)) > 0

0 comments on commit f60442a

Please sign in to comment.