From 25df9a25259f69dc21402447a0a9e0ffe8e43a0e Mon Sep 17 00:00:00 2001
From: Chipe1 <vedant.cacklur@students.iiit.ac.in>
Date: Tue, 18 Apr 2017 18:36:10 +0530
Subject: [PATCH 1/2] Make relevant_pages() match pages containing all query words

---
 nlp.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/nlp.py b/nlp.py
index bd26d0a7b..268a2b155 100644
--- a/nlp.py
+++ b/nlp.py
@@ -301,15 +301,17 @@ def expand_pages(pages):
 
 
 def relevant_pages(query):
-    """Relevant pages are pages that contain the query in its entireity.
-    If a page's content contains the query it is returned by the function."""
-    relevant = {}
-    print("pagesContent in function: ", pagesContent)
-    for addr, page in pagesIndex.items():
-        if query.lower() in pagesContent[addr].lower():
-            relevant[addr] = page
-    return relevant
-
+    """Relevant pages are pages whose content contains every query word (case-insensitive
+    substring match). They are obtained by intersecting the hit lists of the query words."""
+    hit_intersection = {addr for addr in pagesIndex}
+    query_words = query.split()
+    for query_word in query_words:
+        hit_list = set()
+        for addr in pagesIndex:
+            if query_word.lower() in pagesContent[addr].lower():
+                hit_list.add(addr)
+        hit_intersection = hit_intersection.intersection(hit_list)
+    return {addr: pagesIndex[addr] for addr in hit_intersection}
 
 def normalize(pages):
     """From the pseudocode: Normalize divides each page's score by the sum of

From 0429539a21f70c4dc0d58c29c19016663bae3deb Mon Sep 17 00:00:00 2001
From: Chipe1 <vedant.cacklur@students.iiit.ac.in>
Date: Tue, 18 Apr 2017 18:38:57 +0530
Subject: [PATCH 2/2] Additional tests for relevant_pages()

---
 tests/test_nlp.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/test_nlp.py b/tests/test_nlp.py
index 81eef882d..d0ce46fbc 100644
--- a/tests/test_nlp.py
+++ b/tests/test_nlp.py
@@ -30,7 +30,7 @@ def test_lexicon():
             href="https://google.com.au"
             < href="/wiki/TestThing" > href="/wiki/TestBoy"
             href="/wiki/TestLiving" href="/wiki/TestMan" >"""
-testHTML2 = "Nothing"
+testHTML2 = "a mom and a dad"
 testHTML3 = """
             <!DOCTYPE html>
             <html>
@@ -106,9 +106,13 @@ def test_expand_pages():
 
 
 def test_relevant_pages():
-    pages = relevant_pages("male")
-    assert all((x in pages.keys()) for x in ['A', 'C', 'E'])
+    pages = relevant_pages("his dad")
+    assert all((x in pages) for x in ['A', 'C', 'E'])
     assert all((x not in pages) for x in ['B', 'D', 'F'])
+    pages = relevant_pages("mom and dad")
+    assert all((x in pages) for x in ['A', 'B', 'C', 'D', 'E', 'F'])
+    pages = relevant_pages("philosophy")
+    assert all((x not in pages) for x in ['A', 'B', 'C', 'D', 'E', 'F'])
 
 
 def test_normalize():