Using fuzzywuzzy show similar queries (did you mean or suggestion) if…

… query has no results - #34
harishvc · Feb 15, 2015 · 140a3f2 · 140a3f2
1 parent 6ad9c9a
commit 140a3f2
Show file tree

Hide file tree

Showing 9 changed files with 636 additions and 21 deletions.
diff --git a/DBQueries.py b/DBQueries.py
@@ -10,7 +10,6 @@
 
 #Local modules
 import RandomQuotes
-import Suggestions
 import Neo4jQueries
 import MyMoment
 
@@ -19,7 +18,7 @@
 #Configure for production or development based on environment variables
 if (os.environ['deployEnv'] == "production"):
     MONGO_URL = os.environ['connectURLRead']
-    connection = MongoClient(MONGO_URL,auto_start_request=False)
+    connection = MongoClient(MONGO_URL)
     db = connection.githublive.pushevent
 else: 
     MONGO_URL = os.environ['connectURLReaddev']
@@ -40,10 +39,8 @@
 DE = "</div>"
 
 def ProcessQuery(query):
-    global ShowSuggestion
-    ShowSuggestion = False
     if (query == ""):
-        return ""
+        return "EMPTY"
     else: 
         app.logger.debug("processing ............ ->%s<-" ,  query)
         if (query == "active repositories"):

diff --git a/RunFlask.py b/RunFlask.py
@@ -15,11 +15,11 @@
 from json import dumps
 
 #Local modules
-import RandomQuotes
+import Suggestions
 import DBQueries     
 
 #Global variables
-NORESULT="<div class=\"col-sm-12\"><p class=\"searchstatus text-danger\">You've got me stumped!</p></div>"    #No result
+NORESULT="<h2 class=\"searchstatus text-danger\">You've got me stumped!</h2>"    #No result
 
 
 
@@ -37,8 +37,6 @@ def numformat(value):
 def index():
     query = ""
     processed_text1  = ""
-    global ShowSuggestion
-    ShowSuggestion = False
     #Debug
     #time.sleep(5)
     if request.method == 'GET':
@@ -54,7 +52,8 @@ def index():
             #End: Uncomment to trigger slow response time
             processed_text1 = DBQueries.ProcessQuery(query)
             if (processed_text1 == "EMPTY") :
-                processed_text1 = NORESULT
+                t1 = Suggestions.compare("now") if (query == "") else Suggestions.compare(query)  
+                processed_text1 =  NORESULT + t1
     else:
         query =""
         processed_text1 =""

diff --git a/Suggestions.py b/Suggestions.py
@@ -1,14 +1,36 @@
-import random
+#https://github.com/seatgeek/fuzzywuzzy
+#https://pypi.python.org/pypi/fuzzywuzzy/0.4.0
 
-def RandomQuerySuggestions():
-   foo =    ["<a href=\'/?q=active+repositories&action=Search\'>active repositories</a>",
-            "<a href=\'/?q=active+users&action=Search\'>active users</a>",
-            "<a href=\'/?q=total+commits&action=Search\'>total commits</a>",
-            "<a href=\'/?q=trending+now&action=Search\'>trending now</a>",
-            "<a href=\'/?q=top+active+repositories+by+contributors&action=Search\'>top active repositories by contributors</a>",
-            "<a href=\'/?q=top+active+repositories+by+commits&action=Search\'>top active repositories by commits</a>",
-            "<a href=\'/?q=top+active+repositories+by+branches&action=Search\'>top active repositories by branches</a>"
-            ]
-   return("Suggestion: " + random.choice(foo))
+from fuzzywuzzy import fuzz
+from fuzzywuzzy import process
 
+# Candidate query strings: compare() fuzzy-matches the user's input against
+# this list and renders the best matches as suggestion links.
+choices = ["active users",
+           "active repositories",
+           "total commits",
+           "trending now",
+           "top active repositories by contributors",
+           "top active repositories by branches",
+           "top active repositories by commits"]
 
+
+def _suggestion_link(text):
+    """Return an <a> element that re-runs the search with `text` as the query."""
+    # Spaces are not valid inside a URL; encode them as '+' in the query string.
+    href = "/?q=" + text.replace(" ", "+") + "&amp;action=Search"
+    return "<a href=\"" + href + "\">" + text + "</a>"
+
+
+def compare(input):
+    """Build an HTML snippet suggesting entries of `choices` similar to `input`.
+
+    The input is fuzzy-matched against `choices` with process.extract().
+    When the best match scores 75 or more, up to three matches scoring at
+    least 75 are returned as a "Did you mean:" list. Otherwise the single
+    best match is returned as a "Suggestion:" link.
+
+    Returns an empty string if the matcher yields no candidates.
+    """
+    # NOTE: the parameter name shadows the builtin `input`; it is kept so the
+    # signature stays unchanged for existing callers.
+    matches = process.extract(input, choices, limit=5)
+    if not matches:
+        return ""
+
+    # Relies on process.extract() returning matches best-first, so the strong
+    # matches (score >= 75) are always at the front of the list.
+    strong = [str(row[0]) for row in matches[:3] if row[1] >= 75]
+    if strong:
+        items = "".join("<li>" + _suggestion_link(text) + "</li>" for text in strong)
+        return "<p class=\"text-info\">Did you mean:</p><ul>" + items + "</ul>"
+
+    # Nothing close enough: fall back to the single best candidate.
+    return "<p class=\"text-info\">Suggestion:</p>" + _suggestion_link(str(matches[0][0]))
diff --git a/fuzzywuzzy/StringMatcher.py b/fuzzywuzzy/StringMatcher.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+StringMatcher.py
+
+ported from python-Levenshtein
+[https://github.com/miohtama/python-Levenshtein]
+"""
+
+from Levenshtein import *
+from warnings import warn
+
+class StringMatcher:
+    """A SequenceMatcher-like class built on top of Levenshtein.
+
+    Results of the Levenshtein calls are cached on the instance; the cache
+    is cleared whenever either sequence is replaced.
+    """
+
+    def _reset_cache(self):
+        """Drop every cached result; None means "not computed yet"."""
+        self._ratio = self._distance = None
+        self._opcodes = self._editops = self._matching_blocks = None
+
+    def __init__(self, isjunk=None, seq1='', seq2=''):
+        """Store the two sequences to compare.
+
+        `isjunk` is accepted but ignored; a warning is issued when it is set.
+        """
+        if isjunk:
+            warn("isjunk is NOT implemented, it will be ignored")
+        self._str1, self._str2 = seq1, seq2
+        self._reset_cache()
+
+    def set_seqs(self, seq1, seq2):
+        """Replace both sequences and invalidate cached results."""
+        self._str1, self._str2 = seq1, seq2
+        self._reset_cache()
+
+    def set_seq1(self, seq1):
+        """Replace the first sequence and invalidate cached results."""
+        self._str1 = seq1
+        self._reset_cache()
+
+    def set_seq2(self, seq2):
+        """Replace the second sequence and invalidate cached results."""
+        self._str2 = seq2
+        self._reset_cache()
+
+    def get_opcodes(self):
+        """Return the opcodes turning the first sequence into the second (cached)."""
+        # Compare against None: an empty result is a valid cached value and
+        # must not trigger a recomputation on every call.
+        if self._opcodes is None:
+            if self._editops:
+                # Derive from already-computed edit operations when available.
+                self._opcodes = opcodes(self._editops, self._str1, self._str2)
+            else:
+                self._opcodes = opcodes(self._str1, self._str2)
+        return self._opcodes
+
+    def get_editops(self):
+        """Return the edit operations turning the first sequence into the second (cached)."""
+        if self._editops is None:
+            if self._opcodes:
+                # Derive from already-computed opcodes when available.
+                self._editops = editops(self._opcodes, self._str1, self._str2)
+            else:
+                self._editops = editops(self._str1, self._str2)
+        return self._editops
+
+    def get_matching_blocks(self):
+        """Return the matching blocks computed from the opcodes (cached)."""
+        if self._matching_blocks is None:
+            self._matching_blocks = matching_blocks(self.get_opcodes(),
+                                                    self._str1, self._str2)
+        return self._matching_blocks
+
+    def ratio(self):
+        """Return the Levenshtein similarity ratio of the two sequences (cached)."""
+        # A ratio of 0.0 is falsy, so test for None to keep it cached.
+        if self._ratio is None:
+            self._ratio = ratio(self._str1, self._str2)
+        return self._ratio
+
+    def quick_ratio(self):
+        """Return the same value as ratio(); no cheaper estimate is computed."""
+        # This is usually quick enough :o)
+        if self._ratio is None:
+            self._ratio = ratio(self._str1, self._str2)
+        return self._ratio
+
+    def real_quick_ratio(self):
+        """Return an upper bound on ratio() based only on the sequence lengths.
+
+        Two empty sequences yield 1.0 (as difflib.SequenceMatcher does)
+        instead of raising ZeroDivisionError.
+        """
+        len1, len2 = len(self._str1), len(self._str2)
+        total = len1 + len2
+        if not total:
+            return 1.0
+        return 2.0 * min(len1, len2) / total
+
+    def distance(self):
+        """Return the Levenshtein distance between the two sequences (cached)."""
+        # A distance of 0 is falsy, so test for None to keep it cached.
+        if self._distance is None:
+            self._distance = distance(self._str1, self._str2)
+        return self._distance
diff --git a/fuzzywuzzy/__init__.py b/fuzzywuzzy/__init__.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+__version__ = '0.5.0'