Skip to content

Commit

Permalink
Using fuzzywuzzy show similar queries (did you mean or suggestion) if…
Browse files Browse the repository at this point in the history
… query has no results - #34
  • Loading branch information
harishvc committed Feb 15, 2015
1 parent 6ad9c9a commit 140a3f2
Show file tree
Hide file tree
Showing 9 changed files with 636 additions and 21 deletions.
7 changes: 2 additions & 5 deletions DBQueries.py
Expand Up @@ -10,7 +10,6 @@

#Local modules
import RandomQuotes
import Suggestions
import Neo4jQueries
import MyMoment

Expand All @@ -19,7 +18,7 @@
#Configure for production or development based on environment variables
if (os.environ['deployEnv'] == "production"):
MONGO_URL = os.environ['connectURLRead']
connection = MongoClient(MONGO_URL,auto_start_request=False)
connection = MongoClient(MONGO_URL)
db = connection.githublive.pushevent
else:
MONGO_URL = os.environ['connectURLReaddev']
Expand All @@ -40,10 +39,8 @@
DE = "</div>"

def ProcessQuery(query):
global ShowSuggestion
ShowSuggestion = False
if (query == ""):
return ""
return "EMPTY"
else:
app.logger.debug("processing ............ ->%s<-" , query)
if (query == "active repositories"):
Expand Down
9 changes: 4 additions & 5 deletions RunFlask.py
Expand Up @@ -15,11 +15,11 @@
from json import dumps

#Local modules
import RandomQuotes
import Suggestions
import DBQueries

#Global variables
NORESULT="<div class=\"col-sm-12\"><p class=\"searchstatus text-danger\">You've got me stumped!</p></div>" #No result
NORESULT="<h2 class=\"searchstatus text-danger\">You've got me stumped!</h2>" #No result



Expand All @@ -37,8 +37,6 @@ def numformat(value):
def index():
query = ""
processed_text1 = ""
global ShowSuggestion
ShowSuggestion = False
#Debug
#time.sleep(5)
if request.method == 'GET':
Expand All @@ -54,7 +52,8 @@ def index():
#End: Uncomment to trigger slow response time
processed_text1 = DBQueries.ProcessQuery(query)
if (processed_text1 == "EMPTY") :
processed_text1 = NORESULT
t1 = Suggestions.compare("now") if (query == "") else Suggestions.compare(query)
processed_text1 = NORESULT + t1
else:
query =""
processed_text1 =""
Expand Down
44 changes: 33 additions & 11 deletions Suggestions.py
@@ -1,14 +1,36 @@
import random
#https://github.com/seatgeek/fuzzywuzzy
#https://pypi.python.org/pypi/fuzzywuzzy/0.4.0

def RandomQuerySuggestions():
    """Return the text "Suggestion: " followed by one randomly picked search link.

    Each link is an HTML anchor pointing at ``/?q=<query>&action=Search``
    where the spaces of the query are written as ``+``.
    """
    queries = (
        "active repositories",
        "active users",
        "total commits",
        "trending now",
        "top active repositories by contributors",
        "top active repositories by commits",
        "top active repositories by branches",
    )
    # Build every anchor up front so random.choice sees the same seven
    # candidates, in the same order, as a hand-written list would give it.
    links = [
        "<a href='/?q=" + text.replace(" ", "+") + "&action=Search'>" + text + "</a>"
        for text in queries
    ]
    return "Suggestion: " + random.choice(links)
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Query strings that can be offered back to the user as suggestions.
# compare() passes this list to process.extract() as the candidate set.
choices = ["active users",
"active repositories",
"total commits",
"trending now",
"top active repositories by contributors",
"top active repositories by branches",
"top active repositories by commits"]


# Minimum match score for a choice to be listed under "Did you mean".
_DID_YOU_MEAN_MIN_SCORE = 75
# Upper bound on the number of "Did you mean" entries.
_DID_YOU_MEAN_MAX_ITEMS = 3


def _suggestion_link(choice):
    """Return an HTML anchor that re-runs the search for *choice*.

    The query-string value is URL-encoded (spaces become ``+``) so the
    ``href`` is a valid URL instead of containing literal spaces.
    """
    # Imported locally so the module works on both Python 2 and Python 3
    # without touching the file-level imports.
    try:
        from urllib.parse import quote_plus  # Python 3
    except ImportError:
        from urllib import quote_plus  # Python 2
    label = str(choice)
    return ("<a href=\"/?q=" + quote_plus(label) + "&amp;action=Search\">"
            + label + "</a>")


def compare(input):
    """Build an HTML snippet suggesting known queries similar to *input*.

    The module-level ``choices`` list is matched against *input* with
    ``process.extract`` (at most 5 candidates are requested).

    * If the best candidate scores at least 75, up to three leading
      candidates that also score at least 75 are rendered as a
      "Did you mean:" bullet list.
    * Otherwise the single best candidate is rendered as a "Suggestion:".

    Returns the HTML as a string; an empty string if the matcher returned
    no candidates at all.
    """
    matches = process.extract(input, choices, limit=5)
    if not matches:
        # Nothing to index into; avoids an IndexError on matches[0].
        return ""

    best_choice, best_score = matches[0][0], matches[0][1]
    if best_score < _DID_YOU_MEAN_MIN_SCORE:
        # No close match: offer only the single best candidate.
        return ("<p class=\"text-info\">Suggestion:</p>"
                + _suggestion_link(best_choice))

    items = []
    for row in matches[:_DID_YOU_MEAN_MAX_ITEMS]:
        # Stop at the first candidate below the threshold, exactly like the
        # original early "break" (results are presumably ordered best-first).
        if row[1] < _DID_YOU_MEAN_MIN_SCORE:
            break
        items.append("<li>" + _suggestion_link(row[0]) + "</li>")

    return ("<p class=\"text-info\">Did you mean:</p><ul>"
            + "".join(items) + "</ul>")
78 changes: 78 additions & 0 deletions fuzzywuzzy/StringMatcher.py
@@ -0,0 +1,78 @@
#!/usr/bin/env python
# encoding: utf-8
"""
StringMatcher.py
ported from python-Levenshtein
[https://github.com/miohtama/python-Levenshtein]
"""

from Levenshtein import *
from warnings import warn

class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    The heavy lifting (``opcodes``, ``editops``, ``matching_blocks``,
    ``ratio``, ``distance``) is delegated to the functions brought in by
    ``from Levenshtein import *``.  Results are cached per pair of strings
    and the cache is dropped whenever either string is replaced.
    """

    def _reset_cache(self):
        """Forget every cached result; ``None`` means "not computed yet"."""
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        """Store the two strings to compare.

        ``isjunk`` is accepted only for signature compatibility; a truthy
        value triggers a warning and is otherwise ignored.
        """
        if isjunk:
            warn("isjunk is NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        """Replace both strings and invalidate cached results."""
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        """Replace the first string and invalidate cached results."""
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        """Replace the second string and invalidate cached results."""
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        """Return the (cached) opcodes turning the first string into the second."""
        # Compare against None so an empty, already computed result is
        # still treated as cached instead of being recomputed.
        if self._opcodes is None:
            if self._editops:
                # Reuse the edit operations that were already computed.
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        """Return the (cached) edit operations between the two strings."""
        if self._editops is None:
            if self._opcodes:
                # Reuse the opcodes that were already computed.
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        """Return the (cached) matching blocks derived from the opcodes."""
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        """Return the (cached) similarity ratio of the two strings."""
        # A ratio of 0.0 is a valid cached value, hence the None check.
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        """Return the same value as ratio()."""
        # This is usually quick enough :o)
        return self.ratio()

    def real_quick_ratio(self):
        """Return a length-only estimate: 2 * min(len) / (len1 + len2).

        Two empty strings yield 1.0 (as difflib.SequenceMatcher does)
        instead of raising ZeroDivisionError.
        """
        len1, len2 = len(self._str1), len(self._str2)
        total = len1 + len2
        if not total:
            return 1.0
        return 2.0 * min(len1, len2) / total

    def distance(self):
        """Return the (cached) edit distance between the two strings."""
        # A distance of 0 is a valid cached value, hence the None check.
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
2 changes: 2 additions & 0 deletions fuzzywuzzy/__init__.py
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
__version__ = '0.5.0'

0 comments on commit 140a3f2

Please sign in to comment.