From 140a3f240ebd198f491d023b87882eaee51fde7b Mon Sep 17 00:00:00 2001
From: harishvc <harishvc@gmail.com>
Date: Sat, 14 Feb 2015 20:42:37 -0800
Subject: [PATCH] Using fuzzywuzzy show similar queries (did you mean or
 suggestion) if query has no results - harishvc/githubanalytics#34

---
 DBQueries.py                    |   7 +-
 RunFlask.py                     |   9 +-
 Suggestions.py                  |  44 +++--
 fuzzywuzzy/StringMatcher.py     |  78 +++++++++
 fuzzywuzzy/__init__.py          |   2 +
 fuzzywuzzy/fuzz.py              | 284 ++++++++++++++++++++++++++++++++
 fuzzywuzzy/process.py           | 126 ++++++++++++++
 fuzzywuzzy/string_processing.py |  34 ++++
 fuzzywuzzy/utils.py             |  73 ++++++++
 9 files changed, 636 insertions(+), 21 deletions(-)
 create mode 100644 fuzzywuzzy/StringMatcher.py
 create mode 100644 fuzzywuzzy/__init__.py
 create mode 100644 fuzzywuzzy/fuzz.py
 create mode 100644 fuzzywuzzy/process.py
 create mode 100644 fuzzywuzzy/string_processing.py
 create mode 100644 fuzzywuzzy/utils.py
diff --git a/DBQueries.py b/DBQueries.py
index 77b0a54..8800b63 100755
--- a/DBQueries.py
+++ b/DBQueries.py
@@ -10,7 +10,6 @@
 
 #Local modules
 import RandomQuotes
-import Suggestions
 import Neo4jQueries
 import MyMoment
 
@@ -19,7 +18,7 @@
 #Configure for production or development based on environment variables
 if (os.environ['deployEnv'] == "production"):
     MONGO_URL = os.environ['connectURLRead']
-    connection = MongoClient(MONGO_URL,auto_start_request=False)
+    connection = MongoClient(MONGO_URL)
     db = connection.githublive.pushevent
 else: 
     MONGO_URL = os.environ['connectURLReaddev']
@@ -40,10 +39,8 @@
 DE = "</div>"
     
 def ProcessQuery(query):
-    global ShowSuggestion
-    ShowSuggestion = False
     if (query == ""):
-        return ""
+        return "EMPTY"
     else: 
         app.logger.debug("processing ............ ->%s<-" ,  query)
         if (query == "active repositories"):
diff --git a/RunFlask.py b/RunFlask.py
index 94a6118..4f54ef0 100755
--- a/RunFlask.py
+++ b/RunFlask.py
@@ -15,11 +15,11 @@
 from json import dumps
 
 #Local modules
-import RandomQuotes
+import Suggestions
 import DBQueries     
 
 #Global variables
-NORESULT="<div class=\"col-sm-12\"><p class=\"searchstatus text-danger\">You've got me stumped!</p></div>"    #No result
+NORESULT="<h2 class=\"searchstatus text-danger\">You've got me stumped!</h2>"    #No result
 
 
 
@@ -37,8 +37,6 @@ def numformat(value):
 def index():
     query = ""
     processed_text1  = ""
-    global ShowSuggestion
-    ShowSuggestion = False
     #Debug
     #time.sleep(5)
     if request.method == 'GET':
@@ -54,7 +52,8 @@ def index():
             #End: Uncomment to trigger slow response time
             processed_text1 = DBQueries.ProcessQuery(query)
             if (processed_text1 == "EMPTY") :
-                processed_text1 = NORESULT
+                t1 = Suggestions.compare("now") if (query == "") else Suggestions.compare(query)  
+                processed_text1 =  NORESULT + t1
     else:
         query =""
         processed_text1 =""
diff --git a/Suggestions.py b/Suggestions.py
index 5b52c4a..2c55c4f 100755
--- a/Suggestions.py
+++ b/Suggestions.py
@@ -1,14 +1,36 @@
-import random
+#https://github.com/seatgeek/fuzzywuzzy
+#https://pypi.python.org/pypi/fuzzywuzzy/0.4.0
 
-def RandomQuerySuggestions():
-   foo =    ["<a href=\'/?q=active+repositories&action=Search\'>active repositories</a>",
-            "<a href=\'/?q=active+users&action=Search\'>active users</a>",
-            "<a href=\'/?q=total+commits&action=Search\'>total commits</a>",
-            "<a href=\'/?q=trending+now&action=Search\'>trending now</a>",
-            "<a href=\'/?q=top+active+repositories+by+contributors&action=Search\'>top active repositories by contributors</a>",
-            "<a href=\'/?q=top+active+repositories+by+commits&action=Search\'>top active repositories by commits</a>",
-            "<a href=\'/?q=top+active+repositories+by+branches&action=Search\'>top active repositories by branches</a>"
-            ]
-   return("Suggestion: " + random.choice(foo))
+from fuzzywuzzy import fuzz
+from fuzzywuzzy import process
 
+choices = ["active users",
+           "active repositories",
+           "total commits",
+           "trending now",
+           "top active repositories by contributors",
+           "top active repositories by branches",
+           "top active repositories by commits"]
 
+
+def compare(input):
+    """Return an HTML "did you mean" snippet for a query with no results.
+
+    `input` is scored against `choices`: up to three matches scoring 75
+    or more are listed, otherwise the single best match is suggested.
+    Spaces in each link's query string are encoded as '+'.
+    """
+    def link(text):
+        # Anchor markup shared by both branches; only the href is encoded.
+        return ("<a href=\"/?q=" + text.replace(" ", "+") +
+                "&amp;action=Search\">" + text + "</a>")
+
+    ranked = process.extract(input, choices, limit=5)
+    if not ranked:
+        return ""
+    strong = [str(match[0]) for match in ranked[:3] if match[1] >= 75]
+    if strong:
+        items = "".join("<li>" + link(text) + "</li>" for text in strong)
+        return "<p class=\"text-info\">Did you mean:</p><ul>" + items + "</ul>"
+    # No strong match: offer only the single best-scoring choice.
+    return "<p class=\"text-info\">Suggestion:</p>" + link(str(ranked[0][0]))
diff --git a/fuzzywuzzy/StringMatcher.py b/fuzzywuzzy/StringMatcher.py
new file mode 100644
index 0000000..9dccfe7
--- /dev/null
+++ b/fuzzywuzzy/StringMatcher.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+StringMatcher.py
+
+ported from python-Levenshtein
+[https://github.com/miohtama/python-Levenshtein]
+"""
+
+from Levenshtein import *
+from warnings import warn
+
+class StringMatcher:
+    """A SequenceMatcher-like class built on top of Levenshtein; results are
+    cached until a sequence is replaced, and `isjunk` is ignored."""
+
+    def _reset_cache(self):
+        self._ratio = self._distance = None
+        self._opcodes = self._editops = self._matching_blocks = None
+
+    def __init__(self, isjunk=None, seq1='', seq2=''):
+        if isjunk:
+            warn("isjunk is NOT implemented, it will be ignored")
+        self._str1, self._str2 = seq1, seq2
+        self._reset_cache()
+
+    def set_seqs(self, seq1, seq2):
+        self._str1, self._str2 = seq1, seq2
+        self._reset_cache()
+
+    def set_seq1(self, seq1):
+        self._str1 = seq1
+        self._reset_cache()
+
+    def set_seq2(self, seq2):
+        self._str2 = seq2
+        self._reset_cache()
+
+    def get_opcodes(self):
+        if self._opcodes is None:
+            if self._editops:
+                self._opcodes = opcodes(self._editops, self._str1, self._str2)
+            else:
+                self._opcodes = opcodes(self._str1, self._str2)
+        return self._opcodes
+
+    def get_editops(self):
+        if self._editops is None:
+            if self._opcodes:
+                self._editops = editops(self._opcodes, self._str1, self._str2)
+            else:
+                self._editops = editops(self._str1, self._str2)
+        return self._editops
+
+    def get_matching_blocks(self):
+        if self._matching_blocks is None:
+            self._matching_blocks = matching_blocks(self.get_opcodes(),
+                                                    self._str1, self._str2)
+        return self._matching_blocks
+
+    def ratio(self):
+        if self._ratio is None:
+            self._ratio = ratio(self._str1, self._str2)
+        return self._ratio
+
+    def quick_ratio(self):
+        # The exact ratio is usually quick enough :o)
+        return self.ratio()
+
+    def real_quick_ratio(self):
+        len1, len2 = len(self._str1), len(self._str2)
+        # Two empty sequences are identical (difflib reports 1.0 as well).
+        return 2.0 * min(len1, len2) / (len1 + len2) if len1 + len2 else 1.0
+
+    def distance(self):
+        if self._distance is None:
+            self._distance = distance(self._str1, self._str2)
+        return self._distance
\ No newline at end of file
diff --git a/fuzzywuzzy/__init__.py b/fuzzywuzzy/__init__.py
new file mode 100644
index 0000000..4543f94
--- /dev/null
+++ b/fuzzywuzzy/__init__.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+__version__ = '0.5.0'
diff --git a/fuzzywuzzy/fuzz.py b/fuzzywuzzy/fuzz.py
new file mode 100644
index 0000000..62c8f19
--- /dev/null
+++ b/fuzzywuzzy/fuzz.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+fuzz.py
+
+Copyright (c) 2011 Adam Cohen
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+from __future__ import unicode_literals
+import warnings
+
+try:
+    from .StringMatcher import StringMatcher as SequenceMatcher
+except ImportError:
+    warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')
+    from difflib import SequenceMatcher
+
+from . import utils
+
+
+###########################
+# Basic Scoring Functions #
+###########################
+
+def ratio(s1, s2):
+    """Return the similarity of s1 and s2 as an integer from 0 to 100.
+
+    Raises TypeError if either argument is None; returns 0 when either
+    string is empty after the two are coerced to a common type.
+    """
+    for name, value in (("s1", s1), ("s2", s2)):
+        if value is None:
+            raise TypeError(name + " is None")
+    s1, s2 = utils.make_type_consistent(s1, s2)
+    if not (len(s1) and len(s2)):
+        return 0
+    return utils.intr(100 * SequenceMatcher(None, s1, s2).ratio())
+
+
+# todo: skip duplicate indexes for a little more speed
+def partial_ratio(s1, s2):
+    """Return the ratio of the most similar substring as a number
+    between 0 and 100.
+
+    The shorter string is compared with equal-length slices of the
+    longer one and the best slice's score is returned. Raises
+    TypeError if either argument is None; empty input scores 0.
+    """
+    if s1 is None:
+        raise TypeError("s1 is None")
+    if s2 is None:
+        raise TypeError("s2 is None")
+    s1, s2 = utils.make_type_consistent(s1, s2)
+    if len(s1) == 0 or len(s2) == 0:
+        return 0
+
+    shorter, longer = (s1, s2) if len(s1) <= len(s2) else (s2, s1)
+
+    m = SequenceMatcher(None, shorter, longer)
+    blocks = m.get_matching_blocks()
+
+    # each block represents a sequence of matching characters in a string
+    # of the form (idx_1, idx_2, len)
+    # the best partial match will block align with at least one of those blocks
+    #   e.g. shorter = "abcd", longer = XXXbcdeEEE
+    #   block = (1,3,3)
+    #   best score === ratio("abcd", "Xbcd")
+    scores = []
+    for block in blocks:
+        long_start = max(block[1] - block[0], 0)
+        long_end = long_start + len(shorter)
+        long_substr = longer[long_start:long_end]
+
+        m2 = SequenceMatcher(None, shorter, long_substr)
+        r = m2.ratio()
+        if r > .995:
+            return 100
+        else:
+            scores.append(r)
+
+    # round rather than truncate, so scores are consistent with ratio()
+    return utils.intr(100 * max(scores))
+
+
+##############################
+# Advanced Scoring Functions #
+##############################
+
+def _process_and_sort(s, force_ascii):
+    """Normalise `s` and return its tokens in sorted order.
+
+    `s` is cleaned by utils.full_process (honouring `force_ascii`), split
+    on whitespace, sorted, and re-joined with single spaces.
+    """
+    cleaned = utils.full_process(s, force_ascii=force_ascii)
+    return u" ".join(sorted(cleaned.split())).strip()
+
+# Sorted Token
+#   find all alphanumeric tokens in the string
+#   sort those tokens and take ratio of resulting joined strings
+#   controls for unordered string elements
+def _token_sort(s1, s2, partial=True, force_ascii=True):
+    """Score s1 against s2 after sorting the tokens of each string.
+
+    Uses partial_ratio when `partial` is true, otherwise ratio.
+    Raises TypeError if either argument is None.
+    """
+    for label, value in (("s1", s1), ("s2", s2)):
+        if value is None:
+            raise TypeError(label + " is None")
+
+    scorer = partial_ratio if partial else ratio
+    left = _process_and_sort(s1, force_ascii)
+    right = _process_and_sort(s2, force_ascii)
+    return scorer(left, right)
+
+def token_sort_ratio(s1, s2, force_ascii=True):
+    """Return a 0-100 similarity score computed by ratio() on the two
+    strings after the tokens of each have been sorted.
+    """
+    return _token_sort(s1, s2, force_ascii=force_ascii, partial=False)
+
+
+def partial_token_sort_ratio(s1, s2, force_ascii=True):
+    """Return a 0-100 similarity score computed by partial_ratio() on
+    the two strings after the tokens of each have been sorted.
+    """
+    return _token_sort(s1, s2, force_ascii=force_ascii, partial=True)
+
+
+def _token_set(s1, s2, partial=True, force_ascii=True):
+    """Score two strings by comparing their sets of tokens.
+
+    Each string is normalised and split into a set of tokens, from
+    which three strings of sorted tokens are built:
+        - the tokens common to both inputs,
+        - the common tokens followed by those found only in s1,
+        - the common tokens followed by those found only in s2.
+    The result is the best pairwise score among the three, computed
+    with partial_ratio when `partial` is true and ratio otherwise.
+
+    Raises TypeError if either argument is None; returns 0 when either
+    string is empty after normalisation.
+    """
+    for label, value in (("s1", s1), ("s2", s2)):
+        if value is None:
+            raise TypeError(label + " is None")
+
+    p1 = utils.full_process(s1, force_ascii=force_ascii)
+    p2 = utils.full_process(s2, force_ascii=force_ascii)
+
+    if not utils.validate_string(p1):
+        return 0
+    if not utils.validate_string(p2):
+        return 0
+
+    # tokenise; the normalised strings pass through full_process again
+    first = set(utils.full_process(p1).split())
+    second = set(utils.full_process(p2).split())
+
+    shared = first & second
+    only_first = first - second
+    only_second = second - first
+
+    def joined(tokens):
+        return " ".join(sorted(tokens))
+
+    # build the three comparison strings
+    common = joined(shared)
+    with_first = (common + " " + joined(only_first)).strip()
+    with_second = (common + " " + joined(only_second)).strip()
+    common = common.strip()
+
+    score = partial_ratio if partial else ratio
+
+    # score every pair and keep the best
+    candidates = [
+        (common, with_first),
+        (common, with_second),
+        (with_first, with_second),
+    ]
+    return max(score(left, right) for left, right in candidates)
+
+
+def token_set_ratio(s1, s2, force_ascii=True):  # token-set score, partial matching off
+    return _token_set(s1, s2, partial=False, force_ascii=force_ascii)
+
+
+def partial_token_set_ratio(s1, s2, force_ascii=True):  # token-set score, partial matching on
+    return _token_set(s1, s2, partial=True, force_ascii=force_ascii)
+
+
+# TODO: numerics
+
+###################
+# Combination API #
+###################
+
+# q is for quick
+def QRatio(s1, s2, force_ascii=True):
+    """Quick ratio: normalise both strings, then score them with ratio().
+
+    Returns 0 if either string is empty once normalised.
+    """
+    cleaned = [utils.full_process(s, force_ascii=force_ascii)
+               for s in (s1, s2)]
+    if not all(utils.validate_string(c) for c in cleaned):
+        return 0
+
+    return ratio(*cleaned)
+
+
+def UQRatio(s1, s2):  # QRatio without forcing the inputs to ASCII
+    return QRatio(s1, s2, force_ascii=False)
+
+
+# w is for weighted
+def WRatio(s1, s2, force_ascii=True):
+    """Return a measure of the sequences' similarity between 0 and 100,
+    combining several algorithms and keeping the best weighted score.
+
+    Both strings are normalised first; 0 is returned if either is then
+    empty. When the longer string is under 1.5 times the length of the
+    shorter, the plain ratio competes with the token sort/set ratios
+    weighted by 0.95. Otherwise it competes with the partial ratio
+    weighted by 0.90 (0.60 once the length ratio exceeds 8) and with the
+    partial token ratios weighted by that factor times 0.95. The
+    winning value is truncated to an int.
+    """
+    p1 = utils.full_process(s1, force_ascii=force_ascii)
+    p2 = utils.full_process(s2, force_ascii=force_ascii)
+
+    if not utils.validate_string(p1):
+        return 0
+    if not utils.validate_string(p2):
+        return 0
+
+    unbase_scale = .95
+    base = ratio(p1, p2)
+    shortest, longest = sorted((len(p1), len(p2)))
+    len_ratio = float(longest) / shortest
+
+    # if strings are similar length, don't use partials
+    if len_ratio < 1.5:
+        tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
+        tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
+
+        return int(max(base, tsor, tser))
+
+    # if one string is much much shorter than the other, trust partials less
+    partial_scale = .6 if len_ratio > 8 else .90
+
+    partial = partial_ratio(p1, p2) * partial_scale
+    ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) \
+        * unbase_scale * partial_scale
+    ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) \
+        * unbase_scale * partial_scale
+
+    return int(max(base, partial, ptsor, ptser))
+
+
+def UWRatio(s1, s2):
+    """Unicode-preserving variant of WRatio: return the same weighted
+    0-100 similarity score, but with force_ascii switched off.
+    """
+    return WRatio(s1, s2, False)
diff --git a/fuzzywuzzy/process.py b/fuzzywuzzy/process.py
new file mode 100644
index 0000000..bbc38a9
--- /dev/null
+++ b/fuzzywuzzy/process.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+process.py
+
+Copyright (c) 2011 Adam Cohen
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+import itertools
+
+from . import fuzz
+from . import utils
+
+
+def extract(query, choices, processor=None, scorer=None, limit=5):
+    """Find best matches in a list or dictionary of choices, return a
+    list of tuples containing the match and its score. If a dictionary
+    is used, also returns the key for each match.
+
+    Arguments:
+        query       -- an object representing the thing we want to find
+        choices     -- a list or dictionary of objects we are attempting
+                       to extract values from. The dictionary should
+                       consist of {key: str} pairs.
+        processor   -- f(OBJ_A) --> OBJ_B, applied to each choice (but not
+                       to the query) before it is scored; for example,
+                       "processor = lambda x: x[0]" would score the
+                       first element of each choice. By default, we
+                       use utils.full_process()
+        scorer      -- f(QUERY, OBJ) --> INT, called with the query and
+                       the processed choice; the highest scores are
+                       returned first. By default, we use fuzz.WRatio()
+        limit       -- the maximum number of matches to return (5 by
+                       default)
+
+    Returns:
+        a list of (choice, score) tuples -- (choice, score, key) when
+        choices is a dictionary -- sorted by descending score. The
+        original choice, not its processed form, is returned. The
+        list is empty when choices is None or has no elements.
+
+    """
+    if choices is None:
+        return []
+
+    # Catch generators without lengths
+    try:
+        if len(choices) == 0:
+            return []
+    except TypeError:
+        pass
+
+    # default, turn whatever the choice is into a workable string
+    if not processor:
+        processor = utils.full_process
+
+    # default: wratio
+    if not scorer:
+        scorer = fuzz.WRatio
+
+    if isinstance(choices, dict):
+        # keep the key as a third element so callers can map back
+        results = [(choice, scorer(query, processor(choice)), key)
+                   for key, choice in choices.items()]
+    else:
+        results = [(choice, scorer(query, processor(choice)))
+                   for choice in choices]
+
+    # list.sort is stable, so choices with equal scores keep the order
+    # in which they were iterated
+    results.sort(key=lambda match: match[1], reverse=True)
+    return results[:limit]
+
+
+def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
+    """Find best matches above a score in a list of choices, return a
+    list of tuples containing the match and its score.
+
+    Convenience method which returns the choices with best scores, see
+    extract() for full arguments list
+
+    Optional parameter: score_cutoff.
+        If the choice has a score of less than score_cutoff it will not
+        be included on result list (a score equal to it is kept)
+
+    """
+    best_list = extract(query, choices, processor, scorer, limit=limit)
+    return list(itertools.takewhile(lambda x: x[1] >= score_cutoff, best_list))
+
+
+def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
+    """Find the best match in a list of choices, return a tuple
+    containing the match and its score if it reaches the threshold
+    or None.
+
+    Convenience method which returns the single best choice, see
+    extract() for full arguments list
+
+    Optional parameter: score_cutoff.
+        If the best choice has a score of less than score_cutoff we
+        will return None (intuition: not a good enough match); a
+        score equal to score_cutoff is accepted
+
+    """
+    best_list = extract(query, choices, processor, scorer, limit=1)
+    if best_list and best_list[0][1] >= score_cutoff:
+        return best_list[0]
+    return None
diff --git a/fuzzywuzzy/string_processing.py b/fuzzywuzzy/string_processing.py
new file mode 100644
index 0000000..0fee736
--- /dev/null
+++ b/fuzzywuzzy/string_processing.py
@@ -0,0 +1,34 @@
+from __future__ import unicode_literals
+import re
+import string
+import sys
+
+
+PY3 = sys.version_info[0] == 3
+
+
+class StringProcessor(object):
+    """
+    This class defines methods to process strings in the most
+    efficient way. Ideally all the methods below use unicode strings
+    for both input and output.
+    """
+
+    regex = re.compile(r"(?ui)\W")  # one non-word character; unicode, ignore-case flags
+
+    @classmethod
+    def replace_non_letters_non_numbers_with_whitespace(cls, a_string):
+        """
+        This function replaces each non-word character with one white
+        space, so a run of N such characters becomes N spaces.
+        """
+        return cls.regex.sub(u" ", a_string)
+
+    if PY3:  # Python 3: bind the str methods directly
+        strip = staticmethod(str.strip)
+        to_lower_case = staticmethod(str.lower)
+        to_upper_case = staticmethod(str.upper)
+    else:  # otherwise use the functions of the `string` module
+        strip = staticmethod(string.strip)
+        to_lower_case = staticmethod(string.lower)
+        to_upper_case = staticmethod(string.upper)
diff --git a/fuzzywuzzy/utils.py b/fuzzywuzzy/utils.py
new file mode 100644
index 0000000..2c68824
--- /dev/null
+++ b/fuzzywuzzy/utils.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+import sys
+
+from fuzzywuzzy.string_processing import StringProcessor
+
+
+PY3 = sys.version_info[0] == 3
+
+
+
+def validate_string(s):  # True only for objects whose len() is greater than zero
+    try:
+        return len(s) > 0
+    except TypeError:  # len() rejected the object, so it is not a usable string
+        return False
+
+bad_chars = str("").join(map(chr, range(128, 256)))  # ascii dammit!
+if PY3:  # asciionly() passes this map to str.translate on Python 3
+    translation_table = dict.fromkeys(map(ord, bad_chars))
+
+
+def asciionly(s):
+    """Return `s` with every character listed in `bad_chars` removed."""
+    if not PY3:
+        return s.translate(None, bad_chars)  # Python 2: (table, deletechars)
+    return s.translate(translation_table)
+
+
+def asciidammit(s):
+    """Return `s` as a string with non-ASCII characters dropped."""
+    if type(s) is str:
+        return asciionly(s)
+    if PY3:  # no `unicode` builtin on Python 3; str() is its equivalent
+        return asciionly(str(s))
+    return asciionly(unicode(s).encode('ascii', 'ignore'))
+
+
+def make_type_consistent(s1, s2):
+    """Return s1 and s2 unchanged when both are str or both are unicode;
+    otherwise coerce both to the text type (unicode, or str on Python 3).
+    """
+    # Python 3 has no `unicode` builtin; its text type is str.
+    text_type = str if PY3 else unicode
+    for kind in (str, text_type):
+        if isinstance(s1, kind) and isinstance(s2, kind):
+            return s1, s2
+    return text_type(s1), text_type(s2)
+
+
+def full_process(s, force_ascii=False):
+    """Normalise a string for comparison.
+
+    In order: convert to ASCII with asciidammit() when force_ascii is
+    true, replace every non-word character with a space, lower-case
+    the text, then strip leading and trailing whitespace.
+
+    None is returned as the empty string.
+    """
+    if s is None:
+        return ""
+
+    text = asciidammit(s) if force_ascii else s
+
+    # Keep only word characters (see Unicode docs).
+    spaced = StringProcessor.replace_non_letters_non_numbers_with_whitespace(text)
+    # Lower-case first, then trim the ends.
+    lowered = StringProcessor.to_lower_case(spaced)
+    return StringProcessor.strip(lowered)
+
+
+def intr(n):
+    '''Return n rounded with the built-in round() and converted to int'''
+    return int(round(n))