From 943134c593f475a63c971a4aaf01c5eec81bce75 Mon Sep 17 00:00:00 2001 From: Tom De Smedt Date: Mon, 27 Oct 2014 16:35:10 +0100 Subject: [PATCH] Python 3 support: use next() instead of .next() + define xrange() --- pattern/graph/__init__.py | 2 +- pattern/metrics.py | 76 +++++++++++++++++++++++++++++--- pattern/text/search.py | 11 ++--- pattern/vector/__init__.py | 29 +++++++++--- pattern/vector/stemmer.py | 2 +- pattern/web/__init__.py | 2 +- pattern/web/json/ordered_dict.py | 4 +- 7 files changed, 104 insertions(+), 22 deletions(-) diff --git a/pattern/graph/__init__.py b/pattern/graph/__init__.py index 826b34c3..38f80729 100644 --- a/pattern/graph/__init__.py +++ b/pattern/graph/__init__.py @@ -1077,7 +1077,7 @@ def clique(graph, id): for n in graph.nodes: try: # Raises StopIteration if all nodes in the clique are connected to n: - (id for id in a if n.id==id or graph.edge(n.id, id) is None).next() + next(id for id in a if n.id==id or graph.edge(n.id, id) is None) except StopIteration: a.append(n.id) return a diff --git a/pattern/metrics.py b/pattern/metrics.py index 2c71d25a..8e8ce682 100644 --- a/pattern/metrics.py +++ b/pattern/metrics.py @@ -8,15 +8,18 @@ import sys from time import time -from math import sqrt, floor, modf, exp, pi, log +from math import sqrt, floor, ceil, modf, exp, pi, log from collections import defaultdict, deque from itertools import chain -from operator import itemgetter +from operator import itemgetter, lt, le from heapq import nlargest from bisect import bisect_right from random import gauss +if sys.version > "3": + xrange = range + #################################################################################################### # Simple implementation of Counter for Python 2.5 and 2.6. # See also: http://code.activestate.com/recipes/576611/ @@ -280,7 +283,7 @@ def levenshtein(string1, string2): if n > m: # Make sure n <= m to use O(min(n,m)) space. 
string1, string2, n, m = string2, string1, m, n - current = range(n+1) + current = list(xrange(n+1)) for i in xrange(1, m+1): previous, current = current, [i]+[0]*n for j in xrange(1, n+1): @@ -570,6 +573,65 @@ def cooccurrence(iterable, window=(-1,-1), term1=lambda x: True, term2=lambda x: # Adjectives preceding nouns: # {("cat", "NN"): {("black", "JJ"): 1}} +#### INTERPOLATION ################################################################################# + +def lerp(a, b, t): + """ Returns the linear interpolation between a and b at time t between 0.0-1.0. + For example: lerp(100, 200, 0.5) => 150. + """ + if t < 0.0: + return a + if t > 1.0: + return b + return a + (b - a) * t + +def smoothstep(a, b, x): + """ Returns the Hermite interpolation (cubic spline) for x between a and b. + The return value between 0.0-1.0 eases (slows down) as x nears a or b. + """ + if x < a: + return 0.0 + if x >= b: + return 1.0 + x = float(x - a) / (b - a) + return x * x * (3 - 2 * x) + +def smoothrange(a=None, b=None, n=10): + """ Returns an iterator of approximately n values v1, v2, ... vn, + so that v1 <= a, and vn >= b, and all values are multiples of 1, 2, 5 and 10. 
+ For example: list(smoothrange(1, 123)) => [0, 20, 40, 60, 80, 100, 120, 140]. + """ + def _multiple(v, round=False): + e = floor(log(v, 10)) # exponent + m = pow(10, e) # magnitude + f = v / m # fraction + if round is True: + op, x, y, z = lt, 1.5, 3.0, 7.0 + if round is False: + op, x, y, z = le, 1.0, 2.0, 5.0 + if op(f, x): + return m * 1 + if op(f, y): + return m * 2 + if op(f, z): + return m * 5 + else: + return m * 10 + if a is None and b is None: + a, b = 0, 1 + if a is None: + a, b = 0, b + if b is None: + a, b = 0, a + if a == b: + yield float(a); return + r = _multiple(b - a) + t = _multiple(r / (n - 1), round=True) + a = floor(a / t) * t + b = ceil(b / t) * t + for i in range(int((b - a) / t) + 1): + yield a + i * t + #### STATISTICS #################################################################################### #--- MEAN ------------------------------------------------------------------------------------------ @@ -688,7 +750,7 @@ def kurtosis(iterable, sample=False): #a = 1 #b = 1000 -#U = [float(i-a)/(b-a) for i in range(a,b)] # uniform distribution +#U = [float(i-a)/(b-a) for i in xrange(a,b)] # uniform distribution +#print(abs(-1.2 - kurtosis(U)) < 0.0001) #--- QUANTILE -------------------------------------------------------------------------------------- @@ -715,7 +777,7 @@ def quantile(iterable, p=0.5, sort=True, a=1, b=-1, c=0, d=1): i = int(floor(i)) return s[i] + (s[i+1] - s[i]) * (c + d * f) -#print(quantile(range(10), p=0.5) == median(range(10))) +#print(quantile(xrange(10), p=0.5) == median(xrange(10))) def boxplot(iterable, **kwargs): """ Returns a tuple (min(list), Q1, Q2, Q3, max(list)) for the given list of values. @@ -918,7 +980,7 @@ def gammaln(x): y = x + 5.5 y = (x + 0.5) * log(y) - y n = 1.0 - for i in range(6): + for i in xrange(6): x += 1 n += ( 76.18009173, @@ -1048,6 +1110,6 @@ def kolmogorov(x): return 0.0 x = -2.0 * x * x k = 0 - for i in reversed(range(1, 27+1, 2)): # 27 25 23 ... 
1 + for i in reversed(xrange(1, 27+1, 2)): # 27 25 23 ... 1 k = (1 - k) * exp(x * i) return 2.0 * k diff --git a/pattern/text/search.py b/pattern/text/search.py index dd1c937b..5410a863 100644 --- a/pattern/text/search.py +++ b/pattern/text/search.py @@ -174,7 +174,7 @@ def variations(iterable, optional=lambda x: False): for p in product([False, True], repeat=sum(o)): p = list(p) v = [b and (b and p.pop(0)) for b in o] - v = tuple(iterable[i] for i in xrange(len(v)) if not v[i]) + v = tuple(iterable[i] for i in range(len(v)) if not v[i]) a.add(v) # Longest-first. return sorted(a, cmp=lambda x, y: len(y) - len(x)) @@ -567,7 +567,7 @@ def match(self, word): - the word (or lemma) occurs in Constraint.taxa taxonomy tree, AND - the word and/or chunk tags match those defined in the constraint. Individual terms in Constraint.words or the taxonomy can contain wildcards (*). - Some part-of-speech-tags can also contain wildcards: NN*, VB*, JJ*, RB* + Some part-of-speech-tags can also contain wildcards: NN*, VB*, JJ*, RB*, PR*. If the given word contains spaces (e.g., proper noun), the entire chunk will also be compared. For example: Constraint(words=["Mac OS X*"]) @@ -609,8 +609,8 @@ def match(self, word): try: if " " in w and (s1 in w or s2 and s2 in w or "*" in w): s1 = word.chunk and word.chunk.string.lower() or s1 - s2 = word.chunk and " ".join([x or "" for x in word.chunk.lemmata]) or s2 - except: + s2 = word.chunk and " ".join(x or "" for x in word.chunk.lemmata) or s2 + except Exception as e: s1 = s1 s2 = None # Compare the word to the allowed words (which can contain wildcards). @@ -620,6 +620,7 @@ def match(self, word): # if "was" is not in the constraint, perhaps "be" is, which is a good match. if s2 and _match(s2, w): b=True; break + # If the constraint defines allowed taxonomy terms, # and the given word did not match an allowed word, traverse the taxonomy. # The search goes up from the given word to its parents in the taxonomy. 
@@ -804,7 +805,7 @@ def match(self, sentence, start=0, _v=None, _u=None): if sentence.__class__.__name__ == "Sentence": pass elif isinstance(sentence, list) or sentence.__class__.__name__ == "Text": - return find(lambda m,s: m is not None, ((self.match(s, start, _v), s) for s in sentence))[0] + return find(lambda m: m is not None, (self.match(s, start, _v) for s in sentence)) elif isinstance(sentence, basestring): sentence = Sentence(sentence) elif isinstance(sentence, Match) and len(sentence) > 0: diff --git a/pattern/vector/__init__.py b/pattern/vector/__init__.py index 35af9007..511236c8 100644 --- a/pattern/vector/__init__.py +++ b/pattern/vector/__init__.py @@ -45,6 +45,7 @@ if sys.version > "3": long = int + xrange = range try: MODULE = os.path.dirname(os.path.realpath(__file__)) @@ -1717,7 +1718,7 @@ def transform(self, document): _lsa_transform_cache = {} #def iter2array(iterator, typecode): -# a = numpy.array([iterator.next()], typecode) +# a = numpy.array([next(iterator)], typecode) # shape0 = a.shape[1:] # for (i, item) in enumerate(iterator): # a.resize((i+2,) + shape0) @@ -1961,7 +1962,7 @@ def hierarchical(vectors, k=1, iterations=1000, distance=COSINE, **kwargs): id = sequence() features = kwargs.get("features", _features(vectors)) clusters = Cluster((v for v in shuffled(vectors))) - centroids = [(id.next(), v) for v in clusters] + centroids = [(next(id), v) for v in clusters] map = {} for _ in range(iterations): if len(clusters) <= max(k, 1): @@ -1988,7 +1989,7 @@ def hierarchical(vectors, k=1, iterations=1000, distance=COSINE, **kwargs): v = centroid(merged.flatten(), features) centroids.pop(j) centroids.pop(i) - centroids.append((id.next(), v)) + centroids.append((next(id), v)) return clusters #from pattern.vector import Vector @@ -2081,6 +2082,16 @@ def baseline(self): return self._baseline return ([(0, None)] + sorted([(v, k) for k, v in self._classes.items()]))[-1][1] + @property + def weighted_random_baseline(self): + """ Yields the weighted 
random baseline: + accuracy with classes predicted randomly according to their distribution. + """ + n = float(sum(self.distribution.values())) or 1 + return sum(map(lambda x: (x / n) ** 2, self.distribution.values())) + + wrb = weighted_random_baseline + @property def skewness(self): """ Yields 0.0 if the trained classes are evenly distributed. @@ -2974,8 +2985,6 @@ def finalize(self): ANN = NN = NeuralNetwork = BPNN - - #nn = BPNN() #nn._weight_initialization(2, 1, hidden=2) #nn._train([ @@ -3070,6 +3079,16 @@ def __init__(self, *args, **kwargs): ( "shrinking", "h", True)): v = kwargs.get(k2, kwargs.get(k1, v)) setattr(self, "_"+k1, v) + # SVC/SVR/SVO alias. + if self._type == "svc": + self._type = SVC + if self._type == "svr": + self._type = SVR + if self._type == "svo": + self._type = SVO + # RBF alias. + if self._kernel == "rbf": + self._kernel = RBF Classifier.__init__(self, train=kwargs.get("train", []), baseline=MAJORITY) @property diff --git a/pattern/vector/stemmer.py b/pattern/vector/stemmer.py index 0c7f3c02..430a3e3a 100644 --- a/pattern/vector/stemmer.py +++ b/pattern/vector/stemmer.py @@ -286,7 +286,7 @@ def case_sensitive(stem, word): Ponies => Poni """ ch = [] - for i in xrange(len(stem)): + for i in range(len(stem)): if word[i] == word[i].upper(): ch.append(stem[i].upper()) else: diff --git a/pattern/web/__init__.py b/pattern/web/__init__.py index dca0082d..f15810ca 100644 --- a/pattern/web/__init__.py +++ b/pattern/web/__init__.py @@ -2357,7 +2357,7 @@ def articles(self, **kwargs): while True: batch, done = [], False try: - for i in range(10): batch.append(iterator.next()) + for i in range(10): batch.append(next(iterator)) except StopIteration: done = True # No more articles, finish batch and raise StopIteration. 
url = URL(self._url.replace("api.php", "wikia.php"), method=GET, query={ diff --git a/pattern/web/json/ordered_dict.py b/pattern/web/json/ordered_dict.py index 87ad8882..c1b5492e 100644 --- a/pattern/web/json/ordered_dict.py +++ b/pattern/web/json/ordered_dict.py @@ -66,9 +66,9 @@ def popitem(self, last=True): # Modified from original to support Python 2.4, see # http://code.google.com/p/simplejson/issues/detail?id=53 if last: - key = reversed(self).next() + key = next(reversed(self)) else: - key = iter(self).next() + key = next(iter(self)) value = self.pop(key) return key, value