Skip to content

Commit

Permalink
Python 3 support: use next() instead of .next() + define xrange()
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom De Smedt committed Oct 27, 2014
1 parent c4c6a9a commit 943134c
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pattern/graph/__init__.py
Expand Up @@ -1077,7 +1077,7 @@ def clique(graph, id):
for n in graph.nodes:
try:
# Raises StopIteration if all nodes in the clique are connected to n:
(id for id in a if n.id==id or graph.edge(n.id, id) is None).next()
next(id for id in a if n.id==id or graph.edge(n.id, id) is None)
except StopIteration:
a.append(n.id)
return a
Expand Down
76 changes: 69 additions & 7 deletions pattern/metrics.py
Expand Up @@ -8,15 +8,18 @@
import sys

from time import time
from math import sqrt, floor, modf, exp, pi, log
from math import sqrt, floor, ceil, modf, exp, pi, log

from collections import defaultdict, deque
from itertools import chain
from operator import itemgetter
from operator import itemgetter, lt, le
from heapq import nlargest
from bisect import bisect_right
from random import gauss

if sys.version > "3":
xrange = range

####################################################################################################
# Simple implementation of Counter for Python 2.5 and 2.6.
# See also: http://code.activestate.com/recipes/576611/
Expand Down Expand Up @@ -280,7 +283,7 @@ def levenshtein(string1, string2):
if n > m:
# Make sure n <= m to use O(min(n,m)) space.
string1, string2, n, m = string2, string1, m, n
current = range(n+1)
current = list(xrange(n+1))
for i in xrange(1, m+1):
previous, current = current, [i]+[0]*n
for j in xrange(1, n+1):
Expand Down Expand Up @@ -570,6 +573,65 @@ def cooccurrence(iterable, window=(-1,-1), term1=lambda x: True, term2=lambda x:
# Adjectives preceding nouns:
# {("cat", "NN"): {("black", "JJ"): 1}}

#### INTERPOLATION #################################################################################

def lerp(a, b, t):
    """ Returns the linear interpolation between a and b at time t between 0.0-1.0.
        For example: lerp(100, 200, 0.5) => 150.
    """
    # Values of t outside the 0.0-1.0 range clamp to the endpoints.
    if t > 1.0:
        return b
    if t < 0.0:
        return a
    return a + t * (b - a)

def smoothstep(a, b, x):
    """ Returns the Hermite interpolation (cubic spline) for x between a and b.
        The return value between 0.0-1.0 eases (slows down) as x nears a or b.
    """
    # Clamp to the edges of the interval.
    if x < a:
        return 0.0
    if x >= b:
        return 1.0
    # Normalize x to 0.0-1.0, then apply the Hermite polynomial 3t^2 - 2t^3.
    t = float(x - a) / (b - a)
    return t * t * (3 - 2 * t)

def smoothrange(a=None, b=None, n=10):
    """ Returns an iterator of approximately n values v1, v2, ... vn,
        so that v1 <= a, and vn >= b, and all values are multiples of 1, 2, 5 and 10.
        For example: list(smoothrange(1, 123)) => [0, 20, 40, 60, 80, 100, 120, 140].
    """
    # Assumes b >= a once defaults are filled in; log() raises for a negative span.
    def _multiple(v, rounded=False):
        # Snap v to a "nice" number: 1, 2, 5 or 10 times a power of ten.
        e = floor(log(v, 10)) # exponent
        m = pow(10, e)        # magnitude
        f = v / m             # fraction
        if rounded:
            op, x, y, z = lt, 1.5, 3.0, 7.0
        else:
            op, x, y, z = le, 1.0, 2.0, 5.0
        if op(f, x):
            return m * 1
        if op(f, y):
            return m * 2
        if op(f, z):
            return m * 5
        return m * 10
    if a is None and b is None:
        a, b = 0, 1
    if a is None:
        a, b = 0, b
    if b is None:
        a, b = 0, a
    if a == b:
        yield float(a)
        # PEP 479 (Python 3.7+): "raise StopIteration" inside a generator
        # becomes a RuntimeError; a plain return ends the iteration instead.
        return
    r = _multiple(b - a)                  # nice span covering b - a
    t = _multiple(r / (n - 1), rounded=True) # nice step size for ~n ticks
    a = floor(a / t) * t                  # first tick at or below a
    b = ceil(b / t) * t                   # last tick at or above b
    for i in range(int((b - a) / t) + 1):
        yield a + i * t

#### STATISTICS ####################################################################################

#--- MEAN ------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -688,7 +750,7 @@ def kurtosis(iterable, sample=False):

#a = 1
#b = 1000
#U = [float(i-a)/(b-a) for i in range(a,b)] # uniform distribution
#U = [float(i-a)/(b-a) for i in xrange(a,b)] # uniform distribution
#print(abs(-1.2 - kurtosis(U)) < 0.0001)

#--- QUANTILE --------------------------------------------------------------------------------------
Expand All @@ -715,7 +777,7 @@ def quantile(iterable, p=0.5, sort=True, a=1, b=-1, c=0, d=1):
i = int(floor(i))
return s[i] + (s[i+1] - s[i]) * (c + d * f)

#print(quantile(range(10), p=0.5) == median(range(10)))
#print(quantile(xrange(10), p=0.5) == median(xrange(10)))

def boxplot(iterable, **kwargs):
""" Returns a tuple (min(list), Q1, Q2, Q3, max(list)) for the given list of values.
Expand Down Expand Up @@ -918,7 +980,7 @@ def gammaln(x):
y = x + 5.5
y = (x + 0.5) * log(y) - y
n = 1.0
for i in range(6):
for i in xrange(6):
x += 1
n += (
76.18009173,
Expand Down Expand Up @@ -1048,6 +1110,6 @@ def kolmogorov(x):
return 0.0
x = -2.0 * x * x
k = 0
for i in reversed(range(1, 27+1, 2)): # 27 25 23 ... 1
for i in reversed(xrange(1, 27+1, 2)): # 27 25 23 ... 1
k = (1 - k) * exp(x * i)
return 2.0 * k
11 changes: 6 additions & 5 deletions pattern/text/search.py
Expand Up @@ -174,7 +174,7 @@ def variations(iterable, optional=lambda x: False):
for p in product([False, True], repeat=sum(o)):
p = list(p)
v = [b and (b and p.pop(0)) for b in o]
v = tuple(iterable[i] for i in xrange(len(v)) if not v[i])
v = tuple(iterable[i] for i in range(len(v)) if not v[i])
a.add(v)
# Longest-first.
return sorted(a, cmp=lambda x, y: len(y) - len(x))
Expand Down Expand Up @@ -567,7 +567,7 @@ def match(self, word):
- the word (or lemma) occurs in Constraint.taxa taxonomy tree, AND
- the word and/or chunk tags match those defined in the constraint.
Individual terms in Constraint.words or the taxonomy can contain wildcards (*).
Some part-of-speech-tags can also contain wildcards: NN*, VB*, JJ*, RB*
Some part-of-speech-tags can also contain wildcards: NN*, VB*, JJ*, RB*, PR*.
If the given word contains spaces (e.g., proper noun),
the entire chunk will also be compared.
For example: Constraint(words=["Mac OS X*"])
Expand Down Expand Up @@ -609,8 +609,8 @@ def match(self, word):
try:
if " " in w and (s1 in w or s2 and s2 in w or "*" in w):
s1 = word.chunk and word.chunk.string.lower() or s1
s2 = word.chunk and " ".join([x or "" for x in word.chunk.lemmata]) or s2
except:
s2 = word.chunk and " ".join(x or "" for x in word.chunk.lemmata) or s2
except Exception as e:
s1 = s1
s2 = None
# Compare the word to the allowed words (which can contain wildcards).
Expand All @@ -620,6 +620,7 @@ def match(self, word):
# if "was" is not in the constraint, perhaps "be" is, which is a good match.
if s2 and _match(s2, w):
b=True; break

# If the constraint defines allowed taxonomy terms,
# and the given word did not match an allowed word, traverse the taxonomy.
# The search goes up from the given word to its parents in the taxonomy.
Expand Down Expand Up @@ -804,7 +805,7 @@ def match(self, sentence, start=0, _v=None, _u=None):
if sentence.__class__.__name__ == "Sentence":
pass
elif isinstance(sentence, list) or sentence.__class__.__name__ == "Text":
return find(lambda m,s: m is not None, ((self.match(s, start, _v), s) for s in sentence))[0]
return find(lambda m: m is not None, (self.match(s, start, _v) for s in sentence))
elif isinstance(sentence, basestring):
sentence = Sentence(sentence)
elif isinstance(sentence, Match) and len(sentence) > 0:
Expand Down
29 changes: 24 additions & 5 deletions pattern/vector/__init__.py
Expand Up @@ -45,6 +45,7 @@

if sys.version > "3":
long = int
xrange = range

try:
MODULE = os.path.dirname(os.path.realpath(__file__))
Expand Down Expand Up @@ -1717,7 +1718,7 @@ def transform(self, document):
_lsa_transform_cache = {}

#def iter2array(iterator, typecode):
# a = numpy.array([iterator.next()], typecode)
# a = numpy.array([next(iterator)], typecode)
# shape0 = a.shape[1:]
# for (i, item) in enumerate(iterator):
# a.resize((i+2,) + shape0)
Expand Down Expand Up @@ -1961,7 +1962,7 @@ def hierarchical(vectors, k=1, iterations=1000, distance=COSINE, **kwargs):
id = sequence()
features = kwargs.get("features", _features(vectors))
clusters = Cluster((v for v in shuffled(vectors)))
centroids = [(id.next(), v) for v in clusters]
centroids = [(next(id), v) for v in clusters]
map = {}
for _ in range(iterations):
if len(clusters) <= max(k, 1):
Expand All @@ -1988,7 +1989,7 @@ def hierarchical(vectors, k=1, iterations=1000, distance=COSINE, **kwargs):
v = centroid(merged.flatten(), features)
centroids.pop(j)
centroids.pop(i)
centroids.append((id.next(), v))
centroids.append((next(id), v))
return clusters

#from pattern.vector import Vector
Expand Down Expand Up @@ -2081,6 +2082,16 @@ def baseline(self):
return self._baseline
return ([(0, None)] + sorted([(v, k) for k, v in self._classes.items()]))[-1][1]

@property
def weighted_random_baseline(self):
    """ Yields the weighted random baseline:
        accuracy with classes predicted randomly according to their distribution.
    """
    # Sum of squared class probabilities; "or 1" guards against
    # division by zero when the distribution is empty.
    total = float(sum(self.distribution.values())) or 1
    return sum((count / total) ** 2 for count in self.distribution.values())

wrb = weighted_random_baseline

@property
def skewness(self):
""" Yields 0.0 if the trained classes are evenly distributed.
Expand Down Expand Up @@ -2974,8 +2985,6 @@ def finalize(self):

ANN = NN = NeuralNetwork = BPNN



#nn = BPNN()
#nn._weight_initialization(2, 1, hidden=2)
#nn._train([
Expand Down Expand Up @@ -3070,6 +3079,16 @@ def __init__(self, *args, **kwargs):
( "shrinking", "h", True)):
v = kwargs.get(k2, kwargs.get(k1, v))
setattr(self, "_"+k1, v)
# SVC/SVR/SVO alias.
if self._type == "svc":
self._type = SVC
if self._type == "svr":
self._type = SVR
if self._type == "svo":
self._type = SVO
# RBF alias.
if self._kernel == "rbf":
self._kernel = RBF
Classifier.__init__(self, train=kwargs.get("train", []), baseline=MAJORITY)

@property
Expand Down
2 changes: 1 addition & 1 deletion pattern/vector/stemmer.py
Expand Up @@ -286,7 +286,7 @@ def case_sensitive(stem, word):
Ponies => Poni
"""
ch = []
for i in xrange(len(stem)):
for i in range(len(stem)):
if word[i] == word[i].upper():
ch.append(stem[i].upper())
else:
Expand Down
2 changes: 1 addition & 1 deletion pattern/web/__init__.py
Expand Up @@ -2357,7 +2357,7 @@ def articles(self, **kwargs):
while True:
batch, done = [], False
try:
for i in range(10): batch.append(iterator.next())
for i in range(10): batch.append(next(iterator))
except StopIteration:
done = True # No more articles, finish batch and raise StopIteration.
url = URL(self._url.replace("api.php", "wikia.php"), method=GET, query={
Expand Down
4 changes: 2 additions & 2 deletions pattern/web/json/ordered_dict.py
Expand Up @@ -66,9 +66,9 @@ def popitem(self, last=True):
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
if last:
key = reversed(self).next()
key = next(reversed(self))
else:
key = iter(self).next()
key = next(iter(self))
value = self.pop(key)
return key, value

Expand Down

0 comments on commit 943134c

Please sign in to comment.