Skip to content

Commit

Permalink
Merge pull request #367 from datamade/windows-fixes
Browse files Browse the repository at this point in the history
CI setup, some small tweaks to work on 32bit windows
  • Loading branch information
fgregg committed Mar 3, 2015
2 parents 0bc1f70 + 54c57ac commit d36e11f
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 43 deletions.
6 changes: 3 additions & 3 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ environment:
install:
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- powershell .\\appveyor\\install.ps1

# Don't install from requirements-pip.txt; python-coveralls seems to have broken dependencies on Windows.
- pip install numpy
- pip install --use-wheel --no-index http://bunkum.us/numpy-1.9.2+mkl-cp27-none-win32.whl
- pip install --use-wheel --no-index http://bunkum.us/fastcluster-1.1.13-cp27-none-win32.whl
- pip install -r requirements.txt
- cython src/cpredicates.pyx
- python setup.py develop
Expand All @@ -25,6 +24,7 @@ test_script:
# Nosetests take care of unit tests
# Behave runs the example scripts and tries to verify that they produce the right output
- coverage run -m nose -I canonical_test
- python tests/canonical_test.py

on_success:
# Could run coveralls here but will leave that to travis tests
Expand Down
4 changes: 3 additions & 1 deletion dedupe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
import copy
import os
from collections import defaultdict
import simplejson as json

from dedupe.backport import OrderedDict, json
from dedupe.backport import OrderedDict

import dedupe
import dedupe.sampling as sampling
Expand Down Expand Up @@ -124,6 +125,7 @@ def matchBlocks(self, blocks, threshold=.5, *args, **kwargs): # pragma : no cove

try :
match_file = matches.filename
del matches
os.remove(match_file)
except AttributeError :
pass
Expand Down
19 changes: 1 addition & 18 deletions dedupe/backport.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
" http://mail.scipy.org/pipermail/numpy-discussion/2012-August/063589.html")
MULTIPROCESSING = False
elif platform.system() == 'Windows' :
warnings.warn("Dedupe does not currenly support multiprocessing on Windows")
warnings.warn("Dedupe does not currently support multiprocessing on Windows")
MULTIPROCESSING = False

if MULTIPROCESSING :
Expand All @@ -28,28 +28,11 @@
from multiprocessing.dummy import Process, Pool, Queue
SimpleQueue = Queue

try:
from thread import get_ident as _get_ident
except ImportError:
from dummy_thread import get_ident as _get_ident

try:
from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
pass

try :
from collections import OrderedDict
except ImportError :
from ordereddict import OrderedDict

try:
from simplejson.scanner import py_make_scanner
import simplejson as json
except ImportError:
from json.scanner import py_make_scanner
import json


def cartesian(arrays, out=None):
"""Generate a cartesian product of input arrays.
Expand Down
15 changes: 6 additions & 9 deletions dedupe/serializer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from backport import py_make_scanner, json
import simplejson as json
from simplejson.scanner import py_make_scanner
import dedupe.core

def _to_json(python_object):
Expand All @@ -19,14 +20,10 @@ def _from_json(json_object):
class dedupe_decoder(json.JSONDecoder):

def __init__(self, **kwargs):
try :
json._toggle_speedups(False) # in simplejson, without this
# some strings can be
# bytestrings instead of
# unicode
# https://code.google.com/p/simplejson/issues/detail?id=40
except AttributeError :
pass
json._toggle_speedups(False) # in simplejson, without this
# some strings can be bytestrings
# instead of unicode
# https://code.google.com/p/simplejson/issues/detail?id=40
json.JSONDecoder.__init__(self, object_hook=_from_json, **kwargs)
# Use the custom JSONArray
self.parse_array = self.JSONArray
Expand Down
11 changes: 3 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

install_requires=['numpy>=1.9',
'fastcluster',
'hcluster',
'hcluster>=0.3.0',
'categorical-distance',
'rlr',
'metafone',
Expand All @@ -17,14 +17,10 @@
'simplecosine',
'haversine',
'BTrees==4.0.8',
'simplejson',
'zope.interface',
'zope.index']

try:
from json.scanner import py_make_scanner
except ImportError:
install_requires.append('simplejson')

try :
from collections import OrderedDict
except ImportError:
Expand All @@ -39,8 +35,7 @@
packages=['dedupe', 'dedupe.variables'],
ext_modules=[Extension('dedupe.cpredicates', ['src/cpredicates.c'])],
license='The MIT License: http://www.opensource.org/licenses/mit-license.php',

dependency_links = ['http://github.com/al45tair/metaphone/tarball/master#egg=metaphone'],
dependency_links = ['http://github.com/datamade/hcluster/tarball/master#egg=hcluster-0.3.0'],
install_requires=install_requires,
classifiers=[
'Development Status :: 3 - Alpha',
Expand Down
4 changes: 2 additions & 2 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def test_random_pair(self) :

random.seed(123)
numpy.random.seed(123)
assert numpy.array_equal(dedupe.core.randomPairs(11**9, 1),
numpy.array([[1228959102, 1840268610]]))
assert numpy.array_equal(dedupe.core.randomPairs(10**3, 1),
numpy.array([(292, 413)]))



Expand Down
6 changes: 4 additions & 2 deletions tests/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import unittest
import codecs
import StringIO
from dedupe.backport import OrderedDict, json

from dedupe.backport import OrderedDict
import simplejson as json

class SerializerTest(unittest.TestCase) :
def test_writeTraining(self) :
Expand Down Expand Up @@ -35,6 +35,8 @@ def test_writeTraining(self) :
encoded_file.seek(0)

deduper.readTraining(output)
print deduper.training_pairs
print training_pairs
assert repr(deduper.training_pairs) == repr(training_pairs)
assert deduper.training_pairs == training_pairs

Expand Down

0 comments on commit d36e11f

Please sign in to comment.