diff --git a/.travis.yml b/.travis.yml
index 4e85340..43e9dad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,19 +1,31 @@
 language: python
 python:
-  - "2.6"
   - "2.7"
-  - "3.3"
   - "3.4"
+  - "3.5"
+
+# Setup anaconda
+before_install:
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
+    else
+      wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+    fi
+  - chmod +x miniconda.sh
+  - ./miniconda.sh -b -p /home/travis/miniconda
+  - export PATH=/home/travis/miniconda/bin:$PATH
+  - conda update --yes conda
+
+# Install packages
 install:
-  - "python setup.py install"
-  - "pip install coveralls"
+  - conda install --yes python=$TRAVIS_PYTHON_VERSION numpy scipy nose
+  - pip install coveralls nose-cov
+  - python setup.py install
+
+# Run test
 script:
-  - "nosetests --with-coverage --cover-package=oll"
+  - nosetests --with-coverage --cover-package=oll
+
+# Calculate coverage
 after_success:
   - coveralls
-notifications:
-  email:
-    recipients:
-      - yukino0131@me.com
-    on_success: always
-    on_failure: always
diff --git a/oll/oll.py b/oll/oll.py
index 60ed634..67e2eb0 100644
--- a/oll/oll.py
+++ b/oll/oll.py
@@ -512,6 +512,52 @@ def add(self, example, y):
             fv.push_back(IntFloatPair(_id, value))
         self.train_method(fv, y)
 
+    def _array_to_feature_vector(self, x):
+        fv = FeatureVector()
+        if hasattr(x, 'indices'):  # for sparse matrix
+            indices = map(int, x.indices)
+            values = map(float, x.data)
+        else:
+            nonzero = x.nonzero()
+            indices = map(int, nonzero[0])
+            values = map(float, x[nonzero])
+        for (_id, value) in zip(indices, values):
+            fv.push_back(IntFloatPair(_id, value))
+        return fv
+
+    def fit(self, X, y):
+        """
+        train examples from numpy/scipy array
+
+        Args
+            X : numpy.ndarray or scipy.sparse matrix,
+                shape = (n_samples, self.n_features)
+            y : iterable
+        """
+        assert set(y) == set([1, -1])
+        for (i, y_i) in enumerate(map(int, y)):
+            fv = self._array_to_feature_vector(X[i])
+            self.train_method(fv, y_i)
+
+    def predict(self, X):
+        """
+        predict examples from numpy/scipy array
+
+        Args
+            X : numpy.ndarray or scipy.sparse matrix,
+                shape = (n_samples, self.n_features)
+        Return
+            labels : list (it takes 1 or -1)
+        """
+        X = X.astype('float32')
+        labels = []
+        for i in range(X.shape[0]):
+            fv = self._array_to_feature_vector(X[i])
+            score = _oll.oll_classify(self, fv)
+            labels.append(1 if score > 0 else -1)
+        return labels
+
+
 
 oll_swigregister = _oll.oll_swigregister
 oll_swigregister(oll)
diff --git a/setup.py b/setup.py
index 431f746..fd100bd 100644
--- a/setup.py
+++ b/setup.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
+from codecs import open
 import os
 import re
 
 from setuptools import setup, Extension
 
-with open(os.path.join('oll', '__init__.py'), 'r') as f:
+with open(os.path.join('oll', '__init__.py'), 'r', encoding='utf8') as f:
     version = re.compile(
         r'.*__version__ = "(.*?)"', re.S).match(f.read()).group(1)
 
@@ -20,10 +21,11 @@
     name='oll',
     version=version,
     author="Yukino Ikegami",
-    author_email='yukino0131@me.com',
+    author_email='yknikgm@gmail.com',
    url='https://github.com/ikegami-yukino/oll-python',
-    description="Online machine learning algorithms library (wrapper for OLL C++ library)",
-    long_description=open('README.rst').read() + "\n\n" + open('CHANGES.rst').read(),
+    description="Online binary classification algorithms library (wrapper for OLL C++ library)",
+    long_description='%s\n\n%s' % (open('README.rst', encoding='utf8').read(),
+                                   open('CHANGES.rst', encoding='utf8').read()),
     ext_modules=[oll_module],
     py_modules=["oll"],
     headers=['lib/oll.hpp'],
@@ -33,14 +35,13 @@
         'Intended Audience :: Developers',
         'Intended Audience :: Science/Research',
         'License :: OSI Approved :: BSD License',
-        'Programming Language :: Python :: 2.6',
         'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
         'Topic :: Scientific/Engineering :: Information Analysis',
         'Topic :: Text Processing :: Linguistic'
-        ],
+    ],
     keywords=['machine learning', 'online learning', 'perceptron',
               'Passive Agressive', 'PA', 'ALMA',
               'Confidence Weighted Linear-Classification'],
diff --git a/test_oll.py b/test_oll.py
index 039b2ad..33b4815 100644
--- a/test_oll.py
+++ b/test_oll.py
@@ -2,6 +2,8 @@
 import os
 import tempfile
 from nose.tools import ok_, eq_, assert_raises, assert_almost_equals
+import numpy as np
+from scipy.sparse import csr_matrix
 
 import oll
 
@@ -75,3 +77,28 @@ def test_setC(self):
 
     def test_setBias(self):
         self.oll.setBias(0.14)
+
+    def test_fit(self):
+        np_array = np.array([[1.0, 2.0, -1.0], [-0.5, 1.0, -0.5]])
+        y = [1, -1]
+        self.oll.fit(np_array, y)
+        assert_almost_equals(self.oll.classify({0: 1.0, 1: 1.0}), 0.171429, 6)
+
+        self.oll = oll.oll('PA1')
+        sparse_matrix = csr_matrix([[1.0, 2.0, -1.0], [-0.5, 1.0, -0.5]])
+        self.oll.fit(sparse_matrix, y)
+        assert_almost_equals(self.oll.classify({0: 1.0, 1: 1.0}), 0.171429, 6)
+
+        self.oll = oll.oll('PA1')
+        self.oll.fit(sparse_matrix, np.array([1, -1]))
+
+        assert_raises(AssertionError, self.oll.fit, np_array, [1, 2])
+
+    def test_predict(self):
+        self.oll.add({0: 1.0, 1: 2.0, 2: -1.0}, 1)
+        self.oll.add({0: -0.5, 1: 1.0, 2: -0.5}, -1)
+        np_array = np.array([[1.0, 1.0]])
+        eq_(self.oll.predict(np_array), [1])
+
+        sparse_matrix = csr_matrix([[1.0, 1.0]])
+        eq_(self.oll.predict(sparse_matrix), [1])
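
The diff above adds a scikit-learn-style fit/predict interface to the SWIG wrapper in oll/oll.py. The snippet below is a minimal usage sketch (not part of the diff): it assumes the extension has been built and installed, and it reuses the oll.oll('PA1') constructor and input shapes exercised in test_oll.py:

    import numpy as np
    from scipy.sparse import csr_matrix
    import oll

    # Same constructor call as in test_oll.py; 'PA1' is one of the supported learners.
    clf = oll.oll('PA1')

    X = np.array([[1.0, 2.0, -1.0],
                  [-0.5, 1.0, -0.5]])
    y = [1, -1]                       # fit() asserts the label set is exactly {1, -1}

    clf.fit(X, y)                     # dense ndarray rows are converted via x.nonzero()
    print(clf.predict(np.array([[1.0, 1.0, 0.0]])))   # -> list of 1/-1 labels

    # The same calls accept a scipy CSR matrix (handled via x.indices / x.data).
    clf_sparse = oll.oll('PA1')
    clf_sparse.fit(csr_matrix(X), y)
    print(clf_sparse.predict(csr_matrix([[1.0, 1.0, 0.0]])))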