Skip to content

Commit

Permalink
Merge pull request #2 from ikegami-yukino/sklearn-interface
Browse files Browse the repository at this point in the history
Sklearn interface
  • Loading branch information
ikegami-yukino committed Nov 25, 2016
2 parents a8858d9 + 00291b3 commit e7a30e1
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 18 deletions.
34 changes: 23 additions & 11 deletions .travis.yml
@@ -1,19 +1,31 @@
language: python
python:
- "2.6"
- "2.7"
- "3.3"
- "3.4"
- "3.5"

# Setup anaconda
before_install:
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
fi
- chmod +x miniconda.sh
- ./miniconda.sh -b -p /home/travis/miniconda
- export PATH=/home/travis/miniconda/bin:$PATH
- conda update --yes conda

# Install packages
install:
- "python setup.py install"
- "pip install coveralls"
- conda install --yes python=$TRAVIS_PYTHON_VERSION numpy scipy nose
- pip install coveralls nose-cov
- python setup.py install

# Run test
script:
- "nosetests --with-coverage --cover-package=oll"
- nosetests --with-coverage --cover-package=oll

# Calculate coverage
after_success:
- coveralls
notifications:
email:
recipients:
- yukino0131@me.com
on_success: always
on_failure: always
46 changes: 46 additions & 0 deletions oll/oll.py
Expand Up @@ -512,6 +512,52 @@ def add(self, example, y):
fv.push_back(IntFloatPair(_id, value))
self.train_method(fv, y)

def _array_to_feature_vector(self, x):
    """
    Convert one sample (a single row of X) into a FeatureVector.
    Args
        x : one row taken from a numpy.ndarray or a scipy.sparse matrix
    Return
        fv : FeatureVector filled with IntFloatPair(feature id, value);
             stored entries for a sparse row, non-zero entries otherwise
    """
    fv = FeatureVector()
    # A row sliced from a non-CSR sparse matrix keeps its own layout; for
    # CSC, ``indices`` holds row numbers instead of column (feature) ids,
    # which would map every value onto feature 0. Normalise to CSR so that
    # ``indices`` always means column ids.
    if hasattr(x, 'tocsr'):
        x = x.tocsr()
    if hasattr(x, 'indices'):  # for sparse matrix
        indices = map(int, x.indices)
        values = map(float, x.data)
    else:
        # NOTE(review): this branch assumes a 1-D dense row, so that
        # nonzero()[0] are the feature ids -- confirm for other inputs.
        nonzero = x.nonzero()
        indices = map(int, nonzero[0])
        values = map(float, x[nonzero])
    for (_id, value) in zip(indices, values):
        fv.push_back(IntFloatPair(_id, value))
    return fv

def fit(self, X, y):
    """
    train examples from numpy/scipy array
    Args
        X : numpy.ndarray or scipy.sparse matrix,
            shape = (n_samples, self.n_features)
        y : iterable of labels; taken together they must be exactly
            the two values 1 and -1
    Raises
        AssertionError : if the set of labels in y is not {1, -1}
    """
    # Materialise y once. A one-shot iterable (generator, iterator) would
    # otherwise be exhausted by the label check below, and the training
    # loop would then silently see no examples at all.
    labels = list(y)
    # Raised explicitly instead of using an ``assert`` statement so the
    # validation is not stripped when Python runs with -O; the exception
    # type is kept for existing callers.
    if set(labels) != set([1, -1]):
        raise AssertionError('y must consist of the labels 1 and -1')
    for (i, y_i) in enumerate(labels):
        fv = self._array_to_feature_vector(X[i])
        self.train_method(fv, int(y_i))

def predict(self, X):
    """
    Classify every row of a numpy/scipy array.
    Args
        X : numpy.ndarray or scipy.sparse matrix,
            shape = (n_samples, self.n_features)
    Return
        labels : list with one entry per row, 1 when the classifier
                 score is positive and -1 otherwise
    """
    samples = X.astype('float32')
    # Scores are produced lazily, one row at a time, in row order.
    scores = (
        _oll.oll_classify(self, self._array_to_feature_vector(samples[row_no]))
        for row_no in range(samples.shape[0])
    )
    return [1 if score > 0 else -1 for score in scores]


oll_swigregister = _oll.oll_swigregister
oll_swigregister(oll)

Expand Down
15 changes: 8 additions & 7 deletions setup.py
@@ -1,10 +1,11 @@
# -*- coding: utf-8 -*-
from codecs import open
import os
import re
from setuptools import setup, Extension


with open(os.path.join('oll', '__init__.py'), 'r') as f:
with open(os.path.join('oll', '__init__.py'), 'r', encoding='utf8') as f:
version = re.compile(
r'.*__version__ = "(.*?)"', re.S).match(f.read()).group(1)

Expand All @@ -20,10 +21,11 @@
name='oll',
version=version,
author="Yukino Ikegami",
author_email='yukino0131@me.com',
author_email='yknikgm@gmail.com',
url='https://github.com/ikegami-yukino/oll-python',
description="Online machine learning algorithms library (wrapper for OLL C++ library)",
long_description=open('README.rst').read() + "\n\n" + open('CHANGES.rst').read(),
description="Online binary classification algorithms library (wrapper for OLL C++ library)",
long_description='%s\n\n%s' % (open('README.rst', encoding='utf8').read(),
open('CHANGES.rst', encoding='utf8').read()),
ext_modules=[oll_module],
py_modules=["oll"],
headers=['lib/oll.hpp'],
Expand All @@ -33,14 +35,13 @@
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Text Processing :: Linguistic'
],
],
keywords=['machine learning', 'online learning', 'perceptron',
'Passive Agressive', 'PA', 'ALMA',
'Confidence Weighted Linear-Classification'],
Expand Down
27 changes: 27 additions & 0 deletions test_oll.py
Expand Up @@ -2,6 +2,8 @@
import os
import tempfile
from nose.tools import ok_, eq_, assert_raises, assert_almost_equals
import numpy as np
from scipy.sparse import csr_matrix
import oll


Expand Down Expand Up @@ -75,3 +77,28 @@ def test_setC(self):

def test_setBias(self):
self.oll.setBias(0.14)

def test_fit(self):
    # fit() must learn the same model from a dense array and from a CSR
    # matrix, accept labels given as a numpy array, and reject label sets
    # other than {1, -1}.
    rows = [[1.0, 2.0, -1.0], [-0.5, 1.0, -0.5]]
    y = [1, -1]
    query = {0: 1.0, 1: 1.0}

    # Dense input, trained on the classifier already held in self.oll.
    dense = np.array(rows)
    self.oll.fit(dense, y)
    assert_almost_equals(self.oll.classify(query), 0.171429, 6)

    # Sparse input on a fresh PA1 classifier gives the same score.
    self.oll = oll.oll('PA1')
    sparse = csr_matrix(rows)
    self.oll.fit(sparse, y)
    assert_almost_equals(self.oll.classify(query), 0.171429, 6)

    # Labels supplied as a numpy array only need to be accepted.
    self.oll = oll.oll('PA1')
    self.oll.fit(sparse, np.array([1, -1]))

    # A label outside {1, -1} is refused.
    assert_raises(AssertionError, self.oll.fit, dense, [1, 2])

def test_predict(self):
    # Train through the dict-based add() API, then check that predict()
    # labels the same sample as 1 for both dense and sparse input.
    training = (
        ({0: 1.0, 1: 2.0, 2: -1.0}, 1),
        ({0: -0.5, 1: 1.0, 2: -0.5}, -1),
    )
    for example, label in training:
        self.oll.add(example, label)

    dense = np.array([[1.0, 1.0]])
    eq_(self.oll.predict(dense), [1])

    sparse = csr_matrix([[1.0, 1.0]])
    eq_(self.oll.predict(sparse), [1])

0 comments on commit e7a30e1

Please sign in to comment.