In [1]:
import sys
sys.path.insert(0, '..')

In [2]:
import numpy
from six import BytesIO
from pruning import alternative_applier, utils
from pruning._matrixnetapplier import MatrixnetClassifier

In [3]:
# Input locations, given relative to the notebook's working directory.
higgs_filename = '../../../datasets/higgs/training.csv'  # CSV passed to utils.get_higgs_data
mx_filename = '../pruning/formula100.mx'  # formula file, read as raw bytes

In [4]:
# Load the dataset; the helper returns three values, unpacked as X, y, w.
X, y, w = utils.get_higgs_data(higgs_filename)

# Keep the formula as raw bytes so that each applier below can be built
# from its own fresh in-memory stream.
with open(mx_filename, 'rb') as formula_file:
    formula_mx = formula_file.read()

In [5]:
# Cap on the number of rows used by every timing cell below.
N_SAMPLES = 100000

# Keep only the first N_SAMPLES rows; re-running this cell is a no-op
# because slicing an already-truncated X yields the same rows.
X = X[:N_SAMPLES]
len(X)

100000

### Vectorization for every tree

In [6]:
# Wrap the raw formula bytes in an in-memory stream and hand it to the classifier.
mn_stream = BytesIO(formula_mx)
mn = MatrixnetClassifier(mn_stream)

In [7]:
%%time
# Score every row of X with the MatrixnetClassifier instance; the enclosing
# cell is timed by the %%time magic.
mn_predictions = mn.apply(X)

CPU times: user 192 ms, sys: 42.5 ms, total: 235 ms
Wall time: 289 ms


### Vectorization for every sample

In [8]:
# Build the alternative applier directly from the raw formula bytes
# (no BytesIO wrapper is used for this call).
new_mn = alternative_applier.convert_mx_to_alternative(formula_mx)

In [9]:
%%time
# Score the same X with the alternative applier's decision_function_fast,
# for comparison against the timing of mn.apply above.
new_mn_predictions_fast = new_mn.decision_function_fast(X)

CPU times: user 3.91 s, sys: 25.3 ms, total: 3.93 s
Wall time: 3.98 s


In [10]:
import line_profiler

# Register the method to be profiled when the profiler is constructed.
lp = line_profiler.LineProfiler(new_mn.decision_function_fast)

# Execute the call under the profiler. run() returns the profiler object,
# which the notebook displays as this cell's output.
lp.run('new_mn_predictions = new_mn.decision_function_fast(X)')

<line_profiler.LineProfiler at 0x1182bb738>

In [11]:
# Print the per-line timing table collected for decision_function_fast.
lp.print_stats()

Timer unit: 1e-06 s

Total time: 4.68835 s
File: ../pruning/alternative_applier.py
Function: decision_function_fast at line 81

Line #      Hits         Time  Per Hit   % Time  Line Contents
    81                                               def decision_function_fast(self, X):
    82                                                   # taking appropriate columns
    83         1           19     19.0      0.0          assert len(self.features) == X.shape[1]
    84         1        16490  16490.0      0.4          X = numpy.array(X, dtype='float32')
    85         1          167    167.0      0.0          result = numpy.zeros(len(X), dtype=float)
    86                                           
    87         1           10     10.0      0.0          feature_ids = self.feature_ids[:, ::-1].T
    88         1            3      3.0      0.0          feature_cuts = self.feature_cuts[:, ::-1].T
    89         1           23     23.0      0.0          raveled_leaf_values = numpy.zeros(256

### Cython event-by-event

In [12]:
from embedded_matrixnet import MatrixnetClassifier as CythonApplier

In [13]:
# Give the Cython applier its own in-memory stream over the formula bytes.
cython_stream = BytesIO(formula_mx)
cython_mn = CythonApplier(cython_stream)

In [14]:
%%time
# Feed the Cython applier one event at a time: each row yielded by
# X.iterrows() is converted to a plain dict before the call. The return
# value of apply() is discarded; this cell only measures the time taken.
#
# The row label is bound to `event_id`, not `id`: a cell-level loop variable
# lives in the kernel's global namespace, so naming it `id` would shadow the
# builtin id() for every cell executed afterwards.
for event_id, event in X.iterrows():
    cython_mn.apply(dict(event))

CPU times: user 4min 18s, sys: 1.99 s, total: 4min 20s
Wall time: 4min 31s
