In [1]:
%load_ext cython

In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [3]:
# Seed the legacy global NumPy RNG so that a fresh "Restart & Run All"
# regenerates exactly the same synthetic dataset.
np.random.seed(0)
n = 1000000  # number of synthetic rows
y = np.random.randint(2, size=(n, 1))   # binary target drawn from {0, 1}
x = np.random.randint(10, size=(n, 1))  # categorical feature drawn from 0..9
# Column order matters: 'y' first, then 'x'.
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

In [4]:
def target_mean_v1(data, y_name, x_name):
    """Brute-force leave-one-out target mean encoding (reference version).

    For every row, the row itself is dropped, the remaining rows are grouped
    by ``x_name`` and the mean of ``y_name`` within the row's own category is
    taken.  One groupby per row makes this quadratic, so it is only suitable
    for validating faster implementations on small inputs.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.  Rows are addressed by position, so
        any index (e.g. a slice that does not start at 0) is accepted.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column.

    Returns
    -------
    numpy.ndarray
        Float array with one value per row.  A row whose category has no
        other member gets ``NaN``.
    """
    n_rows = data.shape[0]
    positions = np.arange(n_rows)
    x_col = data[x_name]
    result = np.zeros(n_rows)
    for i in range(n_rows):
        # Exclude the current row by position, not by index label.
        others = data.iloc[positions != i]
        # Only the target mean is needed; the result is indexed by category.
        group_means = others.groupby(x_name)[y_name].mean()
        category = x_col.iat[i]
        if category in group_means.index:
            result[i] = group_means.loc[category]
        else:
            # The row was the only member of its category.
            result[i] = np.nan
    return result

In [5]:
def target_mean_v2(data, y_name, x_name):
    """Leave-one-out target mean encoding using two passes over the rows.

    The first pass accumulates, per category of ``x_name``, the sum of
    ``y_name`` and the number of rows.  The second pass removes each row's own
    contribution from its category totals and divides.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.  Rows are read by position, so the
        frame's index labels are irrelevant.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column.

    Returns
    -------
    numpy.ndarray
        Float array with one value per row.  A row whose category has no
        other member gets ``NaN``.
    """
    n_rows = data.shape[0]
    # Look the columns up once instead of on every scalar access.
    x_col = data[x_name]
    y_col = data[y_name]
    result = np.zeros(n_rows)
    value_dict = dict()
    count_dict = dict()
    for i in range(n_rows):
        key = x_col.iat[i]
        value_dict[key] = value_dict.get(key, 0) + y_col.iat[i]
        count_dict[key] = count_dict.get(key, 0) + 1
    for i in range(n_rows):
        key = x_col.iat[i]
        remaining = count_dict[key] - 1
        if remaining:
            result[i] = (value_dict[key] - y_col.iat[i]) / remaining
        else:
            # Sole member of its category: nothing left to average.
            result[i] = np.nan
    return result

In [6]:
def target_mean_v3(data, y_name, x_name):
    """Leave-one-out target mean encoding over raw column arrays.

    Per-category sums and counts of ``y_name`` are gathered in one sweep over
    the ``.values`` arrays of the two columns; a second sweep subtracts each
    row's own target from its category sum and divides by the count minus one.

    Returns a plain Python list with one encoded value per row.
    """
    categories = data[x_name].values
    targets = data[y_name].values

    sums = defaultdict(int)
    counts = defaultdict(int)
    for key, target in zip(categories, targets):
        sums[key] += target
        counts[key] += 1

    encoded = []
    for key, target in zip(categories, targets):
        others_total = sums[key] - target
        others_count = counts[key] - 1
        encoded.append(others_total / others_count)
    return encoded

In [7]:
def test_eq(a, b): assert np.allclose(a, b)

In [8]:
# Validate v3 against the brute-force v1 on the first 5000 rows only
# (v1 runs a groupby for every row), then time v3 on that same slice.
test_eq(target_mean_v1(data[:5000], 'y', 'x'), target_mean_v3(data[:5000], 'y', 'x'))
%timeit res_v3 = target_mean_v3(data[:5000], 'y', 'x')

6.97 ms ± 165 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Cython with uint8 inputs

In [9]:
%%cython

import numpy as np
cimport numpy as c_np
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
@cython.overflowcheck(False)
@cython.binding(False)
def target_mean_v3_cy(data, str y_name, str x_name):
    """Leave-one-out target mean encoding with C-typed loops.

    Parameters
    ----------
    data : object indexable by column name (a pandas DataFrame in this notebook)
        ``data[x_name].values`` and ``data[y_name].values`` are bound to
        ``unsigned char`` memoryviews, so both columns must be stored as
        unsigned 8-bit integers.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with one value per row.  With C division enabled, a
        row that is the only member of its category evaluates ``0.0 / 0`` in
        floating point instead of raising.
    """
    cdef:
        Py_ssize_t n = data.shape[0]
        Py_ssize_t i
        c_np.ndarray[double] output = np.empty(n)
        # One slot per possible ``unsigned char`` value.  Bounds checking is
        # disabled above, so the tables must cover the whole 0..255 range to
        # keep every index in bounds.
        Py_ssize_t cnt[256]
        double val[256]
        unsigned char x
        unsigned char[:] X = data[x_name].values
        unsigned char[:] Y = data[y_name].values
        double[:] result = output

    for i in range(256):
        cnt[i] = 0
        val[i] = 0.

    # Pass 1: per-category target sum and row count.
    for i in range(n):
        x = X[i]
        val[x] += Y[i]
        cnt[x] += 1

    # Pass 2: remove each row's own contribution, then average the rest.
    for i in range(n):
        x = X[i]
        result[i] = (val[x] - Y[i]) / (cnt[x] - 1)

    return output

In [10]:
# Separate copy of the dataset with both columns stored as unsigned 8-bit
# integers; `data` itself is left untouched.
data2 = data.astype({'x': 'uint8', 'y': 'uint8'})

In [14]:
# Check the Cython version (fed the uint8 frame) against pure-Python v3 on the
# full dataset, then time the Cython version alone.
test_eq(target_mean_v3(data, 'y', 'x'), target_mean_v3_cy(data2, 'y', 'x'))
%timeit -n 1000 target_mean_v3_cy(data2, 'y', 'x')

2.45 ms ± 79.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Parallel with OpenMP

In [13]:
%%cython

# distutils: language=c++

import numpy as np
cimport numpy as c_np
import cython
cimport cython
from cython.parallel import prange

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void create_dict(double* val, int* cnt, unsigned char[:] X, unsigned char[:] Y, int i) nogil:
    # Fold row ``i`` into the per-category tables: bump the row count of
    # category X[i] and add Y[i] to that category's running target sum.
    # NOTE(review): both updates are plain ``+=`` on caller-owned arrays, so
    # concurrent calls sharing ``val``/``cnt`` would presumably race.
    cdef unsigned char category = X[i]
    cnt[category] += 1
    val[category] += Y[i]
    
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef void mean_encoding(double[:] result, double* val, int* cnt, unsigned char[:] X, unsigned char[:] Y, int i) nogil:
    # Write the leave-one-out mean for row ``i``: the category's target sum
    # without this row's own target, divided by the category's row count
    # minus one.  Only ``result[i]`` is written.
    cdef unsigned char category = X[i]
    cdef double others_sum = val[category] - Y[i]
    result[i] = others_sum / (cnt[category] - 1)

@cython.boundscheck(False)
@cython.wraparound(False)
def target_mean_v3_parallel(data, str y_name, str x_name):
    """Leave-one-out target mean encoding with a ``prange`` encoding pass.

    The per-category sums and counts are accumulated in an ordinary
    sequential loop: every row updates the shared ``val``/``cnt`` tables, so
    running that loop under ``prange`` would let threads overwrite each
    other's updates.  The second pass only reads those tables and writes a
    distinct ``result[i]`` per iteration, so it stays in ``prange``.

    NOTE(review): the ``%%cython`` line of this cell shows no OpenMP
    compile/link flags; without them ``prange`` presumably runs on a single
    thread — confirm the build flags if a speed-up is expected.

    Parameters
    ----------
    data : object indexable by column name (a pandas DataFrame in this notebook)
        ``data[x_name].values`` and ``data[y_name].values`` are bound to
        ``unsigned char`` memoryviews, so both columns must be stored as
        unsigned 8-bit integers.
    y_name : str
        Name of the target column.
    x_name : str
        Name of the categorical column.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with one value per row.
    """
    cdef:
        int n = data.shape[0]
        int i
        # One slot per possible ``unsigned char`` value.  Bounds checking is
        # disabled, so the tables must cover the whole 0..255 range.
        int cnt[256]
        double val[256]
        unsigned char[:] X = data[x_name].values
        unsigned char[:] Y = data[y_name].values
        c_np.ndarray[double] output = np.empty(n)
        double[:] result = output

    for i in range(256):
        cnt[i] = 0
        val[i] = 0.

    # Pass 1 (sequential): shared accumulators, see docstring.
    for i in range(n):
        create_dict(val, cnt, X, Y, i)

    # Pass 2 (parallel): iterations are independent of each other.
    for i in prange(n, nogil=True):
        mean_encoding(result, val, cnt, X, Y, i)

    return output

In [15]:
# Check the prange version (fed the uint8 frame) against pure-Python v3 on the
# full dataset, then time it.
test_eq(target_mean_v3(data, 'y', 'x'), target_mean_v3_parallel(data2, 'y', 'x'))
%timeit -n 1000 res_v4 = target_mean_v3_parallel(data2, 'y', 'x')

2.88 ms ± 81.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [16]:
# Count the lines of /proc/cpuinfo that mention "processor".
!grep -c processor /proc/cpuinfo

12
