In [1]:
%load_ext line_profiler
%load_ext cython

In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [3]:
y = np.random.randint(2, size=(500, 1))
x = np.random.randint(10, size=(500, 1))
data = pd.DataFrame(np.concatenate([y, x], axis=1), columns=['y', 'x'])

In [4]:
def target_mean_v1(data, y_name, x_name):
    result = np.zeros(data.shape[0])
    for i in range(data.shape[0]):
        groupby_result = data[data.index != i].groupby([x_name], as_index=False).agg(['mean', 'count'])
        result[i] = groupby_result.loc[groupby_result.index == data.loc[i, x_name], (y_name, 'mean')]
    return result

def test_eq(a, b):
    assert np.allclose(a, b)

In [5]:
def target_mean_v2(data, y_name, x_name):
    result = np.zeros(data.shape[0])
    value_dict = dict()
    count_dict = dict()
    for i in range(data.shape[0]):
        if data.loc[i, x_name] not in value_dict.keys():
            value_dict[data.loc[i, x_name]] = data.loc[i, y_name]
            count_dict[data.loc[i, x_name]] = 1
        else:
            value_dict[data.loc[i, x_name]] += data.loc[i, y_name]
            count_dict[data.loc[i, x_name]] += 1
    for i in range(data.shape[0]):
        result[i] = (value_dict[data.loc[i, x_name]] - data.loc[i, y_name]) / (count_dict[data.loc[i, x_name]] - 1)
    return result

In [6]:
test_eq(target_mean_v1(data, 'y', 'x'), target_mean_v2(data, 'y', 'x'))

In [7]:
%%timeit
res_v1 = target_mean_v1(data, 'y', 'x')

1.41 s ± 53.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit
res_v2 = target_mean_v2(data, 'y', 'x')

26.7 ms ± 277 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
print(f"V2: Speed up {1.41 / (26.7 / 1e3):.1f} times")

V2: Speed up 52.8 times


In [13]:
def target_mean_v3(data, y_name, x_name):
    n = data.shape[0]
    X = data[x_name].values
    Y = data[y_name].values
    value_dict = defaultdict(lambda:0)
    count_dict = defaultdict(lambda:0)
    for x, y in zip(X, Y):
        value_dict[x] += y
        count_dict[x] += 1
    result = [(value_dict[x] - y) / (count_dict[x] - 1) for x, y in zip(X, Y)]
    return result

In [12]:
test_eq(target_mean_v1(data, 'y', 'x'), target_mean_v3(data, 'y', 'x'))
%timeit res_v3 = target_mean_v3(data, 'y', 'x')

781 µs ± 5.79 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [15]:
print(f"Python V3: Speed up {1.41 / (781 / 1e6):.1f} times")

Python V3: Speed up 1805.4 times


# Cython

In [69]:
%%cython -a

import numpy as np
cimport numpy as c_np
cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef void _target_mean_v3_cy(double[:] result, long[:] X, long[:] Y, long n):
    cdef:
        long i
        int cnt[10]
        double val[10]

    for i from 0 <= i < 10:
        cnt[i] = 0
        val[i] = 0.

    for i from 0 <= i < n:
        val[X[i]] += Y[i]
        cnt[X[i]] += 1
    for i from 0 <= i < n:
        result[i] = (val[X[i]] - Y[i]) / (cnt[X[i]] - 1)

def target_mean_v3_cy(data, y_name, x_name):
    cdef long n = data.shape[0]
    result = np.empty(n)
    _target_mean_v3_cy(result, data[x_name].values, data[y_name].values, n)
    return result

In [70]:
test_eq(target_mean_v1(data, 'y', 'x'), target_mean_v3_cy(data, 'y', 'x'))
%timeit target_mean_v3_cy(data, 'y', 'x')

10.4 µs ± 248 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [21]:
print(f"Cython V3: Speed up {1.41 / (9.44 / 1e6):.0f} times")

Cython V3: Speed up 149364 times


# Parallel by ray

In [6]:
import ray
ray.init();

2021-01-05 21:12:46,680	INFO services.py:1171 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [7]:
@ray.remote
def mean_enc(val_dict, cnt_dict, x, y):
    return (val_dict[x] - y) / (cnt_dict[x] - 1)

def target_mean_v4(data, y_name, x_name):
    n = data.shape[0]
    X = data[x_name].values
    Y = data[y_name].values
    value_dict = defaultdict(lambda:0)
    count_dict = defaultdict(lambda:0)
    for x, y in zip(X, Y):
        value_dict[x] += y
        count_dict[x] += 1
    result = [mean_enc.remote(value_dict, count_dict, x, y) for x, y in zip(X, Y)]
    return ray.get(result)

In [18]:
# 
test_eq(target_mean_v1(data, 'y', 'x'), target_mean_v4(data, 'y', 'x'))
%timeit res_v4 = target_mean_v4(data, 'y', 'x')

341 ms ± 8.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Parallel by openmp

In [36]:
%%cython -a

# distutils: language=c++

import numpy as np
cimport numpy as c_np
import cython
cimport cython
from cython.parallel import prange

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void create_dict(double* val, int* cnt, long[:] X, long[:] Y, int i) nogil:
    val[X[i]] += Y[i]
    cnt[X[i]] += 1
    
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef void mean_encoding(double[:] result, double* val, int* cnt, long[:] X, long[:] Y, int i) nogil:
    result[i] = (val[X[i]] - Y[i]) / (cnt[X[i]] - 1)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _target_mean_v4_cy(double[:] result, long[:] X, long[:] Y, long n):
    cdef:
        long i
        int cnt[10]
        double val[10]

    for i from 0 <= i < 10:
        cnt[i] = 0
        val[i] = 0.

    for i in prange(n, nogil=True):
        create_dict(val, cnt, X, Y, i)
        
    for i in prange(n, nogil=True):
        mean_encoding(result, val, cnt, X, Y, i)

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef target_mean_v4_cy(data, y_name, x_name):
    cdef long n = data.shape[0]
    result = np.empty(n)
    _target_mean_v4_cy(result, data[x_name].values, data[y_name].values, n)
    return result

In [43]:
test_eq(target_mean_v1(data, 'y', 'x'), target_mean_v4_cy(data, 'y', 'x'))
%timeit res_v4 = target_mean_v4_cy(data, 'y', 'x')

11.6 µs ± 469 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [38]:
print(f"Cython V4: Speed up {1.41 / (11.9 / 1e6):.0f} times")

Cython V4: Speed up 118487 times
