# 并行版本

In [None]:
%%cython --cplus -a
import numpy as np
cimport numpy as cnp 
cimport cython
from libcpp.map cimport map as mapcpp
from cython.parallel import prange

@cython.boundscheck(False)
@cython.wraparound(False)
# NOTE: keep the index type and the memoryview element type consistent. Mixing a
# Py_ssize_t index `n` with a `long[:]` buffer made `x_array[n]` segfault.
# cdef void write_2_map(Py_ssize_t[:]  x_array,
#             Py_ssize_t[:]  y_array,
#             const Py_ssize_t n,
#             mapcpp[int, int] &sum_map,
#             mapcpp[int, int] &count_map) nogil:
#   # start here
#   cdef mapcpp[int, int].iterator it
#   cdef Py_ssize_t x = x_array[n]
#   cdef Py_ssize_t y = y_array[n]
#   it = sum_map.find(x)
#   if it != sum_map.end():
#     sum_map[x]  += y
#     count_map[x] += 1
#   else:
#     sum_map[x]  = y
#     count_map[x] = 1

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef double[:] target_mean_cy_with_para(data, str y_name, str x_name):
  """Leave-one-out target mean of column ``y_name`` grouped by column ``x_name``.

  For each row ``i`` the result is::

      (sum of y over rows whose x equals x[i]  -  y[i])
      -------------------------------------------------
      (number of rows whose x equals x[i]      -  1)

  Parameters
  ----------
  data
      Object indexed by column name whose items expose ``.values``
      (presumably a pandas DataFrame -- confirm against callers). Both
      columns must be accepted as ``Py_ssize_t[:]`` buffers.
  y_name : str
      Name of the target column.
  x_name : str
      Name of the grouping (category) column.

  Returns
  -------
  double[:]
      One float64 value per input row.

  Notes
  -----
  * Bounds checking is disabled; both columns are assumed to have the same
    length.
  * A category that occurs in a single row has a zero denominator. No special
    handling is applied here, so the outcome follows the active Cython
    division directives.
  """
  # Key/value types match the Py_ssize_t buffers so that neither categories
  # nor running sums are narrowed to a C int.
  cdef mapcpp[Py_ssize_t, Py_ssize_t] type_2_sum_dict = mapcpp[Py_ssize_t, Py_ssize_t]()
  cdef mapcpp[Py_ssize_t, Py_ssize_t] type_2_count_dict = mapcpp[Py_ssize_t, Py_ssize_t]()

  cdef Py_ssize_t i
  cdef Py_ssize_t x, y  # cdef declarations are not allowed inside the loop body
  cdef Py_ssize_t[:] n_y = data[y_name].values  # .values is faster than .to_numpy()
  cdef Py_ssize_t[:] n_x = data[x_name].values
  cdef Py_ssize_t nums = n_x.shape[0]
  cdef double[:] result = np.zeros(nums, np.float64)

  # Pass 1 (serial on purpose): std::map is not safe for concurrent
  # insertion/update, so building the per-category totals inside prange is a
  # data race. operator[] value-initialises a missing entry to 0, which makes
  # a separate find()/else branch unnecessary.
  for i in range(nums):
    x = n_x[i]
    type_2_sum_dict[x] += n_y[i]
    type_2_count_dict[x] += 1

  # Pass 2 (parallel): every key already exists, so operator[] only looks
  # values up and never inserts; each iteration writes a distinct result[i].
  for i in prange(nums, nogil=True):
    x = n_x[i]
    y = n_y[i]
    # Explicit cast keeps this a floating-point division at any language_level.
    result[i] = <double>(type_2_sum_dict[x] - y) / (type_2_count_dict[x] - 1)
  return result

# 非并行版本

In [None]:
%%cython -a --cplus
import numpy as np
cimport numpy as cnp 
cimport cython
from libcpp.map cimport map as mapcpp


@cython.boundscheck(False)
@cython.wraparound(False)
# Pandas access turned out to be the bottleneck, so the columns are pulled out
# as NumPy buffers once and the loops run over typed memoryviews.
cpdef target_mean_cy_without_para(data, str y_name, str x_name):
  """Single-threaded leave-one-out target mean of ``y_name`` grouped by ``x_name``.

  For each row ``i`` the result is::

      (sum of y over rows whose x equals x[i]  -  y[i])
      -------------------------------------------------
      (number of rows whose x equals x[i]      -  1)

  Parameters
  ----------
  data
      Object indexed by column name whose items expose ``.values``
      (presumably a pandas DataFrame -- confirm against callers). Both
      columns must be accepted as ``Py_ssize_t[:]`` buffers.
  y_name : str
      Name of the target column.
  x_name : str
      Name of the grouping (category) column.

  Returns
  -------
  float[:]
      One float32 value per input row.

  Notes
  -----
  * Bounds checking is disabled; both columns are assumed to have the same
    length.
  * A category that occurs in a single row has a zero denominator. No special
    handling is applied here, so the outcome follows the active Cython
    division directives.
  """
  # Key/value types match the Py_ssize_t buffers so that neither categories
  # nor running sums are narrowed to a C int.
  cdef mapcpp[Py_ssize_t, Py_ssize_t] type_2_sum_dict = mapcpp[Py_ssize_t, Py_ssize_t]()
  cdef mapcpp[Py_ssize_t, Py_ssize_t] type_2_count_dict = mapcpp[Py_ssize_t, Py_ssize_t]()

  cdef Py_ssize_t i
  cdef Py_ssize_t x, y  # cdef declarations are not allowed inside the loop body
  cdef Py_ssize_t[:] n_y = data[y_name].values  # .values is faster than .to_numpy()
  cdef Py_ssize_t[:] n_x = data[x_name].values

  cdef Py_ssize_t nums = n_x.shape[0]  # caching the length is only a marginal (~2us) gain
  cdef float[:] result = np.zeros(nums, np.float32)

  # Pass 1: per-category sum and row count. operator[] value-initialises a
  # missing entry to 0, so no find()/else branch is needed. A plain range()
  # over a typed index compiles to a C loop and replaces the deprecated
  # "for ... from ... by" form.
  for i in range(nums):
    x = n_x[i]
    type_2_sum_dict[x] += n_y[i]
    type_2_count_dict[x] += 1

  # Pass 2: remove each row's own contribution and average the rest.
  for i in range(nums):
    x = n_x[i]
    y = n_y[i]
    # Explicit cast keeps this a floating-point division at any language_level;
    # the value is then narrowed to float32 on store.
    result[i] = <double>(type_2_sum_dict[x] - y) / (type_2_count_dict[x] - 1)
  return result