In [1]:
import numpy as np
from unique import unique
import pandas as pd
from numba import jit, prange

import numba_functions as nbf

from tqdm.notebook import tqdm, trange

In [2]:
class GroupBy:
    """
    Minimal group-by over a 2d NumPy array.

    Rows are reordered so that rows sharing the same values in the `by`
    columns are contiguous; each group is then a plain row slice of the
    reordered array. Reductions are dispatched by name through `fnc_dict`
    into `fncs_apply` (whole group at once) or `fncs_agg` (one column at a
    time).
    """

    # Maps a function name to its slot in `fncs_apply` / `fncs_agg`.
    # 'count' and 'size' share slot 0.
    fnc_dict = {
        'count':0,
        'size':0,
        'mean':1,
        'median':2,
        'std':3,
        'var':4,
        'nunique':5,
        'sum':6,
    }

    def _nunique_ax1(gr):
        """Return the number of distinct values in each column of a 2d group."""
        return np.array([np.unique(gr[:, j]).size for j in range(gr.shape[1])])

    def _nunique(col):
        """Return the number of distinct values in a 1d column of a group."""
        return np.unique(col).size

    # Slot 5 used to hold `_` (IPython's "last output" variable), which is not
    # a callable and does not exist outside IPython; it now holds a real
    # NumPy-based implementation.
    # NOTE(review): the `nbf.*_ax1` functions presumably reduce a 2d group to
    # one value per column, and the plain `nbf.*` ones a 1d column to a
    # scalar -- confirm against `numba_functions`.
    fncs_apply = [nbf.size_ax1, nbf.mean_ax1, nbf.median_ax1, nbf.std_ax1, nbf.var_ax1, _nunique_ax1, nbf.sum_ax1]
    fncs_agg = [nbf.size, nbf.mean, nbf.median, nbf.std, nbf.var, _nunique, nbf.sum]

    # The tables above hold the plain functions; wrap the class attributes so
    # they are never bound as instance methods by accident.
    _nunique_ax1 = staticmethod(_nunique_ax1)
    _nunique = staticmethod(_nunique)

    def __init__(self, ar, by):
        """
        Initialize the groupby object with a 2d array.

        Parameters
        ----------
        ar : 2d array
            The data to group.
        by : list of int
            Column indices of `ar` that form the group key.
        """
        # NOTE(review): assumes the project's `unique` returns
        # (argsort indices, group counts) for this call -- confirm.
        self.idx_sort, self.gr_counts = unique(ar[:, by], return_counts=True, axis=0)

        self.by = by
        # Cumulative counts give the row boundaries of each group in the
        # reordered array; groups are stored in (start, stop) format.
        bounds = np.concatenate(
            (np.array([0]), np.cumsum(self.gr_counts))
        )
        self.gr_idx = list(zip(bounds[:-1], bounds[1:]))

        self.arr_og = ar[self.idx_sort]

        # Separate the matrix into two groups: indices and values
        self.aux = np.full(self.arr_og.shape[1], True)
        self.aux[by] = False

        self.idx = self.arr_og[:, self.by]
        # The first row of every group carries that group's key.
        self.keys = self.idx[bounds[:-1]]
        self.vals = self.arr_og[:, self.aux]

        # Create a version of the array where the key values are on the left, and the value columns are on the right
        self.arr = np.concatenate((self.idx, self.vals), axis=1)
        # Initialize a list of group values. This may be computed once if needed for aggregation functions
        self.gr_vals_list = None


    def _lookup(self, table, fnc):
        """Return the entry of `table` registered under the name `fnc`."""
        try:
            return table[self.fnc_dict[fnc]]
        except KeyError:
            raise KeyError(
                "Unknown function {!r}; expected one of {}".format(fnc, sorted(self.fnc_dict))
            ) from None


    def get_groups(self, only_vals=False):
        """
            Return a list of all groups (idx, vals). Can also only return values in group

            Parameters
            ----------
            only_vals : optional, False
                Only return groups of values instead of (idx, vals).
                The value-only list is cached on the instance after the
                first call.

            Returns
            -------
            groups : list
                A list of 2d arrays representing each group
        """
        if only_vals:
            if self.gr_vals_list is None:
                self.gr_vals_list = [self.vals[start:stop] for (start, stop) in self.gr_idx]
            return self.gr_vals_list

        return [self.arr[start:stop] for (start, stop) in self.gr_idx]


    def apply(self, fnc):
        """
        Apply a function to each column of a group. The index columns will be included in the result as the left-
        hand-side of the return array.

        Parameters
        ----------
        fnc : string
            Should be one of ['count', 'size', 'mean', 'median', 'std', 'var', 'nunique', 'sum'].
            The function to be applied to all non-index columns of the data.
            'nunique' counts distinct values with `np.unique`.

        Returns
        -------
        arr_transformed : 2d array
            An array with number of rows matching the number of groups, with transformed values of the
            non-index columns.

        Raises
        ------
        KeyError
            If `fnc` is not a known function name.

        """
        f = self._lookup(self.fncs_apply, fnc)

        # The value-only groups are cached, so repeated calls do not re-slice.
        groups = self.get_groups(only_vals=True)

        res = np.array([f(gr) for gr in groups])

        return np.concatenate((self.keys, res), axis=1)


    def agg(self, fncs):
        """
        Apply a different specified function to each column of a group. The index columns will be included
        in the result as the left-hand-side of the return array.

        Parameters
        ----------
        fncs : list of strings
            Each item in the list should be one of ['count', 'size', 'mean', 'median', 'std', 'var', 'nunique',
            'sum']. These represent the functions to be applied to each non-index columns of the data,
            in column order; exactly one function per non-index column is required.

        Returns
        -------
        arr_transformed : 2d array
            An array with number of rows matching the number of groups, with transformed values of the
            non-index columns.

        Raises
        ------
        KeyError
            If an item of `fncs` is not a known function name.
        ValueError
            If the number of functions differs from the number of non-index columns.

        """
        funcs = [self._lookup(self.fncs_agg, name) for name in fncs]

        # Fail early with a clear message instead of an IndexError deep in the
        # comprehension (too few) or silently dropping functions (too many).
        n_val_cols = self.vals.shape[1]
        if len(funcs) != n_val_cols:
            raise ValueError(
                "Expected {} functions (one per non-index column), got {}".format(n_val_cols, len(funcs))
            )

        # The value-only groups are cached, so repeated calls do not re-slice.
        groups = self.get_groups(only_vals=True)

        res = np.array([[fn(gr[:, i]) for i, fn in enumerate(funcs)] for gr in groups])

        return np.concatenate((self.keys, res), axis=1)

In [3]:
n, m = 20, 8
# Fixed seed so the sample array, and every result derived from it, is the
# same after a kernel restart.
SEED = 0
rng = np.random.default_rng(SEED)
# n x m matrix of integers drawn uniformly from {0, ..., 4}
arr = rng.integers(5, size=(n, m))
by = [0,1]

arr

array([[1, 1, 3, 0, 4, 3, 3, 3],
       [3, 2, 1, 0, 1, 2, 0, 3],
       [3, 3, 0, 1, 4, 4, 0, 1],
       [0, 1, 3, 1, 2, 0, 2, 4],
       [0, 3, 1, 4, 4, 0, 0, 0],
       [2, 2, 3, 2, 1, 1, 2, 3],
       [3, 4, 4, 0, 0, 2, 2, 1],
       [2, 3, 0, 1, 0, 4, 1, 0],
       [1, 1, 2, 4, 2, 4, 0, 0],
       [4, 0, 4, 4, 0, 4, 0, 3],
       [0, 3, 1, 0, 2, 1, 4, 4],
       [3, 0, 1, 3, 4, 4, 4, 0],
       [4, 1, 0, 1, 1, 4, 3, 3],
       [1, 4, 3, 0, 1, 2, 2, 1],
       [0, 3, 2, 3, 4, 1, 1, 2],
       [4, 1, 1, 3, 0, 2, 0, 0],
       [2, 4, 2, 1, 3, 4, 3, 1],
       [3, 2, 1, 0, 0, 1, 1, 0],
       [2, 1, 2, 3, 0, 0, 1, 1],
       [2, 3, 1, 2, 4, 1, 4, 1]])

In [4]:
nbf.mean_ax0(arr)

array([2.25 , 1.5  , 2.   , 1.625, 1.5  , 2.   , 2.   , 1.375, 1.75 ,
       2.375, 1.875, 2.375, 2.125, 1.75 , 2.   , 1.375, 2.5  , 1.   ,
       1.25 , 2.25 ])

In [5]:
gb = GroupBy(arr, by=[1,2])

In [11]:
gb.get_groups()

[array([[0, 1, 3, 3, 4, 4, 4, 0]]),
 array([[0, 4, 4, 4, 0, 4, 0, 3]]),
 array([[1, 0, 4, 1, 1, 4, 3, 3]]),
 array([[1, 1, 4, 3, 0, 2, 0, 0]]),
 array([[1, 2, 2, 3, 0, 0, 1, 1],
        [1, 2, 1, 4, 2, 4, 0, 0]]),
 array([[1, 3, 1, 0, 4, 3, 3, 3],
        [1, 3, 0, 1, 2, 0, 2, 4]]),
 array([[2, 1, 3, 0, 1, 2, 0, 3],
        [2, 1, 3, 0, 0, 1, 1, 0]]),
 array([[2, 3, 2, 2, 1, 1, 2, 3]]),
 array([[3, 0, 2, 1, 0, 4, 1, 0],
        [3, 0, 3, 1, 4, 4, 0, 1]]),
 array([[3, 1, 0, 4, 4, 0, 0, 0],
        [3, 1, 0, 0, 2, 1, 4, 4],
        [3, 1, 2, 2, 4, 1, 4, 1]]),
 array([[3, 2, 0, 3, 4, 1, 1, 2]]),
 array([[4, 2, 2, 1, 3, 4, 3, 1]]),
 array([[4, 3, 1, 0, 1, 2, 2, 1]]),
 array([[4, 4, 3, 0, 0, 2, 2, 1]])]

In [13]:
pd.DataFrame(gb.agg(['count', 'mean', 'median', 'sum', 'mean', 'size']))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.0,1.0,1.0,3.0,4.0,4.0,4.0,1.0
1,0.0,4.0,1.0,4.0,0.0,4.0,0.0,1.0
2,1.0,0.0,1.0,1.0,1.0,4.0,3.0,1.0
3,1.0,1.0,1.0,3.0,0.0,2.0,0.0,1.0
4,1.0,2.0,2.0,3.5,1.0,4.0,0.5,2.0
5,1.0,3.0,2.0,0.5,3.0,3.0,2.5,2.0
6,2.0,1.0,2.0,0.0,0.5,3.0,0.5,2.0
7,2.0,3.0,1.0,2.0,1.0,1.0,2.0,1.0
8,3.0,0.0,2.0,1.0,2.0,8.0,0.5,2.0
9,3.0,1.0,3.0,2.0,4.0,2.0,2.666667,3.0


In [16]:
df = pd.DataFrame(arr)

df.groupby([1,2]).agg({0:'count', 3:'mean', 4:'median', 5:'sum', 6:'mean', 7:'size'})

Unnamed: 0_level_0,Unnamed: 1_level_0,0,3,4,5,6,7
1,2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1,1,3.0,4.0,4,4.0,1
0,4,1,4.0,0.0,4,0.0,1
1,0,1,1.0,1.0,4,3.0,1
1,1,1,3.0,0.0,2,0.0,1
1,2,2,3.5,1.0,4,0.5,2
1,3,2,0.5,3.0,3,2.5,2
2,1,2,0.0,0.5,3,0.5,2
2,3,1,2.0,1.0,1,2.0,1
3,0,2,1.0,2.0,8,0.5,2
3,1,3,2.0,4.0,2,2.666667,3


In [134]:
pd.DataFrame(gb.apply('median'))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.0,0.0,1.0,3.0,1.0,0.0,3.0,3.0
1,0.0,1.0,3.0,1.0,3.0,1.0,1.0,3.0
2,0.0,4.0,0.5,2.5,1.0,3.0,2.0,2.0
3,1.0,0.0,4.0,2.0,4.0,1.0,1.0,0.0
4,1.0,3.0,0.0,3.0,2.0,4.0,2.0,2.0
5,2.0,0.0,3.5,0.5,3.0,0.5,3.0,0.5
6,2.0,3.0,3.0,4.0,1.0,4.0,1.0,0.0
7,2.0,4.0,2.5,1.5,3.0,1.0,2.0,1.0
8,3.0,1.0,3.0,3.0,2.0,1.0,1.0,4.0
9,3.0,2.0,4.0,4.0,3.5,3.5,1.5,4.0


In [26]:
%prun [GroupBy(arr, by=[0,1]) for i in range(100000)]

 

In [27]:
%prun [df.groupby([0,1]) for i in range(100000)]

 

In [35]:
gb = GroupBy(arr, by=[0,1])

for i in trange(100000):
    gb.agg(['count', 'mean', 'median', 'sum', 'mean', 'size'])

  0%|          | 0/100000 [00:00<?, ?it/s]

In [36]:
# pandas counterpart of the GroupBy.agg benchmark.
# It gets its own name so that `gb` keeps referring to the GroupBy instance
# that later cells inspect (`gb.arr`, `gb.apply(...)`).
df_grouped = df.groupby([0,1])

# With keys [0, 1] the value columns are 2..7, matching the six functions
# passed to GroupBy.agg in the NumPy benchmark.
for i in trange(100000):
    df_grouped.agg({2:'count', 3:'mean', 4:'median', 5:'sum', 6:'mean', 7:'size'})

  0%|          | 0/100000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
10450

In [6]:
gb.arr

array([[0, 1, 0, 1, 1, 2, 3, 2],
       [0, 1, 1, 0, 0, 0, 4, 0],
       [0, 2, 2, 4, 4, 2, 3, 4],
       [0, 3, 2, 2, 0, 4, 0, 0],
       [1, 0, 0, 0, 3, 0, 1, 2],
       [1, 1, 1, 0, 0, 4, 1, 4],
       [1, 2, 2, 3, 0, 2, 1, 3],
       [1, 4, 3, 4, 1, 2, 3, 2],
       [2, 0, 4, 1, 4, 0, 4, 0],
       [2, 1, 0, 1, 4, 4, 3, 3],
       [2, 2, 0, 2, 4, 0, 3, 4],
       [2, 3, 4, 4, 4, 4, 4, 3],
       [3, 1, 1, 3, 2, 4, 0, 1],
       [3, 1, 2, 1, 1, 4, 3, 2],
       [3, 1, 2, 3, 1, 1, 1, 1],
       [3, 2, 3, 1, 1, 2, 0, 1],
       [3, 3, 1, 1, 4, 0, 1, 2],
       [4, 2, 2, 0, 0, 2, 0, 4],
       [4, 3, 3, 3, 1, 1, 3, 1],
       [4, 3, 3, 0, 2, 2, 4, 1]])

In [58]:
gb.apply('mean')

[[2 0 4 3 4 4]]
[[2 2 0 2 0 0]
 [0 0 3 2 0 0]]
[[1 0 2 4 3 4]
 [3 0 4 3 4 2]]
[[1 1 3 4 2 0]]
[[2 1 4 3 0 1]]
[[4 2 4 0 4 3]
 [0 2 2 0 4 4]]
[[0 1 4 3 2 3]
 [2 3 0 1 2 2]]
[[1 0 3 2 1 1]]
[[4 4 3 0 2 1]]
[[3 2 3 2 3 0]]
[[0 0 1 0 4 3]]
[[1 4 2 4 1 0]]
[[0 1 1 4 0 1]]
[[4 1 2 3 4 3]
 [0 0 4 3 1 4]
 [3 4 4 0 3 0]]


In [4]:
keys, idx, gr_counts = np.unique(arr[:, [0,1]], return_inverse=True, return_counts=True, axis=0)

print(keys)
print(idx)
print(gr_counts)

[[0 2]
 [1 0]
 [1 1]
 [1 4]
 [2 0]
 [2 2]
 [3 0]
 [3 4]
 [4 0]
 [4 1]
 [4 2]
 [4 3]]
[ 4 11  8  5  3  4  5  9  4  4 10  2 11 11  9  1  6  0  7  8]
[1 1 1 1 4 2 1 1 2 2 1 3]


In [5]:
keys, idx_sort, gr_counts = unique(arr[:, [0,1]], return_argsort=True, return_counts=True, axis=0)

print(keys)
print(idx_sort)
print(gr_counts)

[[0 2]
 [1 0]
 [1 1]
 [1 4]
 [2 0]
 [2 2]
 [3 0]
 [3 4]
 [4 0]
 [4 1]
 [4 2]
 [4 3]]
[17 15 11  4  0  8  9  5  3  6 16 18  2 19  7 14 10 12 13  1]
[1 1 1 1 4 2 1 1 2 2 1 3]


In [6]:
arr[idx_sort]

array([[0, 2, 2, 1, 1, 1, 1, 1],
       [1, 0, 1, 3, 4, 0, 2, 3],
       [1, 1, 2, 2, 2, 3, 1, 3],
       [1, 4, 3, 2, 0, 0, 4, 1],
       [2, 0, 1, 4, 2, 2, 4, 4],
       [2, 0, 1, 2, 1, 1, 0, 2],
       [2, 0, 0, 2, 1, 4, 2, 4],
       [2, 0, 3, 1, 1, 2, 1, 2],
       [2, 2, 2, 0, 2, 2, 1, 4],
       [2, 2, 1, 0, 1, 1, 1, 3],
       [3, 0, 4, 4, 0, 0, 1, 4],
       [3, 4, 3, 3, 2, 2, 2, 3],
       [4, 0, 2, 0, 3, 1, 2, 3],
       [4, 0, 3, 0, 4, 0, 4, 4],
       [4, 1, 2, 1, 4, 3, 0, 1],
       [4, 1, 2, 0, 4, 1, 1, 2],
       [4, 2, 1, 3, 1, 2, 0, 4],
       [4, 3, 0, 0, 4, 1, 2, 2],
       [4, 3, 3, 4, 3, 2, 4, 0],
       [4, 3, 1, 4, 1, 0, 4, 2]])

In [32]:
keys = unique(arr[:, [0,1]], axis=0)

keys

array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1],
       [1, 2],
       [2, 3],
       [3, 0],
       [3, 1],
       [4, 0],
       [4, 4]])

In [52]:
unique(arr)

array([0.02236511, 0.02949067, 0.03919346, 0.04324176, 0.05238025,
       0.05700188, 0.07574789, 0.0937416 , 0.09798186, 0.10481368,
       0.11654544, 0.15768344, 0.16829838, 0.20203639, 0.22737635,
       0.25055886, 0.25602445, 0.297124  , 0.29816872, 0.31849499,
       0.32104293, 0.32499422, 0.34106788, 0.34398853, 0.34645493,
       0.35675297, 0.37550201, 0.4022696 , 0.43695117, 0.45343885,
       0.46210976, 0.46730753, 0.48391846, 0.48832518, 0.49766573,
       0.51947759, 0.52408822, 0.52944805, 0.53833305, 0.54290979,
       0.54296732, 0.54437364, 0.55104393, 0.5684356 , 0.5750179 ,
       0.57596841, 0.5904927 , 0.59107418, 0.59140823, 0.6028593 ,
       0.64919599, 0.66102379, 0.68490628, 0.70404084, 0.70706983,
       0.70789307, 0.70938084, 0.71381829, 0.72245451, 0.72833295,
       0.73812875, 0.75999633, 0.77921734, 0.79039096, 0.79857496,
       0.80156811, 0.80706562, 0.80884942, 0.80957604, 0.8205412 ,
       0.88844965, 0.89364472, 0.90416407, 0.91177467, 0.92881

In [54]:
unique(arr[:, [0, 1]], return_sorted=True, axis=0)

(array([[0.02236511, 0.6028593 ],
        [0.10481368, 0.20203639],
        [0.16829838, 0.59107418],
        [0.29816872, 0.48391846],
        [0.5750179 , 0.49766573],
        [0.72245451, 0.64919599],
        [0.73812875, 0.4022696 ],
        [0.79857496, 0.94034431],
        [0.91177467, 0.59140823],
        [0.94667228, 0.72833295]]),
 array([(0.02236511, 0.6028593 ), (0.10481368, 0.20203639),
        (0.16829838, 0.59107418), (0.29816872, 0.48391846),
        (0.5750179 , 0.49766573), (0.72245451, 0.64919599),
        (0.73812875, 0.4022696 ), (0.79857496, 0.94034431),
        (0.91177467, 0.59140823), (0.94667228, 0.72833295)],
       dtype=[('f0', '<f8'), ('f1', '<f8')]))

In [33]:
np.unique(arr, axis=0)

array([[0.18618169, 0.1990745 , 0.95651843, 0.24468395, 0.06236392,
        0.62894457, 0.68021542, 0.51425232],
       [0.23751583, 0.61357717, 0.25356265, 0.69183526, 0.5224117 ,
        0.70951082, 0.64099621, 0.18435927],
       [0.24568516, 0.83308974, 0.71362937, 0.75411554, 0.12040228,
        0.63007463, 0.89820534, 0.34068661],
       [0.27252698, 0.68866864, 0.48439722, 0.40639269, 0.7647336 ,
        0.23064239, 0.51839465, 0.04083881],
       [0.39214537, 0.12367908, 0.26848504, 0.01154801, 0.37383687,
        0.06214159, 0.4378668 , 0.80613878],
       [0.47229325, 0.33010813, 0.62403174, 0.03577825, 0.92230743,
        0.28859304, 0.78340322, 0.4697385 ],
       [0.47475708, 0.61318387, 0.14469477, 0.52466554, 0.06202671,
        0.19378176, 0.88166198, 0.55346151],
       [0.50209286, 0.62455104, 0.18205143, 0.80802343, 0.71155866,
        0.39041149, 0.57550589, 0.76428645],
       [0.73051462, 0.88160894, 0.69955685, 0.20838158, 0.89455482,
        0.3246689 , 0.535592

In [20]:
arr.view(np.int8)

array([[  70,   47,   72,  -70,  117,   17,  -45,   63,   64,   86,  -47,
          51,   28,  -31,  -82,   63,   18,  -10,  -47,  -86,  -69,   20,
         -39,   63,  106,  -97,  109,  104,  -97,   -3,  -38,   63,  -62,
          17,  -89,   62,  -41,   24,  -48,   63,   26,   57,   60,   99,
         -11,   36,  -34,   63,  -50,  -48,   15,   96,  -20,   -4,  -32,
          63,  -78,  126, -101,    0,  -13,  -40,  -47,   63],
       [  67,  -63, -124,  -46,  -12,  -71,  -21,   63,   39,    7,   26,
         -44,    6,   38,  -25,   63,    0,   62,  -73,   97,  -57,   56,
        -114,   63,   76,  112,  107,    6,  -27, -111,  -31,   63,   17,
          99,   46,  -98,   22,  -58,  -29,   63,   80, -104,    9,  -36,
          22,    3,  -57,   63,   68,  -87,   57,  114, -100,   91,  -25,
          63,  100,   69,    7, -127,   32, -105,  -61,   63],
       [ -34,  -67,    2,  -55,  -13, -112,  -26,   63, -120,   90,   19,
         -15,  -69,   57,  -60,   63,   75,   31,   -9,   47

In [15]:
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None):
    """
    Find the unique elements of an array, or its unique slices along `axis`.

    NOTE(review): this definition reuses the name of the `unique` imported
    from the `unique` module at the top of the notebook, so cells run after
    this one resolve `unique` to this function instead.

    Parameters
    ----------
    ar : array_like
        Input array.
    return_index : bool, optional
        Also return the indices of the first occurrence of each unique item.
    return_inverse : bool, optional
        Also return the indices that rebuild the input from the unique items.
    return_counts : bool, optional
        Also return how many times each unique item occurs.
    axis : int or None, optional
        Axis whose slices are compared. With None (default) the input is
        flattened and individual elements are compared.

    Returns
    -------
    unique : ndarray
        The unique elements (or slices), in sorted order.
    unique_index, unique_inverse, unique_counts : ndarray
        Present only when the matching flag is set, in this order. When any
        of them is requested the result is a tuple; otherwise it is the
        bare array.
    """
    ar = np.asanyarray(ar)

    if axis is None:
        # No axis: uniqueness is decided over the flattened elements.
        output = _unique1d(ar, return_index, return_inverse, return_counts)
        return output[0] if len(output) == 1 else output

    # Bring the compared axis to the front and flatten the rest, so that each
    # slice becomes one row of a 2d array.
    ar = np.moveaxis(ar, axis, 0)
    orig_shape, orig_dtype = ar.shape, ar.dtype
    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
    ar = np.ascontiguousarray(ar)

    # View each row as a single structured element (one field per column), so
    # the 1d routine can sort and compare whole rows.
    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
    if ar.shape[1] > 0:
        consolidated = ar.view(dtype)
    else:
        # A structured dtype with no fields has itemsize 0 and cannot be
        # used with `view`; build the (empty-element) array explicitly.
        consolidated = np.empty(len(ar), dtype=dtype)

    def reshape_uniq(uniq):
        # Undo the consolidation: back to the original dtype, the original
        # trailing shape, and the original axis position.
        n = len(uniq)
        uniq = uniq.view(orig_dtype)
        uniq = uniq.reshape(n, *orig_shape[1:])
        uniq = np.moveaxis(uniq, 0, axis)
        return uniq

    output = _unique1d(consolidated, return_index,
                       return_inverse, return_counts)
    output = (reshape_uniq(output[0]),) + output[1:]
    # Return the bare array when no optional output was requested.
    return output[0] if len(output) == 1 else output


def _unique1d(ar, return_index=False, return_inverse=False,
              return_counts=False):
    """
    Find the unique elements of an array, ignoring shape.

    Always returns a tuple: the unique values first, followed by the
    first-occurrence indices, the inverse indices and the counts, each only
    if the corresponding flag is set.
    """
    # `flatten` copies, so the in-place sort below never touches the input.
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        # A stable sort is needed for `return_index` so that the first
        # element of each run of equal values is its first occurrence.
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    # mask[i] is True where a new value starts in the sorted array.
    mask = np.empty(aux.shape, dtype=np.bool_)
    mask[:1] = True
    mask[1:] = aux[1:] != aux[:-1]

    ret = (aux[mask],)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        # Rank of each sorted element among the unique values, scattered
        # back to the original positions.
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx,)
    if return_counts:
        # Distances between consecutive run starts (plus the end) are the
        # run lengths.
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret

array([[(0.60673833, 0.30465059, 0.78065063, 0.10125054, 0.08716328, 0.19037581, 0.37704733, 0.94580625)],
       [(0.62080862, 0.27985347, 0.02523209, 0.96432033, 0.62117012, 0.28557623, 0.9131523 , 0.80516092)],
       [(0.40472566, 0.26093119, 0.20849898, 0.14671453, 0.79051106, 0.14150988, 0.84134921, 0.38512302)],
       [(0.67809926, 0.06625714, 0.42341014, 0.11118616, 0.86836757, 0.26128285, 0.15963756, 0.43211252)],
       [(0.36088281, 0.56086062, 0.29425393, 0.87412396, 0.08490469, 0.7353515 , 0.50851372, 0.1524794 )],
       [(0.80762244, 0.22059442, 0.12839469, 0.08217813, 0.10125587, 0.03176327, 0.26178235, 0.70543754)],
       [(0.83250966, 0.19713193, 0.92099615, 0.2864089 , 0.33389092, 0.5585048 , 0.49734342, 0.35935327)],
       [(0.91980321, 0.51615473, 0.61473168, 0.76443049, 0.01299176, 0.06884088, 0.99437036, 0.85106146)],
       [(0.12588075, 0.66921332, 0.45592208, 0.80011814, 0.23685639, 0.58924854, 0.14376085, 0.44238088)],
       [(0.92795673, 0.28777622, 0.21

In [11]:
# NOTE(review): `ar` is not defined in any visible cell; it must already
# exist in the kernel when this cell runs.
# Remember the original layout, then flatten all trailing axes so that `ar`
# is 2d with one row per entry along axis 0.
orig_shape, orig_dtype = ar.shape, ar.dtype
ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
ar = np.ascontiguousarray(ar)
# Structured dtype with one field per column, each of `ar`'s dtype.
dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

In [17]:
# At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
# data type with `m` fields where each field has the data type of `ar`.
# In the following, we create the array `consolidated`, which has
# data type `dtype` and holds one structured element per row of `ar`.
if ar.shape[1] > 0:
    consolidated = ar.view(dtype)
else:
    # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
    # a data type with itemsize 0, and the call `ar.view(dtype)` will
    # fail.  Instead, we'll use `np.empty` to explicitly create the
    # array with shape `(len(ar),)`.  Because `dtype` in this case has
    # itemsize 0, the total size of the result is still 0 bytes.
    consolidated = np.empty(len(ar), dtype=dtype)


In [12]:
ar.shape

(10, 8)