In [1]:
import numpy as np
from numba import jit, vectorize, float32, float64
import pandas as pd
from smartFormat import simpleFormat

# Test speed of tensor operations

## Create test data
This is equivalent to the array obtained by concatenating the $\nu_e$ and $\nu_\mu$ fluxes along the first dimension, with each flux binned in $400\;E \times 400\;\cos\theta$ bins.

The resulting *input* array is then $2\times400\times400$, and the transform that must (effectively) left-multiply it is $3\times 2\times400\times400$.

This hopefully represents a realistic scenario for performing an accurate oscillation calculation.

In [2]:
# Reproducible random test data with the flavor axes first:
# a (3, 2, 400, 400) transform and a (2, 400, 400) input, both float64.
np.random.seed(0)
# np.require enforces the dtype and the C-contiguous / aligned layout
xform = np.require(
    np.random.random_sample((3, 2, 400, 400)),
    np.float64,
    ['C_CONTIGUOUS', 'ALIGNED'],
)
inputs = np.require(
    np.random.random_sample((2, 400, 400)),
    np.float64,
    ['C_CONTIGUOUS', 'ALIGNED'],
)
# Single-precision copies of the same values
xform_fp32 = np.array(xform, dtype=np.float32)
inputs_fp32 = np.array(inputs, dtype=np.float32)

In [3]:
# Show dtype and memory-layout flags of the double-precision test arrays
print 'xform.dtype =', xform.dtype
print 'xform.flags =\n', xform.flags
print 'inputs.dtype =', inputs.dtype
print 'inputs.flags =\n', inputs.flags

xform.dtype = float64
xform.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
inputs.dtype = float64
inputs.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False


In [4]:
# Show dtype and memory-layout flags of the single-precision copies
print 'xform_fp32.dtype =', xform_fp32.dtype
print 'xform_fp32.flags =\n', xform_fp32.flags
print 'inputs_fp32.dtype =', inputs_fp32.dtype
print 'inputs_fp32.flags =\n', inputs_fp32.flags

xform_fp32.dtype = float32
xform_fp32.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
inputs_fp32.dtype = float32
inputs_fp32.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False


## Numpy using einsum

### Float64 math on float64 inputs/transforms

In [5]:
# Time the einsum contraction over the repeated index j (float64 throughout).
# %timeit flags: -r 10 = 10 repeats, -q = no printout, -o = return the
# TimeitResult object so the raw timings can be analysed below.
ein_64m_64op = %timeit -r 10 -q -o np.einsum('ij..., j...', xform, inputs, dtype=np.float64, casting='unsafe');

In [6]:
# all_runs holds the total time of each repeat; dividing by the number of
# loops per repeat gives seconds per call. This median is the baseline that
# every later timing is compared against.
ein_64m_64op_med = np.median(ein_64m_64op.all_runs) / ein_64m_64op.loops
print 'Median time, einsum FP64 math / FP64 operands:', \
        simpleFormat(ein_64m_64op_med) + ' sec'


Median time, einsum FP64 math / FP64 operands: 1.206e-3 sec


In [7]:
# Keep one einsum result as the reference for the correctness checks below.
# In implicit mode the ellipsis (bin) axes come first, followed by i.
output_einsum = np.einsum('ij..., j...', xform, inputs)
print output_einsum.shape

(400, 400, 3)


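Check that `einsum` does what is intended: at a single bin, the result should equal the matrix-vector product of that bin's $3\times2$ transform with its 2-vector of inputs.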

In [8]:
# 3x2 transform at one bin (indices 1, 10 of the two binning axes)
x = xform[:,:,1,10]
x

array([[ 0.01560606,  0.18573089],
       [ 0.10818345,  0.34420619],
       [ 0.74177861,  0.28394148]])

In [9]:
# 2-vector of inputs at the same bin
i = inputs[:,1,10]
i

array([ 0.89478297,  0.46917306])

In [10]:
# Direct matrix-vector product for that bin
o = np.dot(x, i)
o

array([ 0.10110397,  0.25829298,  0.79694856])

In [11]:
# einsum result at the same bin, for visual comparison with `o`
output_einsum[1,10,:]

array([ 0.10110397,  0.25829298,  0.79694856])

In [12]:
# Element-wise exact equality (no tolerance) between np.dot and einsum at this bin
np.all(o == output_einsum[1,10,:])

True

### Float32 math on float64 inputs/transforms

In [13]:
# Same float64 operands, but dtype=np.float32 requests the calculation in
# single precision; casting='unsafe' permits the float64 -> float32 conversion.
ein_32m_64op = %timeit -r 10 -q -o np.einsum('ij..., j...', xform, inputs, dtype=np.float32, casting='unsafe');

In [14]:
# Median seconds per call, then as a percentage of the FP64/FP64 einsum baseline
ein_32m_64op_med = np.median(ein_32m_64op.all_runs) / ein_32m_64op.loops
print 'Median time, einsum FP32 math / FP64 operands:', \
        simpleFormat(ein_32m_64op_med) + ' sec'
print simpleFormat(ein_32m_64op_med / ein_64m_64op_med*100)+'% of ein64_64'


Median time, einsum FP32 math / FP64 operands: 1.189e-3 sec
98.64% of ein64_64


### Float32 math on float32 inputs/transforms

In [15]:
# float32 operands and float32 math; casting='no' forbids any dtype
# conversion, so this only succeeds because none is needed.
ein_32m_32op = %timeit -r 10 -q -o np.einsum('ij..., j...', xform_fp32, inputs_fp32, dtype=np.float32, casting='no');

In [16]:
# Median seconds per call, then as a percentage of the FP64/FP64 einsum baseline
ein_32m_32op_med = np.median(ein_32m_32op.all_runs) / ein_32m_32op.loops
print 'Median time, einsum FP32 math / FP32 operands:', \
        simpleFormat(ein_32m_32op_med) + ' sec'
print simpleFormat(ein_32m_32op_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, einsum FP32 math / FP32 operands: 4.681e-4 sec
38.83% of ein64_64


## Python looping

### Float64 math / float64 operands

In [17]:
def apply_python_fp64(inputs, transform):
    """Apply a per-bin 3x2 transform with plain Python loops (float64 output).

    Parameters
    ----------
    inputs : array indexed as ``inputs[j, k, l]`` with ``j`` in {0, 1}
    transform : array indexed as ``transform[i, j, k, l]`` with ``i`` in
        {0, 1, 2} and ``j`` in {0, 1}

    Returns
    -------
    numpy.ndarray of dtype float64 and shape
    ``(inputs.shape[1], inputs.shape[2], 3)``, where
    ``result[k, l, i] = transform[i,0,k,l]*inputs[0,k,l]
    + transform[i,1,k,l]*inputs[1,k,l]``.
    """
    n_rows = inputs.shape[1]
    n_cols = inputs.shape[2]
    result = np.empty((n_rows, n_cols, 3), np.float64)
    # Deliberately naive, fully unrolled element-wise loops: this is the
    # baseline whose speed is being measured.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[0, 0, row, col] * inputs[0, row, col] +
                transform[0, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 1] = (
                transform[1, 0, row, col] * inputs[0, row, col] +
                transform[1, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 2] = (
                transform[2, 0, row, col] * inputs[0, row, col] +
                transform[2, 1, row, col] * inputs[1, row, col]
            )
    return result

In [18]:
# Time the pure-Python loops on the float64 arrays (5 repeats instead of the
# 10 used for einsum)
py_64m_64op = %timeit -r 5 -q -o apply_python_fp64(inputs, xform);

In [19]:
# Median seconds per call, then as a percentage of the FP64/FP64 einsum baseline
py_64m_64op_med = np.median(py_64m_64op.all_runs) / py_64m_64op.loops
print 'Median time, Python FP64 math / FP64 operands:', \
        simpleFormat(py_64m_64op_med) + ' sec'
print simpleFormat(py_64m_64op_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Python FP64 math / FP64 operands: 3.968e-1 sec
3.291e4% of ein64_64


In [20]:
# The pure-Python result must match the einsum reference exactly (no tolerance)
output_python = apply_python_fp64(inputs, xform)
np.all(output_python == output_einsum)

True

### Float32 math / float32 operands

In [21]:
def apply_python_fp32(inputs, transform):
    """Apply a per-bin 3x2 transform with plain Python loops (float32 output).

    Parameters
    ----------
    inputs : array indexed as ``inputs[j, k, l]`` with ``j`` in {0, 1}
    transform : array indexed as ``transform[i, j, k, l]`` with ``i`` in
        {0, 1, 2} and ``j`` in {0, 1}

    Returns
    -------
    numpy.ndarray of dtype float32 and shape
    ``(inputs.shape[1], inputs.shape[2], 3)``, where
    ``result[k, l, i] = transform[i,0,k,l]*inputs[0,k,l]
    + transform[i,1,k,l]*inputs[1,k,l]``.
    """
    n_rows = inputs.shape[1]
    n_cols = inputs.shape[2]
    result = np.empty((n_rows, n_cols, 3), np.float32)
    # Deliberately naive, fully unrolled element-wise loops: this is the
    # baseline whose speed is being measured.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[0, 0, row, col] * inputs[0, row, col] +
                transform[0, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 1] = (
                transform[1, 0, row, col] * inputs[0, row, col] +
                transform[1, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 2] = (
                transform[2, 0, row, col] * inputs[0, row, col] +
                transform[2, 1, row, col] * inputs[1, row, col]
            )
    return result

In [22]:
# Time the pure-Python loops on the float32 arrays (5 repeats)
py_32m_32op = %timeit -r 5 -q -o apply_python_fp32(inputs_fp32, xform_fp32);

In [23]:
# Median seconds per call, then as a percentage of the FP64/FP64 einsum baseline
py_32m_32op_med = np.median(py_32m_32op.all_runs) / py_32m_32op.loops
print 'Median time, Python FP32 math / FP32 operands:', \
        simpleFormat(py_32m_32op_med) + ' sec'
print simpleFormat(py_32m_32op_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Python FP32 math / FP32 operands: 3.983e-1 sec
3.305e4% of ein64_64


## Numba

### Float64 math on float64 operands

In [24]:
@jit("float64[:,:,:](float64[:,:,:], float64[:,:,:,:])",
     nopython=True, nogil=True, cache=True)
def apply_numba_fp64(inputs, transform):
    """Numba-compiled per-bin 3x2 transform, float64 in and out.

    The explicit signature restricts the function to a 3-d float64 ``inputs``
    (indexed ``[j, k, l]``, ``j`` in {0, 1}) and a 4-d float64 ``transform``
    (indexed ``[i, j, k, l]``, ``i`` in {0, 1, 2}).

    Returns a float64 array of shape ``(inputs.shape[1], inputs.shape[2], 3)``
    with ``result[k, l, i] = transform[i,0,k,l]*inputs[0,k,l]
    + transform[i,1,k,l]*inputs[1,k,l]``.
    """
    n_rows = inputs.shape[1]
    n_cols = inputs.shape[2]
    result = np.empty((n_rows, n_cols, 3), float64)
    # Same unrolled loops as the pure-Python version, so the two timings
    # compare like with like.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[0, 0, row, col] * inputs[0, row, col] +
                transform[0, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 1] = (
                transform[1, 0, row, col] * inputs[0, row, col] +
                transform[1, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 2] = (
                transform[2, 0, row, col] * inputs[0, row, col] +
                transform[2, 1, row, col] * inputs[1, row, col]
            )
    return result

In [25]:
# Time the Numba-compiled loops on the float64 arrays (10 repeats)
nu_64m_64op = %timeit -r 10 -q -o apply_numba_fp64(inputs, xform)

In [26]:
# Median seconds per call, then as a percentage of the FP64/FP64 einsum baseline
nu_64m_64op_med = np.median(nu_64m_64op.all_runs) / nu_64m_64op.loops
print 'Median time, Numba FP64 math / FP64 operands:', \
        simpleFormat(nu_64m_64op_med) + ' sec'
print simpleFormat(nu_64m_64op_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Numba FP64 math / FP64 operands: 5.702e-4 sec
47.3% of ein64_64


In [27]:
# The Numba result must match the einsum reference exactly (no tolerance)
output_numba = apply_numba_fp64(inputs, xform)
np.all(output_numba == output_einsum)

True

### Float32 math on float32 operands

In [28]:
@jit("float32[:,:,:](float32[:,:,:], float32[:,:,:,:])",
     nopython=True, nogil=True, cache=True)
def apply_numba_fp32(inputs, transform):
    """Numba-compiled per-bin 3x2 transform, float32 in and out.

    The explicit signature restricts the function to a 3-d float32 ``inputs``
    (indexed ``[j, k, l]``, ``j`` in {0, 1}) and a 4-d float32 ``transform``
    (indexed ``[i, j, k, l]``, ``i`` in {0, 1, 2}).

    Returns a float32 array of shape ``(inputs.shape[1], inputs.shape[2], 3)``
    with ``result[k, l, i] = transform[i,0,k,l]*inputs[0,k,l]
    + transform[i,1,k,l]*inputs[1,k,l]``.
    """
    n_rows = inputs.shape[1]
    n_cols = inputs.shape[2]
    result = np.empty((n_rows, n_cols, 3), float32)
    # Same unrolled loops as the pure-Python version, so the two timings
    # compare like with like.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[0, 0, row, col] * inputs[0, row, col] +
                transform[0, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 1] = (
                transform[1, 0, row, col] * inputs[0, row, col] +
                transform[1, 1, row, col] * inputs[1, row, col]
            )
            result[row, col, 2] = (
                transform[2, 0, row, col] * inputs[0, row, col] +
                transform[2, 1, row, col] * inputs[1, row, col]
            )
    return result

In [29]:
# Time the Numba-compiled loops on the float32 arrays (10 repeats)
nu_32m_32op = %timeit -r 10 -q -o apply_numba_fp32(inputs_fp32, xform_fp32)

In [30]:
# Median seconds per call, then as a percentage of the FP64/FP64 einsum baseline
nu_32m_32op_med = np.median(nu_32m_32op.all_runs) / nu_32m_32op.loops
print 'Median time, Numba FP32 math / FP32 operands:', \
        simpleFormat(nu_32m_32op_med) + ' sec'
print simpleFormat(nu_32m_32op_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Numba FP32 math / FP32 operands: 4.401e-4 sec
36.5% of ein64_64


# What about axes ordering?

How are these timings affected if we change the order of the axes? I.e., keep the C memory layout, but concatenate the flavors along the *last* dimension rather than the first.

In [31]:
# Test data with the flavor axes last: (400, 400, 3, 2) transform and
# (400, 400, 2) input.
# NOTE(review): the following cell re-seeds and regenerates all four of these
# names with np.require, so this cell looks redundant -- confirm and remove.
np.random.seed(0)
xform = np.array(np.random.random_sample((400, 400, 3, 2)), np.float64)
inputs = np.array(np.random.random_sample((400, 400, 2)), np.float64)

# Single-precision copies of the same values
xform_fp32 = np.array(xform, np.float32)
inputs_fp32 = np.array(inputs, np.float32)

In [32]:
# Reproducible random test data with the flavor axes last:
# a (400, 400, 3, 2) transform and a (400, 400, 2) input, both float64.
np.random.seed(0)
# np.require enforces the dtype and the C-contiguous / aligned layout
xform = np.require(
    np.random.random_sample((400, 400, 3, 2)),
    np.float64,
    ['C_CONTIGUOUS', 'ALIGNED'],
)
inputs = np.require(
    np.random.random_sample((400, 400, 2)),
    np.float64,
    ['C_CONTIGUOUS', 'ALIGNED'],
)
# Single-precision copies of the same values
xform_fp32 = np.array(xform, dtype=np.float32)
inputs_fp32 = np.array(inputs, dtype=np.float32)

In [33]:
# Show dtype and memory-layout flags of the swapped-axes double-precision arrays
print 'xform.dtype =', xform.dtype
print 'xform.flags =\n', xform.flags
print 'inputs.dtype =', inputs.dtype
print 'inputs.flags =\n', inputs.flags

xform.dtype = float64
xform.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
inputs.dtype = float64
inputs.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False


In [34]:
# Show dtype and memory-layout flags of the swapped-axes single-precision copies
print 'xform_fp32.dtype =', xform_fp32.dtype
print 'xform_fp32.flags =\n', xform_fp32.flags
print 'inputs_fp32.dtype =', inputs_fp32.dtype
print 'inputs_fp32.flags =\n', inputs_fp32.flags

xform_fp32.dtype = float32
xform_fp32.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
inputs_fp32.dtype = float32
inputs_fp32.flags =
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False


In [35]:
# einsum with the flavor axes last ('...ij, ...j'): float64 math on float64 operands
ein_64m_64op_sw = %timeit -r 10 -q -o np.einsum('...ij, ...j', xform, inputs, dtype=np.float64, casting='unsafe');

In [36]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
ein_64m_64op_sw_med = np.median(ein_64m_64op_sw.all_runs) / ein_64m_64op_sw.loops
print 'Median time, einsum FP64 math / FP64 operands swapped axes:', \
        simpleFormat(ein_64m_64op_sw_med) + ' sec'
print simpleFormat(ein_64m_64op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, einsum FP64 math / FP64 operands swapped axes: 2.824e-3 sec
234.2% of ein64_64


In [37]:
# Flavor axes last: float32 math requested on float64 operands
# (casting='unsafe' permits the float64 -> float32 conversion)
ein_32m_64op_sw = %timeit -r 10 -q -o np.einsum('...ij, ...j', xform, inputs, dtype=np.float32, casting='unsafe');

In [38]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
ein_32m_64op_sw_med = np.median(ein_32m_64op_sw.all_runs) / ein_32m_64op_sw.loops
print 'Median time, einsum FP32 math / FP64 operands swapped axes:', \
        simpleFormat(ein_32m_64op_sw_med) + ' sec'
print simpleFormat(ein_32m_64op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, einsum FP32 math / FP64 operands swapped axes: 2.987e-2 sec
2477% of ein64_64


In [39]:
# Flavor axes last: float32 math on float32 operands
ein_32m_32op_sw = %timeit -r 10 -q -o np.einsum('...ij, ...j', xform_fp32, inputs_fp32, dtype=np.float32, casting='unsafe');

In [40]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
ein_32m_32op_sw_med = np.median(ein_32m_32op_sw.all_runs) / ein_32m_32op_sw.loops
print 'Median time, einsum FP32 math / FP32 operands swapped axes:', \
        simpleFormat(ein_32m_32op_sw_med) + ' sec'
print simpleFormat(ein_32m_32op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, einsum FP32 math / FP32 operands swapped axes: 3.751e-3 sec
311.1% of ein64_64


### Python looping

#### 64 bit math on 64 bit operands

In [41]:
def apply_python_fp64_sw(inputs, transform):
    """Plain-Python per-bin 3x2 transform for the swapped axes layout (float64).

    Parameters
    ----------
    inputs : array indexed as ``inputs[k, l, j]`` with ``j`` in {0, 1}
    transform : array indexed as ``transform[k, l, i, j]`` with ``i`` in
        {0, 1, 2} and ``j`` in {0, 1}

    Returns
    -------
    numpy.ndarray of dtype float64 and shape
    ``(inputs.shape[0], inputs.shape[1], 3)``, where
    ``result[k, l, i] = transform[k,l,i,0]*inputs[k,l,0]
    + transform[k,l,i,1]*inputs[k,l,1]``.
    """
    n_rows = inputs.shape[0]
    n_cols = inputs.shape[1]
    result = np.empty((n_rows, n_cols, 3), np.float64)
    # Deliberately naive, fully unrolled element-wise loops: this is the
    # baseline whose speed is being measured.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[row, col, 0, 0] * inputs[row, col, 0] +
                transform[row, col, 0, 1] * inputs[row, col, 1]
            )
            result[row, col, 1] = (
                transform[row, col, 1, 0] * inputs[row, col, 0] +
                transform[row, col, 1, 1] * inputs[row, col, 1]
            )
            result[row, col, 2] = (
                transform[row, col, 2, 0] * inputs[row, col, 0] +
                transform[row, col, 2, 1] * inputs[row, col, 1]
            )
    return result

In [42]:
# Time the pure-Python loops on the swapped-axes float64 arrays (5 repeats)
py_64m_64op_sw = %timeit -r 5 -q -o apply_python_fp64_sw(inputs, xform);

In [43]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
py_64m_64op_sw_med = np.median(py_64m_64op_sw.all_runs) / py_64m_64op_sw.loops
print 'Median time, Python FP64 math / FP64 operands swapped axes:', \
        simpleFormat(py_64m_64op_sw_med) + ' sec'
print simpleFormat(py_64m_64op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Python FP64 math / FP64 operands swapped axes: 4.055e-1 sec
3.364e4% of ein64_64


In [44]:
def apply_python_fp32_sw(inputs, transform):
    """Plain-Python per-bin 3x2 transform for the swapped axes layout (float32).

    Parameters
    ----------
    inputs : array indexed as ``inputs[k, l, j]`` with ``j`` in {0, 1}
    transform : array indexed as ``transform[k, l, i, j]`` with ``i`` in
        {0, 1, 2} and ``j`` in {0, 1}

    Returns
    -------
    numpy.ndarray of dtype float32 and shape
    ``(inputs.shape[0], inputs.shape[1], 3)``, where
    ``result[k, l, i] = transform[k,l,i,0]*inputs[k,l,0]
    + transform[k,l,i,1]*inputs[k,l,1]``.
    """
    n_rows = inputs.shape[0]
    n_cols = inputs.shape[1]
    result = np.empty((n_rows, n_cols, 3), np.float32)
    # Deliberately naive, fully unrolled element-wise loops: this is the
    # baseline whose speed is being measured.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[row, col, 0, 0] * inputs[row, col, 0] +
                transform[row, col, 0, 1] * inputs[row, col, 1]
            )
            result[row, col, 1] = (
                transform[row, col, 1, 0] * inputs[row, col, 0] +
                transform[row, col, 1, 1] * inputs[row, col, 1]
            )
            result[row, col, 2] = (
                transform[row, col, 2, 0] * inputs[row, col, 0] +
                transform[row, col, 2, 1] * inputs[row, col, 1]
            )
    return result

In [45]:
# Time the pure-Python loops on the swapped-axes float32 arrays (5 repeats)
py_32m_32op_sw = %timeit -r 5 -q -o apply_python_fp32_sw(inputs_fp32, xform_fp32);

In [46]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
py_32m_32op_sw_med = np.median(py_32m_32op_sw.all_runs) / py_32m_32op_sw.loops
print 'Median time, Python FP32 math / FP32 operands swapped axes:', \
        simpleFormat(py_32m_32op_sw_med) + ' sec'
print simpleFormat(py_32m_32op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Python FP32 math / FP32 operands swapped axes: 0.444 sec
3.683e4% of ein64_64


### Numba looping

#### 64 bit math on 64 bit operands

In [47]:
@jit("float64[:,:,:](float64[:,:,:], float64[:,:,:,:])",
     nopython=True, nogil=True, cache=True)
def apply_numba_fp64_sw(inputs, transform):
    """Numba-compiled per-bin 3x2 transform, swapped axes layout, float64.

    The explicit signature restricts the function to a 3-d float64 ``inputs``
    (indexed ``[k, l, j]``, ``j`` in {0, 1}) and a 4-d float64 ``transform``
    (indexed ``[k, l, i, j]``, ``i`` in {0, 1, 2}).

    Returns a float64 array of shape ``(inputs.shape[0], inputs.shape[1], 3)``
    with ``result[k, l, i] = transform[k,l,i,0]*inputs[k,l,0]
    + transform[k,l,i,1]*inputs[k,l,1]``.
    """
    n_rows = inputs.shape[0]
    n_cols = inputs.shape[1]
    result = np.empty((n_rows, n_cols, 3), float64)
    # Same unrolled loops as the pure-Python version, so the two timings
    # compare like with like.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[row, col, 0, 0] * inputs[row, col, 0] +
                transform[row, col, 0, 1] * inputs[row, col, 1]
            )
            result[row, col, 1] = (
                transform[row, col, 1, 0] * inputs[row, col, 0] +
                transform[row, col, 1, 1] * inputs[row, col, 1]
            )
            result[row, col, 2] = (
                transform[row, col, 2, 0] * inputs[row, col, 0] +
                transform[row, col, 2, 1] * inputs[row, col, 1]
            )
    return result

In [48]:
# Time the Numba-compiled loops on the swapped-axes float64 arrays (10 repeats)
nu_64m_64op_sw = %timeit -r 10 -q -o apply_numba_fp64_sw(inputs, xform)

In [49]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
nu_64m_64op_sw_med = np.median(nu_64m_64op_sw.all_runs) / nu_64m_64op_sw.loops
print 'Median time, Numba FP64 math / FP64 operands swapped axes:', \
        simpleFormat(nu_64m_64op_sw_med) + ' sec'
print simpleFormat(nu_64m_64op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Numba FP64 math / FP64 operands swapped axes: 5.374e-4 sec
44.57% of ein64_64


#### 32 bit math on 32 bit operands

In [50]:
@jit("float32[:,:,:](float32[:,:,:], float32[:,:,:,:])",
     nopython=True, nogil=True, cache=True)
def apply_numba_fp32_sw(inputs, transform):
    """Numba-compiled per-bin 3x2 transform, swapped axes layout, float32.

    The explicit signature restricts the function to a 3-d float32 ``inputs``
    (indexed ``[k, l, j]``, ``j`` in {0, 1}) and a 4-d float32 ``transform``
    (indexed ``[k, l, i, j]``, ``i`` in {0, 1, 2}).

    Returns a float32 array of shape ``(inputs.shape[0], inputs.shape[1], 3)``
    with ``result[k, l, i] = transform[k,l,i,0]*inputs[k,l,0]
    + transform[k,l,i,1]*inputs[k,l,1]``.
    """
    n_rows = inputs.shape[0]
    n_cols = inputs.shape[1]
    result = np.empty((n_rows, n_cols, 3), float32)
    # Same unrolled loops as the pure-Python version, so the two timings
    # compare like with like.
    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col, 0] = (
                transform[row, col, 0, 0] * inputs[row, col, 0] +
                transform[row, col, 0, 1] * inputs[row, col, 1]
            )
            result[row, col, 1] = (
                transform[row, col, 1, 0] * inputs[row, col, 0] +
                transform[row, col, 1, 1] * inputs[row, col, 1]
            )
            result[row, col, 2] = (
                transform[row, col, 2, 0] * inputs[row, col, 0] +
                transform[row, col, 2, 1] * inputs[row, col, 1]
            )
    return result

In [51]:
# Time the Numba-compiled loops on the swapped-axes float32 arrays (10 repeats)
nu_32m_32op_sw = %timeit -r 10 -q -o apply_numba_fp32_sw(inputs_fp32, xform_fp32)

In [52]:
# Median seconds per call; the percentage is relative to the einsum FP64/FP64
# timing on the ORIGINAL axes ordering (ein_64m_64op_med)
nu_32m_32op_sw_med = np.median(nu_32m_32op_sw.all_runs) / nu_32m_32op_sw.loops
print 'Median time, Numba FP32 math / FP32 operands swapped axes:', \
        simpleFormat(nu_32m_32op_sw_med) + ' sec'
print simpleFormat(nu_32m_32op_sw_med / ein_64m_64op_med*100)+'% of ein64_64'

Median time, Numba FP32 math / FP32 operands swapped axes: 3.917e-4 sec
32.49% of ein64_64


# Show summary of timing results

Tabulate the results for original axes ordering.

In [53]:
# (label, median seconds per call) for every benchmark run on the original
# axes ordering, in display order.
timing_pairs = [
    ('Python FP64math FP64op', py_64m_64op_med),
    ('Python FP32math FP32op', py_32m_32op_med),
    ('einsum FP64math FP64op', ein_64m_64op_med),
    ('einsum FP32math FP64op', ein_32m_64op_med),
    ('einsum FP32math FP32op', ein_32m_32op_med),
    ('Numba FP64math FP64op', nu_64m_64op_med),
    ('Numba FP32math FP32op', nu_32m_32op_med),
]
# One-row table (row label 0) with one column per benchmark. Built from
# explicit pairs instead of indexing dict.keys()/dict.values(), which only
# works on Python 2 where those methods return lists.
timings = pd.DataFrame(
    [[seconds for label, seconds in timing_pairs]],
    columns=[label for label, seconds in timing_pairs],
)

Tabulate the results for swapped axes ordering.

In [54]:
# (label, median seconds per call) for every benchmark run on the swapped
# axes ordering, in display order.
# The second label previously read 'Python FP64math FP32op axswp'; the value is
# the float32-math / float32-operand timing, so it is labelled to match the
# corresponding column of the original-axes table.
timing_pairs_sw = [
    ('Python FP64math FP64op axswp', py_64m_64op_sw_med),
    ('Python FP32math FP32op axswp', py_32m_32op_sw_med),
    ('einsum FP64math FP64op axswp', ein_64m_64op_sw_med),
    ('einsum FP32math FP64op axswp', ein_32m_64op_sw_med),
    ('einsum FP32math FP32op axswp', ein_32m_32op_sw_med),
    ('Numba FP64math FP64op axswp', nu_64m_64op_sw_med),
    ('Numba FP32math FP32op axswp', nu_32m_32op_sw_med),
]
# One-row table (row label 0) with one column per benchmark. Built from
# explicit pairs instead of indexing dict.keys()/dict.values(), which only
# works on Python 2 where those methods return lists.
timings_sw = pd.DataFrame(
    [[seconds for label, seconds in timing_pairs_sw]],
    columns=[label for label, seconds in timing_pairs_sw],
)

## Absolute timings (sec)

### Original axes ordering (flavor concatenated on first dimension)

In [55]:
timings

Unnamed: 0,Python FP64math FP64op,Python FP32math FP32op,einsum FP64math FP64op,einsum FP32math FP64op,einsum FP32math FP32op,Numba FP64math FP64op,Numba FP32math FP32op
0,0.396805,0.398446,0.001206,0.001189,0.000468,0.00057,0.00044


### Swapped axes ordering (flavor concatenated on last dimension)

In [56]:
timings_sw

Unnamed: 0,Python FP64math FP64op axswp,Python FP64math FP32op axswp,einsum FP64math FP64op axswp,einsum FP32math FP64op axswp,einsum FP32math FP32op axswp,Numba FP64math FP64op axswp,Numba FP32math FP32op axswp
0,0.405538,0.444001,0.002824,0.029865,0.003751,0.000537,0.000392


## Timings as fraction of einsum FP64-math, FP64-operands, orig. axes ordering

### Original axes ordering (flavor concatenated on first dimension)

In [57]:
# Normalise every timing by the einsum FP64/FP64 time; .values yields a plain
# one-element array, so the division is not aligned on column labels
timings / timings['einsum FP64math FP64op'].values

Unnamed: 0,Python FP64math FP64op,Python FP32math FP32op,einsum FP64math FP64op,einsum FP32math FP64op,einsum FP32math FP32op,Numba FP64math FP64op,Numba FP32math FP32op
0,329.135034,330.496212,1.0,0.986363,0.388258,0.472959,0.365044


### Swapped axes ordering (flavor concatenated on last dimension)

In [58]:
# Normalise the swapped-axes timings by the einsum FP64/FP64 time measured on
# the ORIGINAL axes ordering, so both ratio tables share one baseline
timings_sw / timings['einsum FP64math FP64op'].values

Unnamed: 0,Python FP64math FP64op axswp,Python FP64math FP32op axswp,einsum FP64math FP64op axswp,einsum FP32math FP64op axswp,einsum FP32math FP32op axswp,Numba FP64math FP64op axswp,Numba FP32math FP32op axswp
0,336.378761,368.282288,2.342358,24.772077,3.111444,0.445726,0.324939


# Computer used for test

In [66]:
!!hostname

['schwyz']

In [67]:
!!lscpu

['Architecture:          x86_64',
 'CPU op-mode(s):        32-bit, 64-bit',
 'Byte Order:            Little Endian',
 'CPU(s):                32',
 'On-line CPU(s) list:   0-31',
 'Thread(s) per core:    2',
 'Core(s) per socket:    8',
 'Socket(s):             2',
 'NUMA node(s):          2',
 'Vendor ID:             GenuineIntel',
 'CPU family:            6',
 'Model:                 63',
 'Model name:            Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz',
 'Stepping:              2',
 'CPU MHz:               1352.531',
 'CPU max MHz:           3200.0000',
 'CPU min MHz:           1200.0000',
 'BogoMIPS:              4790.89',
 'Virtualization:        VT-x',
 'L1d cache:             32K',
 'L1i cache:             32K',
 'L2 cache:              256K',
 'L3 cache:              20480K',
 'NUMA node0 CPU(s):     0-7,16-23',
 'NUMA node1 CPU(s):     8-15,24-31',
 'Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse s