In [2]:
import numpy as np

通过**切片**获取的新数组是原始数组的一个视图，它与原始数组共享同一块数据存储区域；通过**整数列表**、**整数数组**和**布尔数组**获取的新数组不和原始数组共享内存区域。

### 多维数组

In [3]:
a = np.arange(0, 60, 10).reshape(-1, 1) + np.arange(0, 6)
a

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [4]:
idx = slice(None, None, 2), slice(2, None)

In [5]:
# a[idx] = a[::2, 2:]
a[idx]

array([[ 2,  3,  4,  5],
       [22, 23, 24, 25],
       [42, 43, 44, 45]])

In [6]:
# a[idx][idx] = a[::2, 2:][::2, 2:]
a[idx][idx]

array([[ 4,  5],
       [44, 45]])

In [7]:
# Numpy提供了一个s_对象来创建数组下标，s_实际上是IndexExpression类的一个对象，类似的对象还有mgrid和ogrid等
np.s_[::2, 2]

(slice(None, None, 2), 2)

In [8]:
# 通过两个元组获取二维数组相应位置上的元素，第一个元组代表元素的第0轴索引，第二个元组代表元素的第1轴索引
a[(0,1,2,3), (1,2,3,4)]

array([ 1, 12, 23, 34])

In [9]:
a[3:, [0,2,5]]

array([[30, 32, 35],
       [40, 42, 45],
       [50, 52, 55]])

In [10]:
# Boolean mask over axis 0 (rows 0, 2 and 5 are selected); the scalar 2 then
# picks column 2 from each selected row.
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, while `bool` works on every version.
mask = np.array([1, 0, 1, 0, 0, 1], dtype=bool)
a[mask, 2]

array([ 2, 22, 52])

In [11]:
# a[[1,2]]=a[[1,2],:]
a[[1,2]]

array([[10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25]])

In [12]:
x = np.array([[0,1],[2,3]])
y = np.array([[-1,-2],[-3,-4]])
a[x, y]

array([[ 5, 14],
       [23, 32]])

In [13]:
a[x]

array([[[ 0,  1,  2,  3,  4,  5],
        [10, 11, 12, 13, 14, 15]],

       [[20, 21, 22, 23, 24, 25],
        [30, 31, 32, 33, 34, 35]]])

### 结构数组

In [14]:
# Structured dtype for one "person" record, given in dictionary form:
#   name   -> 'S30' : fixed-width byte string of 30 bytes
#   age    -> 'i'   : 32-bit integer   (shown as '<i4' in the dtype repr)
#   weight -> 'f'   : 32-bit float     (shown as '<f4' in the dtype repr)
# align=True inserts padding between fields the way a C compiler would, which
# is why the resulting offsets are [0, 32, 36] and the item size is 40 bytes.
persontype = np.dtype({
    'names'  :  ['name', 'age', 'weight'],
    'formats':  ['S30', 'i', 'f']}, align=True)
# Two records stored with the structured dtype defined above.
a = np.array([("Zhang", 32, 75.5), ("Wang", 24, 65.2)], dtype=persontype)

In [15]:
a.dtype

dtype({'names':['name','age','weight'], 'formats':['S30','<i4','<f4'], 'offsets':[0,32,36], 'itemsize':40}, align=True)

In [16]:
print a[0]
a[0].dtype

('Zhang', 32,  75.5)


dtype({'names':['name','age','weight'], 'formats':['S30','<i4','<f4'], 'offsets':[0,32,36], 'itemsize':40}, align=True)

In [17]:
a.tofile("test.bin")

In [18]:
%%file read_struct_array.c
#include <stdio.h>

/* Number of records the notebook writes to test.bin. */
#define N_RECORDS 2

/*
 * C mirror of the NumPy structured dtype written to test.bin
 * ('S30', int32, float32 created with align=True).
 * NOTE(review): this relies on the C compiler padding the struct to the same
 * layout NumPy reported (offsets 0/32/36, size 40) -- confirm on unusual ABIs.
 */
struct person
{
    char name[30];
    int age;
    float weight;
};

struct person p[N_RECORDS];

/*
 * Reads N_RECORDS person records from test.bin and prints one per line.
 * Returns 0 on success, 1 if the file cannot be opened or is too short.
 */
int main(void)
{
    FILE *fp;
    size_t n_read;
    size_t i;

    fp = fopen("test.bin", "rb");
    if (fp == NULL)
    {
        /* Without this check fread/fclose would be called on a NULL stream. */
        perror("test.bin");
        return 1;
    }

    n_read = fread(p, sizeof(struct person), N_RECORDS, fp);
    fclose(fp);
    if (n_read != N_RECORDS)
    {
        fprintf(stderr, "test.bin: expected %d records, read %lu\n",
                N_RECORDS, (unsigned long)n_read);
        return 1;
    }

    for (i = 0; i < n_read; i++)
    {
        /*
         * A fixed-width 'S30' field is NUL-padded but not NUL-terminated when
         * all 30 bytes are used, so the printed length is capped at the field
         * size instead of relying on a terminator.
         */
        printf("%.*s %d %f\n", (int)sizeof(p[i].name), p[i].name,
               p[i].age, p[i].weight);
    }

    return 0;
}

Overwriting read_struct_array.c


In [19]:
!gcc read_struct_array.c -o read_struct_array
!./read_struct_array

Zhang 32 75.500000
Wang 24 65.199997


In [20]:
np.dtype([('f1', [('f2', np.int16)])])

dtype([('f1', [('f2', '<i2')])])

In [21]:
np.dtype([('f0', 'i4'), ('f1', 'f8', (2,3))])

dtype([('f0', '<i4'), ('f1', '<f8', (2, 3))])

In [22]:
# 字典形式
np.dtype({'surname':('S25',0), 'age':(np.uint8, 25)})

dtype([('surname', 'S25'), ('age', 'u1')])

In [23]:
a = np.array([[0,1,2],[3,4,5],[6,7,8]], dtype=np.float32)
a.strides

(12, 4)

In [24]:
a.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [25]:
a.T.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [26]:
b = a[::2, ::2]
b.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [27]:
id(b.base), id(a)

(4460323744, 4460323744)

In [28]:
a = np.array([[0,1],[2,3,],[4,5]], dtype=np.float32)

In [29]:
from numpy.lib.stride_tricks import as_strided

In [30]:
a = np.arange(6, dtype=np.int32)
# Overlapping sliding-window view of `a`: row k is a[k:k+3]. Moving one row down
# or one column right both advance by exactly one element, so both strides equal
# the element size. Taking it from `a.itemsize` (instead of a literal 4) keeps
# the view valid if the dtype changes; wrong byte strides would make as_strided
# read outside the buffer.
b = as_strided(a, shape=(4, 3), strides=(a.itemsize, a.itemsize))
print(a)
print(b)

[0 1 2 3 4 5]
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]]


### ufunc函数

In [31]:
import math

x = [i * 0.001 for i in xrange(1000000)]

def sin_math(x):
    """Overwrite each element of ``x`` in place with ``math.sin`` of it.

    Uses an explicit Python loop with one ``math.sin`` call per element.
    Returns nothing; the result is left in ``x``.
    """
    for i, t in enumerate(x):
        x[i] = math.sin(t)
        
def sin_numpy(x):
    """Apply ``np.sin`` to all of ``x`` in one ufunc call, in place.

    The second positional argument of the ufunc is its output array, so the
    result is written back into ``x``. Returns nothing.
    """
    np.sin(x, x)
    
def sin_numpy_loop(x):
    """Overwrite each element of ``x`` in place with ``np.sin`` of it.

    Same explicit loop as ``sin_math``, but calling ``np.sin`` once per scalar
    element. Returns nothing.
    """
    for i, t in enumerate(x):
        x[i] = np.sin(t)
        
xl = x[:]
%time sin_math(x)

xa = np.array(x)
%time sin_numpy(xa)

xl = x[:]
%time sin_numpy_loop(x)

CPU times: user 159 ms, sys: 7.32 ms, total: 166 ms
Wall time: 167 ms
CPU times: user 7.02 ms, sys: 37 µs, total: 7.06 ms
Wall time: 6.96 ms
CPU times: user 762 ms, sys: 20.9 ms, total: 783 ms
Wall time: 785 ms


In [32]:
x = np.linspace(0, 2*np.pi, 10)
y = np.sin(x)
print y

[  0.00000000e+00   6.42787610e-01   9.84807753e-01   8.66025404e-01
   3.42020143e-01  -3.42020143e-01  -8.66025404e-01  -9.84807753e-01
  -6.42787610e-01  -2.44929360e-16]


In [33]:
a = np.arange(5)
b = np.arange(4, -1, -1)
print a == b
print a > b
print np.logical_or(a == b, a > b)

[False False  True False False]
[False False False  True  True]
[False False  True  True  True]


In [34]:
a == b

array([False, False,  True, False, False], dtype=bool)

In [35]:
np.any(a == b)

True

In [36]:
np.any(a == b) and np.any(a > b)

True

In [37]:
print ~ np.arange(5)
print ~ np.arange(5, dtype=np.uint8)

[-1 -2 -3 -4 -5]
[255 254 253 252 251]


### 自定义ufunc函数
* `frompyfunc(func, nin, nout)`
* `vectorize(func, otypes)`

In [38]:
def triangle_wave(x, c, c0, hc):
    """Evaluate a period-1 triangle wave at the scalar ``x``.

    Within one period the wave rises linearly from 0 at phase 0 to ``hc`` at
    phase ``c0``, falls linearly back to 0 at phase ``c``, and is 0 from ``c``
    to the end of the period.

    Parameters
    ----------
    x : scalar position; only its fractional part (``x - int(x)``) is used.
    c : phase at which the falling edge reaches 0 (expected ``c0 < c``).
    c0 : phase of the peak.
    hc : peak height.

    Returns
    -------
    The wave value at ``x``.
    """
    x = x - int(x)  # the wave has period 1, so keep only the fractional part
    if x >= c:
        r = 0.0
    elif x < c0:
        r = x / c0 * hc
    else:
        # Falling edge: interpolate over the constant edge width (c - c0), so
        # the value is hc at x == c0 and reaches 0 at x == c. Dividing by
        # (x - c0) instead would distort the edge and divide by zero at the peak.
        r = (c - x) / (c - c0) * hc
    return r

# Sample positions covering two periods of the wave.
x = np.linspace(0, 2, 1000)
# frompyfunc wraps the scalar function as a ufunc taking 4 inputs and returning
# 1 output; ufuncs built this way always return object arrays.
triangle_ufunc1 = np.frompyfunc(triangle_wave, 4, 1)
y2 = triangle_ufunc1(x, 0.6, 0.4, 1.0)
# Convert the object array to float64. The builtin `float` is used because the
# `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24.
print(y2.astype(float).dtype)

float64


In [39]:
# vectorize does the same wrapping as frompyfunc but lets the output dtype be
# declared via `otypes`. The builtin `float` (float64) replaces the `np.float`
# alias, which was deprecated in NumPy 1.20 and removed in 1.24.
triangle_ufunc2 = np.vectorize(triangle_wave, otypes=[float])
y3 = triangle_ufunc2(x, 0.6, 0.4, 1.0)

# Both wrappers must produce identical values.
np.all(y2 == y3)

True

### 广播

In [40]:
a = np.arange(0, 60, 10).reshape(-1, 1)
b = np.arange(0, 5)
print a
print b
print a + b

[[ 0]
 [10]
 [20]
 [30]
 [40]
 [50]]
[0 1 2 3 4]
[[ 0  1  2  3  4]
 [10 11 12 13 14]
 [20 21 22 23 24]
 [30 31 32 33 34]
 [40 41 42 43 44]
 [50 51 52 53 54]]


1. `b`的`shape`属性向`a`对齐，在`b`的`shape`属性前加`1`，补齐为`(1,5)`
2. 将`a`和`b`的`shape`都扩展为`(6,5)`

In [41]:
b.shape = 1, 5
print b.repeat(6, axis=0)
print a.repeat(5, axis=1)

[[0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]]
[[ 0  0  0  0  0]
 [10 10 10 10 10]
 [20 20 20 20 20]
 [30 30 30 30 30]
 [40 40 40 40 40]
 [50 50 50 50 50]]


Numpy提供了`ogrid`对象，用于创建广播运算的数组。`ogrid`像多维数组一样，用切片元组作为下标，返回的是一组可以用来广播计算的数组。其切片下标有两种形式：
* 开始值:结束值:步长，和`np.arange`类似。
* 开始值:结束值:长度`j`，当第三个参数为虚数时，它表示返回数组的长度，和`np.linspace`类似。

In [42]:
x, y = np.ogrid[:5, :5]
print x
print y

print "---------------------"

x, y = np.ogrid[:1:4j, :1:3j]
print x
print y

[[0]
 [1]
 [2]
 [3]
 [4]]
[[0 1 2 3 4]]
---------------------
[[ 0.        ]
 [ 0.33333333]
 [ 0.66666667]
 [ 1.        ]]
[[ 0.   0.5  1. ]]


Numpy还提供了`mgrid`对象，返回的是进行广播之后的数组。

In [43]:
x, y = np.mgrid[:5, :5]
print x
print y

[[0 0 0 0 0]
 [1 1 1 1 1]
 [2 2 2 2 2]
 [3 3 3 3 3]
 [4 4 4 4 4]]
[[0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]]


---
`None`相当于在对应的位置创建一个长度为1的新轴，对于一维数组`a`，`a[None, :]`和`a.reshape(1, -1)`等效。

In [44]:
a = np.arange(4)
print a
print a[None, :]
print a[:, None]

[0 1 2 3]
[[0 1 2 3]]
[[0]
 [1]
 [2]
 [3]]


In [45]:
x = np.array([0,1,4,10])
y = np.array([2,3,8])
x[None, :] + y[:, None]

array([[ 2,  3,  6, 12],
       [ 3,  4,  7, 13],
       [ 8,  9, 12, 18]])

还可以使用`ix_()`将两个一维数组转换成可广播的二维数组

In [46]:
gx, gy = np.ix_(x, y)
print gx
print gy
print gx + gy

[[ 0]
 [ 1]
 [ 4]
 [10]]
[[2 3 8]]
[[ 2  3  8]
 [ 3  4  9]
 [ 6  7 12]
 [12 13 18]]


In [47]:
a = np.array([1,2,3,4,5])
b = np.array([2,3,4])
print a.shape
a.shape += (1,)*b.ndim
print a.shape
print a.squeeze()

(5,)
(5, 1)
[1 2 3 4 5]


In [48]:
np.multiply.outer([1,2,3,4,5],[2,3,4])

array([[ 2,  3,  4],
       [ 4,  6,  8],
       [ 6,  9, 12],
       [ 8, 12, 16],
       [10, 15, 20]])

### 多维数组的下标存取

In [50]:
a = np.arange(3*4*5).reshape(3,4,5)
a

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]],

       [[20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39]],

       [[40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59]]])

In [59]:
i0 = np.array([[1,2,1], [0,1,0]])
i1 = np.array([[[0]], [[1]]])
i2 = np.array([[[2,3,2]]])
b = a[i0, i1, i2]
b

array([[[22, 43, 22],
        [ 2, 23,  2]],

       [[27, 48, 27],
        [ 7, 28,  7]]])

In [70]:
'''
i0,i1,i2三个整数数组的shape属性分别为(2,3)(2,1,1)(1,1,3)，根据广播规则，现在长度不足3的shape属性前面补1，使他们为维数相同，广播之后的shape属性为各个轴的最大值：
(1,2,3)
(2,1,1)
(1,1,3)
-------
(2,2,3)
'''
print b.shape

'''广播后的数组如下：'''
ind0, ind1, ind2 = np.broadcast_arrays(i0, i1, i2)
print "  ind0"
print "--------"
print ind0 
print
print "  ind1"
print "--------"
print ind1
print
print "  ind2"
print "--------"
print ind2

(2, 2, 3)
  ind0
--------
[[[1 2 1]
  [0 1 0]]

 [[1 2 1]
  [0 1 0]]]

  ind1
--------
[[[0 0 0]
  [0 0 0]]

 [[1 1 1]
  [1 1 1]]]

  ind2
--------
[[[2 3 2]
  [2 3 2]]

 [[2 3 2]
  [2 3 2]]]


In [72]:
id1, id2 = np.broadcast_arrays(np.array([0,0,1]), np.array([0,2,0]))
print id1
print id2

[0 0 1]
[0 2 0]


In [74]:
a = np.arange(3*4*5).reshape(3,4,5)
a

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]],

       [[20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39]],

       [[40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59]]])

In [76]:
# 下标数组不需要广播，但下标数组长度比数组a维度小1，所以第三维用：代替，相当于a[np.array([0,0,1]), np.array([0,2,0]), :]
a[np.array([0,0,1]), np.array([0,2,0])]

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14],
       [20, 21, 22, 23, 24]])

In [82]:
b2 = np.array([[True, False, True],
               [True, False, False]])
# a[1:3, b2] cannot be used directly here, so the boolean array is converted by
# hand into one integer index array per axis via nonzero().
axis1_idx, axis2_idx = np.nonzero(b2)
a[1:3, axis1_idx, axis2_idx]

array([[20, 22, 25],
       [40, 42, 45]])