# Numpy基础

## 基本概念

Vectors：一维  
Matrices：二维  
Arrays：三维及更高维（N维）  
Vectorized computing：比较快，不再需要使用循环  

## 1. Basics of Numpy arrays

In [1]:
import numpy as np

In [3]:
lst = [10,20,30,40] #列表
arr = np.array([10,20,30,40]) #将列表转为数组

In [7]:
type(arr)

numpy.ndarray

###  Element indexing

In [8]:
lst[0]

10

In [9]:
arr[-1]

40

In [10]:
arr[2:]

array([30, 40])

### Differences between arrays and lists 

The first difference is that arrays are homogeneous;  
i.e. all elements of an array must be of the same type.  
In contrast, lists can contain elements of arbitrary type.

In [12]:
lst[-1] = 'a string inside a list'
lst

[10, 20, 30, 'a string inside a list']

In [13]:
arr[-1] = 'a string inside a list'

ValueError: invalid literal for long() with base 10: 'a string inside a list'

###  Array Attributes

The information about the type of an array is contained in its dtype attribute:

In [14]:
arr.dtype

dtype('int64')

In [15]:
arr[-1] = 1.234
arr

array([10, 20, 30,  1])

### Creating Arrays 

In [16]:
np.zeros(5,dtype=float)

array([ 0.,  0.,  0.,  0.,  0.])

In [17]:
np.zeros(3,dtype=int)

array([0, 0, 0])

In [18]:
np.zeros(3,dtype=complex)

array([ 0.+0.j,  0.+0.j,  0.+0.j])

In [19]:
print('5 ones:',np.ones(5))

('5 ones:', array([ 1.,  1.,  1.,  1.,  1.]))


In [20]:
a = np.empty(4)
a.fill(5.5)
a

array([ 5.5,  5.5,  5.5,  5.5])

###  Defining various sequences

In [21]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [22]:
print("A liner grid between 0 and 1:")
print(np.linspace(0,1,4))

A liner grid between 0 and 1:
[ 0.          0.33333333  0.66666667  1.        ]


In [23]:
print("A logarithmic grid between 10**1 and 10**3:")
print(np.logspace(1,3,4))

A logarithmic grid between 10**1 and 10**3:
[   10.            46.41588834   215.443469    1000.        ]


###  Creating random arrays

In [24]:
np.random.randn(5)

array([ 0.44051513, -0.34652755,  0.32091012,  1.20353009, -1.66752639])

In [27]:
norm10 = np.random.normal(10,3,5) #期望值，标准差，数量
norm10

array([  6.02021948,  14.51199315,  11.95136397,  11.51678752,   9.53408925])

###  Index with other arrays

In [29]:
mask = norm10>9
mask

array([False,  True,  True,  True,  True], dtype=bool)

In [30]:
print("Values above 9:",norm10[mask])

('Values above 9:', array([ 14.51199315,  11.95136397,  11.51678752,   9.53408925]))


print("Resetting all values above 9 to 0...")
norm10[norm10>9] = 0
print(norm10)

## 2. Arrays with more than one dimension

In [3]:
import numpy as np

In [4]:
lst2 = [[1,2],[3,4]]
arr2 = np.array([[1,2],[3,4]])
arr2

array([[1, 2],
       [3, 4]])

In [7]:
print(lst2[0][1])
print(arr2[0,1])
print(arr2[0][1])

2
2
2


In [14]:
np.zeros(2)
np.zeros((2,3))#加括号，表示二维数组
#np.ones([2,3])

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [23]:
arr1 = np.random.normal(10,3,(2,3))
arr1.reshape(3,2)
#arr1

array([[  6.11839008,   5.98270723],
       [ 13.54119506,   8.17356136],
       [  9.95221096,   3.8116364 ]])

In [16]:
arr = np.arange(8).reshape(2,4)
arr

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

###  Views, not Copies

In [27]:
arr = np.arange(8)
arr2 = arr.reshape(2,4)
#print(arr)
#print(arr2)
arr[0] = 1000
print(arr)
print(arr2)

[1000    1    2    3    4    5    6    7]
[[1000    1    2    3]
 [   4    5    6    7]]


### Slices

In [35]:
print('slicing in the second row:',arr2[1,2:4])
print('slicing in the second row:',arr2[1,2:3])
print('slicing in the second row:',arr2[1,2:2])
print('slicing in the second row:',arr2[1,2])
print('slicing in the second row:',arr2[1,0:1])#只能取到1个数字
print('All rows, third column :',arr2[:,2])

('slicing in the second row:', array([6, 7]))
('slicing in the second row:', array([6]))
('slicing in the second row:', array([], dtype=int64))
('slicing in the second row:', 6)
('slicing in the second row:', array([4]))
('All rows, third column :', array([2, 6]))


In [40]:
print('First row:',arr2[0])
print('First row:',arr2[0,:])
print('Second row:',arr2[1])
print('Second line',arr2[:,0])

('First row:', array([1000,    1,    2,    3]))
('First row:', array([1000,    1,    2,    3]))
('Second row:', array([4, 5, 6, 7]))
('Second line', array([1000,    4]))


###  Array Properties and Methods

In [45]:
arr = arr2
arr
#arr2

array([[1000,    1,    2,    3],
       [   4,    5,    6,    7]])

In [48]:
print('Data type               :',arr.dtype)
print('Total number of elements:',arr.size)
print('Number of dimensions    :',arr.ndim)
print('Shapre(dimensionality)  :',arr.shape)
print('Memory used(in bytes)   :',arr.nbytes)

('Data type               :', dtype('int64'))
('Total number of elements:', 8)
('Number of dimensions    :', 2)
('Shapre(dimensionality)  :', (2, 4))
('Memory used(in bytes)   :', 64)


In [49]:
print('Mininum and maxinum:',arr.min(),arr.max())
print('sum and product of all elements:',arr.sum(),arr.prod()) #prod乘法
print('Mean and standard deviation:',arr.mean(),arr.std())

('Mininum and maxinum:', 1, 1000)
('sum and product of all elements:', 1028, 5040000)
('Mean and standard deviation:', 128.5, 329.40135093833482)


In [50]:
1*2*3*4*5*6*7*1000

5040000

In [55]:
print('For the sollowing array:\n',arr)
print('The sum of elements along the rows is:',arr.sum(axis=1)) #求行和
print('The sum of elements along the columns is:',arr.sum(axis=0)) #求列和

('For the sollowing array:\n', array([[1000,    1,    2,    3],
       [   4,    5,    6,    7]]))
('The sum of elements along the rows is:', array([1006,   22]))
('The sum of elements along the columns is:', array([1004,    6,    8,   10]))


In [56]:
np.zeros((3,4,5,6)).sum(2).shape

(3, 4, 6)

In [58]:
np.zeros((3,4,5,6))

array([[[[ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.]],

        [[ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.]],

        [[ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.]],

        [[ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.]]],


       [[[ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.],
      

In [59]:
np.zeros((3,4,5,6)).sum(2)

array([[[ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.,  0.,  0.]]])

In [57]:
print('Array:\n',arr)
print('Transpose:\n',arr.T)

('Array:\n', array([[1000,    1,    2,    3],
       [   4,    5,    6,    7]]))
('Transpose:\n', array([[1000,    4],
       [   1,    5],
       [   2,    6],
       [   3,    7]]))


### More array properties

arr.T             arr.copy          arr.getfield      arr.put           arr.squeeze
arr.all           arr.ctypes        arr.imag          arr.ravel         arr.std
arr.any           arr.cumprod       arr.item          arr.real          arr.strides
arr.argmax        arr.cumsum        arr.itemset       arr.repeat        arr.sum
arr.argmin        arr.data          arr.itemsize      arr.reshape       arr.swapaxes
arr.argsort       arr.diagonal      arr.max           arr.resize        arr.take
arr.astype        arr.dot           arr.mean          arr.round         arr.tofile
arr.base          arr.dtype         arr.min           arr.searchsorted  arr.tolist
arr.byteswap      arr.dump          arr.nbytes        arr.setasflat     arr.tostring
arr.choose        arr.dumps         arr.ndim          arr.setfield      arr.trace
arr.clip          arr.fill          arr.newbyteorder  arr.setflags      arr.transpose
arr.compress      arr.flags         arr.nonzero       arr.shape         arr.var
arr.conj          arr.flat          arr.prod          arr.size          arr.view
arr.conjugate     arr.flatten       arr.ptp           arr.sort          

## 3. Operating the arrays

In [62]:
arr1 = np.arange(4)
arr2 = np.arange(10,14)
print(arr1,'+',arr2,'=',arr1+arr2)

(array([0, 1, 2, 3]), '+', array([10, 11, 12, 13]), '=', array([10, 12, 14, 16]))


In [63]:
print(arr1,'*',arr2,'=',arr1*arr2)

(array([0, 1, 2, 3]), '*', array([10, 11, 12, 13]), '=', array([ 0, 11, 24, 39]))


In [64]:
1.5*arr1

array([ 0. ,  1.5,  3. ,  4.5])

###  Broadcasting

In [65]:
print(np.arange(3))
print(np.arange(3)+5) #低维向高维补全

[0 1 2]
[5 6 7]


In [66]:
np.ones((3,3))

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [67]:
np.ones((3,3))+np.arange(3)

array([[ 1.,  2.,  3.],
       [ 1.,  2.,  3.],
       [ 1.,  2.,  3.]])

In [69]:
np.arange(3).reshape((3,1))+np.arange(3)

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [71]:
np.arange(3)

array([0, 1, 2])

###  Visualizing broadcasting

http://www.astroml.org/book_figures/appendix/fig_broadcast_visual.html

### Questions:

Will the following broadcasting operations work?

In [75]:
arr1 = np.ones((2,3))
arr2 = np.ones((2,1))
arr1+arr2

array([[ 2.,  2.,  2.],
       [ 2.,  2.,  2.]])

In [77]:
arr1 = np.ones((2,3))
arr2 = np.ones(2)
arr1+arr2

ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [78]:
np.ones(2)

array([ 1.,  1.])

In [80]:
arr1 = np.ones((2,3))
arr2 = np.ones(2).reshape(2,1)
arr1+arr2

array([[ 2.,  2.,  2.],
       [ 2.,  2.,  2.]])

In [89]:
np.ones(2).shape #(2,)表示含2个元素的一维数组，而不是1行2列的二维数组
#np.ones((2,3)).shape

(2,)

In [82]:
np.ones(2)

array([ 1.,  1.])

In [87]:
np.ones(2).reshape(2,1)

array([[ 1.],
       [ 1.]])

In [88]:
np.ones(2).reshape(2,1).shape

(2, 1)

### Quick Exercise:

Use np.arange and reshape

A = [[1 2 3 4]
     [5 6 7 8]]

Use np.arange to create the array

B = [1 2]

Use broadcasting to add B to each column of A to create the final array

A + B = [[2  3  4  5]
         [7  8  9 10]]

Hint: what shape does B have to be changed to?

###  Answers:

In [91]:
A = np.arange(1,9).reshape((2,4))
B = np.arange(1,3)
A + B.reshape((2,1))

array([[ 2,  3,  4,  5],
       [ 7,  8,  9, 10]])

### Element-wise Functions

In [107]:
x = np.linspace(0,2*np.pi,100)
y = np.sin(x)
x

array([ 0.        ,  0.06346652,  0.12693304,  0.19039955,  0.25386607,
        0.31733259,  0.38079911,  0.44426563,  0.50773215,  0.57119866,
        0.63466518,  0.6981317 ,  0.76159822,  0.82506474,  0.88853126,
        0.95199777,  1.01546429,  1.07893081,  1.14239733,  1.20586385,
        1.26933037,  1.33279688,  1.3962634 ,  1.45972992,  1.52319644,
        1.58666296,  1.65012947,  1.71359599,  1.77706251,  1.84052903,
        1.90399555,  1.96746207,  2.03092858,  2.0943951 ,  2.15786162,
        2.22132814,  2.28479466,  2.34826118,  2.41172769,  2.47519421,
        2.53866073,  2.60212725,  2.66559377,  2.72906028,  2.7925268 ,
        2.85599332,  2.91945984,  2.98292636,  3.04639288,  3.10985939,
        3.17332591,  3.23679243,  3.30025895,  3.36372547,  3.42719199,
        3.4906585 ,  3.55412502,  3.61759154,  3.68105806,  3.74452458,
        3.8079911 ,  3.87145761,  3.93492413,  3.99839065,  4.06185717,
        4.12532369,  4.1887902 ,  4.25225672,  4.31572324,  4.37

In [93]:
np.array?

In [100]:
list?

In [97]:
copy?

Object `copy` not found.


In [106]:
np.array?

In [102]:
np.mask?

Object `np.mask` not found.


In [104]:
musk?

Object `musk` not found.


##  4. Linear algebra in numpy

In [108]:
v1 = np.array([2,3,4])
v2 = np.array([1,0,1])
print(v1,'.',v2,'=',np.dot(v1,v2))

(array([2, 3, 4]), '.', array([1, 0, 1]), '=', 6)


In [111]:
np.dot(v1.T,v2) #numpy中行向量列向量并无区别

6

In [112]:
A = np.arange(6).reshape(2,3)
print(A,'x',v1,'=',np.dot(A,v1))

(array([[0, 1, 2],
       [3, 4, 5]]), 'x', array([2, 3, 4]), '=', array([11, 38]))


In [113]:
v1.shape

(3,)

In [114]:
np.dot(A,v1.T)

array([11, 38])

In [115]:
A.shape

(2, 3)

In [117]:
np.dot(v1,A)

ValueError: shapes (3,) and (2,3) not aligned: 3 (dim 0) != 2 (dim 0)

In [118]:
print(np.dot(A,A.T)) #向量无区别，矩阵还是有区别的。

[[ 5 14]
 [14 50]]


##  Reading and writing arrays to disk

###  Text data

In [121]:
arr = np.arange(10).reshape(2,5)
np.savetxt('test.out',arr,fmt='%.2e',header="My dateset")
!cat test.out

# My dateset
0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00


In [122]:
arr2 = np.loadtxt('test.out')
print(arr2)

[[ 0.  1.  2.  3.  4.]
 [ 5.  6.  7.  8.  9.]]


###  Binary Data

In [125]:
np.save('test.npy',arr2)
arr2n = np.load('test.npy')
print('Any differences?',np.any(arr2-arr2n))
print(arr2n)
!cat test.npy

('Any differences?', False)
[[ 0.  1.  2.  3.  4.]
 [ 5.  6.  7.  8.  9.]]
�NUMPY F {'descr': '<f8', 'fortran_order': False, 'shape': (2, 5), }          
              �?       @      @      @      @      @      @       @      "@

###  .npz: multiple binary outputs in one file

In [130]:
np.savez('test.npz',array1=arr,array2=arr2)
arrays = np.load('test.npz')
arrays.files

['array2', 'array1']

In [134]:
print('First row of first array:',arrays['array1'][0][1:3])
print('First row of first array:',arrays.f.array1[0])

('First row of first array:', array([1, 2]))
('First row of first array:', array([0, 1, 2, 3, 4]))


In [132]:
print(arr)

[[0 1 2 3 4]
 [5 6 7 8 9]]


In [133]:
print(arr2)

[[ 0.  1.  2.  3.  4.]
 [ 5.  6.  7.  8.  9.]]


##  练习

1. 思考np.array和list有什么不同  
2. 如何使用mask方法快速截取数据
3. 思考view和copy的区别
4. np.array还有哪些属性和方法
5. 理解什么是broadcasting，如何使用
6. 如何计算向量、矩阵相乘
7. 如何从文件中读取数据

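1. All elements of an array must be of the same type, whereas a list can hold elements of arbitrary types. Arrays are also easier to compute with (vectorized operations, no explicit loops).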
2. Build a boolean mask from a condition (e.g. mask = arr > 9), then index the array with it: arr[mask].
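3. A view shares the same underlying data as the original array, so changing one also changes the other (e.g. the result of reshape above); a copy has its own independent data.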
4. with mat, dtype.
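5. Broadcasting stretches the lower-dimensional (or length-1) array to match the shape of the higher-dimensional one, so that the element-wise calculation becomes possible.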
6. Use np.dot. For 1-D vectors there is no difference between a row vector and a column vector; for matrices the shapes must be aligned (the inner dimensions must match).
7. Use np.loadtxt for text files, or np.load for binary .npy/.npz files.