In [None]:
""" CONTENT
* The Basics
    # An example
    # Array creation
    # printing arrays
    # basic operations
    # universal functions
    # indexing slicing and iterating
* Shape manipulation
    # changing the shape of an array
    # stacking together different arrays
    # splitting one array into several smaller ones
* Copies and Views
    # No copy at all
    # view or shallow copy
    # deep copy
    # functions and methods overview
* Less basic
    # Broadcasting rules
* Fancy indexing and index tricks
    # indexing with arrays of indices
    # indexing with boolean arrays
    # the ix_() function
    # indexing with strings
* Linear Algebra
    # simple array operations
* Tricks and tips
    # 'Automatic' reshaping
    # Vector stacking
    # Histograms
* Further reading
"""

In [None]:
"""
* The Basics
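#   In NumPy dimensions are called axes, e.g. [[ 1., 0., 0.],[ 0., 1., 2.]] has two axes.
#   The first axis (axis = 0) has a length of 2, the second axis (axis = 1) has a length of 3.
#   axis = 0 runs down the rows, so reducing over it gives one result per column;
#   axis = 1 runs across the columns, so reducing over it gives one result per row.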
""" 
# ndarray.ndim : the number of axes
# ndarray.shape : (n,m) n rows,m columns
# ndarray.size : n*m the total number of elements 
# ndarray.dtype : data type. e.g. numpy.int32 / numpy.int16 / numpy.float64 / numpy.complex64
# ndarray.itemsize : the size in bytes of each element of arrays; 8 = 64/8 for numpy.float64
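# ndarray.data : the buffer containing the actual elements of the array; normally elements are accessed through indexing instead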

In [1]:
# An example: build two small arrays and inspect their basic attributes
import numpy as np

arr1 = np.arange(15).reshape((3, 5))   # the integers 0..14 laid out as 3 rows x 5 columns
arr2 = np.array([6, 7, 8])             # a 1-D array built from a list
arr1.shape                             # (3, 5)
arr1.itemsize                          # number of bytes used by one element
type(arr2)                             # last expression, so it is shown as the cell output

numpy.ndarray

In [7]:
# Array creation
# 1. From existing Python sequences (a list or a tuple)
a = np.array([1, 2, 3])                    # pass ONE sequence; np.array(1, 2, 3) is an error
b = np.array((1, 2))
c = np.array([[1, 2], [2, 3], [3, 4]])     # nested sequences become 2-D (and higher) arrays
d = np.array([6, 7, 8], dtype=np.int32)    # the element type can be requested explicitly
# 2. Placeholder arrays, for when only the shape is known up front
np.zeros((2, 3))                           # the shape is passed as a tuple
np.ones((3, 4), dtype=np.int16)
np.empty((2, 3))                           # contents are uninitialized (arbitrary), not random draws
# 3. Number sequences and random samples
np.arange(10, 30, 5)                       # like range(), but returns an array instead of a list
np.arange(0, 2, 0.3)                       # float arguments are accepted
np.linspace(0, 2, 9)                       # 9 evenly spaced numbers from 0 to 2
np.random.rand(2, 3)                       # 2 rows and 3 columns, uniform samples in [0, 1)
np.random.randn(2, 3)                      # 2 rows and 3 columns, standard normal; sigma * np.random.randn(...) + mu rescales
np.random.randint(3, 8)                    # one int from 3 to 7 -- the upper bound 8 is exclusive

6

In [8]:
# printing arrays
"""
the last axis is printed from left to right,
the second-to-last is printed from top to bottom,
the rest are also printed from top to bottom, with each slice separated from the next by an empty line.
"""
# 24 consecutive integers arranged as 2 slices of 3 rows x 4 columns
c = np.reshape(np.arange(24), (2, 3, 4))
print(c)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


In [10]:
# basic operations
## Arithmetic operators act on arrays element by element
a = np.array([20, 30, 40, 50])
b = np.arange(4)
c = a - b                # [20, 29, 38, 47]
b ** 2                   # [0, 1, 4, 9]
10 * np.sin(a)           # sine of every element, scaled by 10
a < 35                   # [ True, True, False, False]

A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])
A * B                    # elementwise product
A @ B                    # matrix product (operator spelling)
A.dot(B)                 # matrix product (method spelling)
# In-place operators (+= and *=) modify the existing array instead of building a new one
a = np.ones((2, 3), dtype=int)
b = np.random.random((2, 3))
a *= 3
b += a
# a += b would raise a TypeError: the float result is not automatically converted back to a's integer type

# When arrays of different types are combined, the result takes
# the more general or more precise of the two types
c = a + b
print(a.dtype.name, b.dtype.name, c.dtype.name)

# Reductions are available as ndarray methods, e.g. ndarray.sum(), ndarray.min(), ndarray.max()
a = np.random.random((2, 3))
a.sum()
a.min()
a.max()
b = np.arange(12).reshape((3, 4))
b.sum(axis=0)            # sum of each column
b.min(axis=1)            # min of each row
b.cumsum(axis=1)         # cumulative sum along each row

int32 float64 float64


array([[ 0,  1,  3,  6],
       [ 4,  9, 15, 22],
       [ 8, 17, 27, 38]], dtype=int32)

In [11]:
# universal functions ("ufuncs"): called as numpy.<name>(...); they are functions, not methods of the ndarray class
# they operate elementwise on an array and produce an array as output
# e.g. np.sin(), np.cos(), np.exp(), np.sqrt(), np.add(a, b), np.floor() and ...
# see also:
# all, any, apply_along_axis, argmax, argmin, argsort, average, bincount, ceil, clip,
# conj, corrcoef, cov, cross, cumprod, cumsum, diff, dot, floor, inner, inv, lexsort,
# max, maximum, mean, median, min, minimum, nonzero, outer, prod, re, round, sort, std,
# sum, trace, transpose, var, vdot, vectorize, where
arr1 = np.arange(0, 3)   # [0, 1, 2]
np.exp(arr1)             # e raised to each element

array([1.        , 2.71828183, 7.3890561 ])

In [19]:
# indexing, slicing and iterating
## one-dimensional
a = np.arange(10)**2
a[2]
a[2:5]
a[:6:2] = 100 # from the start up to (not including) position 6, set every 2nd element to 100
a[::-1]       # a reversed
# a 1-D array can also be iterated like a list, e.g.:  for i in a: print(i**0.5)

## multi-dimensional: one index per axis, separated by commas
b = np.arange(15).reshape(3,5)
b[2,3]
b[0:2,1]
b[:,1]
b[1:3,:]
b[-1]    # equivalent to b[-1,:]
c = np.arange(12).reshape(2,2,3)
c[1,...]    # equivalent to c[1,:,:]
c[...,2]    # equivalent to c[:,:,2]

# Iterating over multidimensional arrays is done with respect to the first axis.
# The loop variables get their own names so they do not overwrite the
# notebook-level array `arr1` defined in the universal-functions cell.
for sub_array in c:
    print(sub_array)
# using the flat attribute to perform an operation on each element
for element in c.flat:
    print(element)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]
0
1
2
3
4
5
6
7
8
9
10
11


In [25]:
"""
* Shape manipulation
ravel / reshape / T / resize /
"""
# 1. ravel / reshape / T hand back a modified array and leave the original array unchanged
a = np.floor(np.random.random((3, 4)) * 10)   # np.floor() rounds down, np.round() rounds to the nearest integer, np.ceil() rounds up
a.ravel()             # the array flattened to one dimension,
                      # in "C-style" order: the rightmost index changes the fastest (row by row)
a.reshape((6, 2))
a.reshape((6, -1))    # same as a.reshape(6, 2): the -1 dimension is computed automatically
a.T                   # transpose
# 2. the ndarray.resize method, in contrast, modifies the array itself
a.resize((2, 6))      # returns None; a itself now has the new shape
a

array([[5., 7., 3., 6., 2., 9.],
       [8., 0., 3., 2., 4., 2.]])

In [29]:
# stacking together different arrays along different axes
# np.vstack / np.hstack / np.concatenate receive the arrays as one tuple argument;
# np.r_ and np.c_ are indexed with square brackets instead of being called
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 10], [6, 8]])
np.vstack((a, b))                 # the tuple (a, b), stacked vertically
np.hstack((a, b))                 # the tuple (a, b), stacked horizontally
np.concatenate((a, b), axis=0)    # same as np.vstack((a, b))
np.concatenate((a, b), axis=1)    # same as np.hstack((a, b))
np.r_[np.array([1, 2, 3]), 0, 0, np.array([4, 5, 6])]   # arrays and scalars joined into one 1-D array
np.r_[1:4, 0, 4]                  # [1, 2, 3, 0, 4]
np.c_[np.array([1, 2, 3]), np.array([4, 5, 6])]         # the two 1-D arrays become the columns of a 2-D array

array([[1, 4],
       [2, 5],
       [3, 6]])

In [33]:
# splitting one array into several smaller ones
# np.hsplit() / np.vsplit() / np.array_split()
a = np.arange(24).reshape((2, 12))
np.hsplit(a, 3)                  # split a into 3 arrays
np.hsplit(a, (3, 4))             # split a after the third and after the fourth column
np.array_split(a, 2, axis=1)     # axis=1 corresponds to np.hsplit

[array([[ 0,  1,  2,  3,  4,  5],
        [12, 13, 14, 15, 16, 17]]), array([[ 6,  7,  8,  9, 10, 11],
        [18, 19, 20, 21, 22, 23]])]

In [34]:
"""
* Copies and Views
"""
# No copy at all: plain assignment only binds a second name to the same object
a = np.arange(12)
b = a              # no new object is created
b is a             # True -- a and b are two names for the same ndarray object
b.shape = (3, 4)   # changes the shape of a as well

True

In [44]:
# view or shallow copy
# Different array objects can share the same data.
# The view method creates a new array object that looks at the same data
c = a.view()
c is a               # False
c.base is a          # True -- c is a view of the data owned by a
c.flags.owndata      # False
c.shape = (2, 6)     # a's shape doesn't change
c[0, 4] = 1234       # a's data changes

# Slicing an array returns a view of it
a = np.arange(12).reshape((3, 4))
s = a[:, 1:3]
s[:] = 10            # s[:] is a view of s, so this writes into the shared data
a                    # a's data changes

array([[ 0, 10, 10,  3],
       [ 4, 10, 10,  7],
       [ 8, 10, 10, 11]])

In [45]:
# deep copy: copy() makes a complete copy of the array and of its data
d = a.copy()      # d is a new array object that doesn't share anything with a

In [46]:
# functions and methods overview
"""
Array Creation
    arange, array, copy, empty, empty_like, eye, fromfile, fromfunction, identity, linspace, logspace, mgrid, ogrid, ones, ones_like, r_, zeros, zeros_like
Conversions
    ndarray.astype, atleast_1d, atleast_2d, atleast_3d, mat
Manipulations
    array_split, column_stack, concatenate, diagonal, dsplit, dstack, hsplit, hstack, ndarray.item, newaxis, ravel, repeat, reshape, resize, squeeze, swapaxes, take, transpose, vsplit, vstack
Questions
    all, any, nonzero, where
Ordering
    argmax, argmin, argsort, max, min, ptp, searchsorted, sort
Operations
    choose, compress, cumprod, cumsum, inner, ndarray.fill, imag, prod, put, putmask, real, sum
Basic Statistics
    cov, mean, std, var
Basic Linear Algebra
    cross, dot, outer, linalg.svd, vdot
"""

'\nArray Creation\n    arange, array, copy, empty, empty_like, eye, fromfile, fromfunction, identity, linspace, logspace, mgrid, ogrid, ones, ones_like, r, zeros, zeros_like\nConversions\n    ndarray.astype, atleast_1d, atleast_2d, atleast_3d, mat\nManipulations\n    array_split, column_stack, concatenate, diagonal, dsplit, dstack, hsplit, hstack, ndarray.item, newaxis, ravel, repeat, reshape, resize, squeeze, swapaxes, take, transpose, vsplit, vstack\nQuestions\n    all, any, nonzero, where\nOrdering\n    argmax, argmin, argsort, max, min, ptp, searchsorted, sort\nOperations\n    choose, compress, cumprod, cumsum, inner, ndarray.fill, imag, prod, put, putmask, real, sum\nBasic Statistics\n    cov, mean, std, var\nBasic Linear Algebra\n    cross, dot, outer, linalg.svd, vdot\n'

In [53]:
"""
* Fancy indexing and index tricks
"""
# Indexing with arrays of indices: the result is an array
a = np.arange(12)**2                      # the first 12 square numbers
i = np.array( [ 1,1,3,8,5 ] )             # an array of indices (repeats are allowed)
a[i]                                      # the elements of a at the positions i
j = np.array( [ [ 3, 4], [ 9, 7 ] ] )     # a two-dimensional array of indices
a[j]                                      # the result has the same shape as j

# When the indexed array a is multidimensional, a single array of indices refers to the first dimension of a.
palette = np.array( [ [0,0,0],                # black
                      [255,0,0],              # red
                      [0,255,0],              # green
                      [0,0,255],              # blue
                      [255,255,255] ] )       # white
image = np.array( [ [ 0, 1, 2, 0 ],           # each value corresponds to a color in the palette
                    [ 0, 3, 4, 0 ]  ] )       # palette is 2-D; each index picks an entry along its first dimension, e.g. [0,0,0]
palette[image]                                # the (2,4,3) color image

# giving indexes for more than one dimension
a = np.arange(12).reshape(3,4)
i = np.array( [ [0,1],         # indices for the first dim of a
                [1,2]])
j = np.array( [ [2,1],         # indices for the second dim of a
                [3,3]])
a[i,j]                         # i and j must have equal shape
                               # [[(0,2),(1,1)],
                               #  [(1,3),(2,3)]]
a[i,2]
l = [i,j]
# A *list* of index arrays is not interpreted as a[i, j] by current NumPy:
# it is converted to a single index array for the first axis, which fails here
# because index 3 is out of bounds for axis 0. A tuple keeps the a[i, j] meaning.
a[tuple(l)]                    # same as a[i, j]

# Typical use: for several time-dependent series, find when each one reaches its maximum
time = np.linspace(20, 145, 5)
data = np.sin(np.arange(20)).reshape(5,4)
ind = data.argmax(axis=0)          # np.argmax()   index of the maxima for each series
time_max = time[ind]               # times corresponding to the maxima
data_max = data[ind, range(data.shape[1])]   # the maximum value of each series, i.e. data[ind[k], k]

In [56]:
# Indexing with Boolean Arrays
a = np.arange(12).reshape((3, 4))
b = a > 4                      # b is a boolean array with a's shape
a[b]                           # 1-D array with the selected elements: [ 5,  6,  7,  8,  9, 10, 11]
                               # (only the entries where b is True are kept, so the shape changes)
a[b] = 0                       # all elements of a greater than 4 become 0

# One boolean array per axis selects whole rows / columns
a = np.arange(12).reshape((3, 4))
b1 = np.array([False, True, True])            # first dim selection
b2 = np.array([True, False, True, False])     # second dim selection
a[b1, :]                                      # selecting rows, same as a[b1]
a[:, b2]                                      # selecting columns
a[b1, b2]                                     # a weird thing to do: pairs up the True positions, giving a[1, 0] and a[2, 2]

array([[0, 1, 2, 3],
       [4, 0, 0, 0],
       [0, 0, 0, 0]])

In [None]:
# the ix_() function


In [57]:
# Simple Array Operations
a = np.array([[1.0, 2.0],
              [3.0, 4.0]])
a.transpose()               # rows and columns swapped
np.linalg.inv(a)            # matrix inverse
u = np.eye(2)               # 2x2 identity matrix
j = np.array([[0.0, -1.0],
              [1.0, 0.0]])
j @ j                       # matrix product
np.trace(u)                 # sum of the diagonal elements
y = np.array([[5.0], [7.0]])
np.linalg.solve(a, y)       # solves a @ x = y for x
np.linalg.eig(j)            # eigenvalues and eigenvectors of j

(array([0.+1.j, 0.-1.j]),
 array([[0.70710678+0.j        , 0.70710678-0.j        ],
        [0.        -0.70710678j, 0.        +0.70710678j]]))

In [58]:
# Tricks and Tips
# "Automatic" reshaping: a dimension given as -1 is computed from the others
a = np.arange(30)
a.shape = (2, -1, 3)   # -1 means "whatever is needed": here 30 / (2 * 3) = 5
a



array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]],

       [[15, 16, 17],
        [18, 19, 20],
        [21, 22, 23],
        [24, 25, 26],
        [27, 28, 29]]])