# numpy basics: part 1
(1) Initializing an array and getting its properties using arange(), reshape(), shape, ndim, size...
(2) Concatenating and splitting an array using concatenate(), split()

In [126]:
import numpy as np

In [3]:
# 2x2 array filled with floating-point zeros
np.zeros(shape=(2, 2))

array([[0., 0.],
       [0., 0.]])

In [4]:
# 2x2 array filled with floating-point ones
np.ones((2, 2))

array([[1., 1.],
       [1., 1.]])

In [5]:
# 1-D array of five copies of the fill value 7
np.full(5, 7)

array([7, 7, 7, 7, 7])

## arange() in numpy is similar to range() in python

In [6]:
# integers from 0 up to (but not including) 10
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
# start at 1, stop before 10, advance by 2
np.arange(1, 10, 2)

array([1, 3, 5, 7, 9])

In [8]:
# unlike Python's range(), the step may be a float
np.arange(0, 10, 2.5)

array([0. , 2.5, 5. , 7.5])

In [162]:
# Using numpy to create an array is also faster than using plain Python.
# Baseline: double 10 million numbers with a pure-Python list comprehension.
tmp = [i for i in range(10000000)]
%timeit l = [2*e for e in tmp]

503 ms ± 24 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [163]:
tmp = np.arange(10000000)
%timeit l = np.array(2*e for e in tmp)

1.55 µs ± 17.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## linspace() is also similar to range() (major difference: the 3rd parameter is how many evenly spaced numbers to generate, not the step size)
e.g., np.linspace(0,10,11) returns 11 evenly spaced numbers from 0 to 10, with both endpoints included

In [9]:
# two samples: just the endpoints 0 and 10
np.linspace(0, 10, num=2)

array([ 0., 10.])

In [10]:
# eleven samples from 0 to 10 inclusive, i.e. a spacing of 1
np.linspace(0, 10, num=11)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

## random()

In [11]:
# one random integer drawn from 0..9 (the upper bound is excluded)
np.random.randint(low=0, high=10)

6

In [12]:
# ten random integers from 0..9 as a 1-D array
np.random.randint(low=0, high=10, size=10)

array([5, 1, 0, 9, 7, 6, 8, 7, 0, 2])

In [13]:
# a 2x2 matrix of random integers from 0..9
np.random.randint(low=0, high=10, size=(2, 2))

array([[2, 4],
       [1, 4]])

In [14]:
# fix the global random seed so the draws that follow are repeatable
np.random.seed(seed=666)

In [15]:
# 2x2 matrix of random integers from 4..7
np.random.randint(low=4, high=8, size=(2, 2))

array([[4, 6],
       [5, 6]])

In [16]:
# another 2x2 draw from 0..9, continuing the seeded sequence
np.random.randint(low=0, high=10, size=(2, 2))

array([[6, 9],
       [4, 3]])

In [17]:
# five random floats
np.random.random(size=5)

array([0.25987554, 0.28116849, 0.46284169, 0.23340091, 0.76706421])

In [18]:
# five samples from a normal distribution with mean 0 and standard deviation 5
np.random.normal(loc=0, scale=5, size=5)

array([-1.9240094 ,  6.00445654, -4.39132244, -2.33989719,  4.53402869])

In [19]:
# IPython help syntax: a trailing ? displays the documentation for np.random.normal
np.random.normal?

## ndim (dimension), shape, size (number of elements)

In [21]:
# 1-D vector holding the integers 0 through 9
x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
# the same ten values laid out as 2 rows by 5 columns
X = np.arange(0, 10).reshape((2, 5))
X

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [25]:
# number of dimensions of the vector x and of the matrix X
print(np.ndim(x), np.ndim(X))

1 2


In [26]:
# length along each dimension, reported as a tuple
print(np.shape(x), np.shape(X))

(10,) (2, 5)


In [27]:
# X[1,1] indexes row and column in one step; X[1][1] first takes row 1, then element 1 of that row
print(X[1,1], X[1][1])

6 6


In [31]:
# Slicing works like a Python list: [start:stop], and either bound may be omitted
x[:10], x[0:], x[0:10]

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

In [34]:
# a step of 2 picks every second element
x[::2]

array([0, 2, 4, 6, 8])

In [35]:
# a step of -1 walks the array backwards
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [43]:
# 5x5 matrix holding 1..25, filled row by row
matrix = np.arange(1, 26).reshape((5, 5))
matrix

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [47]:
# Getting an element from a matrix with X[1,1] is more intuitive than X[1][1].
# matrix[:3][:4] first fetches 3 rows, then tries to fetch the first 4 rows of
# that result. Since there are only 3 rows, all of them are returned
# (no column selection happens at all).
matrix[:3, :4], matrix[:3][:4]

(array([[ 1,  2,  3,  4],
        [ 6,  7,  8,  9],
        [11, 12, 13, 14]]),
 array([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10],
        [11, 12, 13, 14, 15]]))

In [50]:
# Using slicing to access: matrix[0] is the first row, matrix[:, 0] is the first column
matrix[0], matrix[:, 0]

(array([1, 2, 3, 4, 5]), array([ 1,  6, 11, 16, 21]))

In [51]:
# reverse the order of the rows and of the columns at the same time
matrix[::-1, ::-1]

array([[25, 24, 23, 22, 21],
       [20, 19, 18, 17, 16],
       [15, 14, 13, 12, 11],
       [10,  9,  8,  7,  6],
       [ 5,  4,  3,  2,  1]])

Slicing a numpy array does not copy the data: the slice still refers to the original array. This is different from slicing a Python list, which creates a new list.

In [53]:
# take the top-left 2x2 corner of matrix
subMat = matrix[:2, :2]
subMat

array([[1, 2],
       [6, 7]])

In [54]:
# write to matrix, then display subMat to see whether the change shows through
matrix[0,0] = 999
subMat

array([[999,   2],
       [  6,   7]])

In [57]:
# Explicitly use copy() to make a copy. Otherwise, the slice keeps referring to the original array
subMat[0,0] = 1                 # subMat is still the slice of matrix here, so this also restores matrix[0,0]
subMat = matrix[:2, :2].copy()  # rebind subMat to an independent copy of the corner
subMat[0,0] = 999               # modifies the copy only
matrix, subMat

(array([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10],
        [11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20],
        [21, 22, 23, 24, 25]]),
 array([[999,   2],
        [  6,   7]]))

# reshape() 
Used along with arange() method to create a new array/matrix or reshape an existing one

In [62]:
# reshape() returns the values arranged as 2 rows x 5 columns; x itself keeps its shape
x2 = x.reshape((2, 5))
x, x2

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]]))

In [63]:
# a 1x10 matrix is 2-D even though it holds a single row
x2 = x.reshape((1, 10))
x.ndim, x2.ndim

(1, 2)

In [67]:
# -1 asks numpy to work out the number of rows from the 5 requested columns
x.reshape((-1, 5))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [66]:
# -1 asks numpy to work out the number of columns from the 10 requested rows
x.reshape((10, -1))

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

# Combine vectors or matrices using concatenate(), vstack(), or hstack()
v: vertical, h: horizontal

In [79]:
# three 1-D vectors to combine; y has 5 elements so it can later join the 5-column matrix
x = np.array([1, 2, 3])
y = np.array([4, 5, 6, 7, 8])
z = np.array([7, 8, 9])

In [80]:
# axis=0 is the default, so these two calls are equivalent
np.concatenate((x, y))
np.concatenate((x, y), axis=0)

array([1, 2, 3, 4, 5, 6, 7, 8])

In [74]:
# any number of arrays can be joined in one call
np.concatenate((x, y, z))

array([1, 2, 3, 4, 5, 6, 7, 8, 7, 8, 9])

In [75]:
# with the default axis the second matrix is appended below the first
np.concatenate((matrix, matrix))

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25],
       [ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [78]:
# axis=1 appends the second matrix to the right of the first
np.concatenate((matrix, matrix), axis=1)

array([[ 1,  2,  3,  4,  5,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 21, 22, 23, 24, 25]])

In [82]:
# Note that all arrays passed to concatenate must have the same number of dimensions.
# matrix is 2-D and y is 1-D, so this call raises ValueError. The error is caught
# and printed so the demonstration does not stop a "Restart & Run All".
try:
    np.concatenate([matrix, y])
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [83]:
# reshape y into a single 2-D row first so both inputs are 2-D
np.concatenate((matrix, y.reshape((1, -1))))

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25],
       [ 4,  5,  6,  7,  8]])

In [91]:
# the reshaped row has two dimensions
y.reshape((1, -1)).ndim

2

In [96]:
# vstack takes the 1-D y as-is and stacks it as an extra row under matrix
np.vstack((matrix, y))

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25],
       [ 4,  5,  6,  7,  8]])

In [99]:
# a 5x1 column of ones appended to the right of matrix
y1 = np.ones((5, 1))
np.hstack((matrix, y1))

array([[ 1.,  2.,  3.,  4.,  5.,  1.],
       [ 6.,  7.,  8.,  9., 10.,  1.],
       [11., 12., 13., 14., 15.,  1.],
       [16., 17., 18., 19., 20.,  1.],
       [21., 22., 23., 24., 25.,  1.]])

# Split an array using split(), vsplit(), hsplit()

In [101]:
# IPython help syntax: a trailing ? displays the documentation for np.split
np.split?

In [103]:
# cut x at index 2: the elements before it, and the rest
a1, a2 = np.split(x, [2])
a1, a2

(array([1, 2]), array([3]))

In [106]:
# two cut points (indices 2 and 4) produce three pieces
y1, y2, y3 = np.split(y, [2, 4])
y1, y2, y3

(array([4, 5]), array([6, 7]), array([8]))

In [108]:
# without an axis argument the matrix is cut between rows: first 2 rows, then the rest
m1, m2 = np.split(matrix, [2])
m1, m2

(array([[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]]),
 array([[11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20],
        [21, 22, 23, 24, 25]]))

In [110]:
# axis=1 cuts between columns instead: first 2 columns, then the rest
m1, m2 = np.split(matrix, [2], axis=1)
m1, m2

(array([[ 1,  2],
        [ 6,  7],
        [11, 12],
        [16, 17],
        [21, 22]]),
 array([[ 3,  4,  5],
        [ 8,  9, 10],
        [13, 14, 15],
        [18, 19, 20],
        [23, 24, 25]]))

In [116]:
# hsplit cuts between columns: first 3 columns, then the remaining 2
m1, m2 = np.hsplit(matrix, [3])
m1, m2

(array([[ 1,  2,  3],
        [ 6,  7,  8],
        [11, 12, 13],
        [16, 17, 18],
        [21, 22, 23]]),
 array([[ 4,  5],
        [ 9, 10],
        [14, 15],
        [19, 20],
        [24, 25]]))

In [119]:
# split off the last column: X keeps every column but the last, y is the last column
X, y = np.hsplit(matrix, [-1])
X, y

(array([[ 1,  2,  3,  4],
        [ 6,  7,  8,  9],
        [11, 12, 13, 14],
        [16, 17, 18, 19],
        [21, 22, 23, 24]]),
 array([[ 5],
        [10],
        [15],
        [20],
        [25]]))

In [121]:
# lay the column y out as a single 2-D row
y.reshape((1, -1))

array([[ 5, 10, 15, 20, 25]])

In [123]:
# take column 0 of the 5x1 y to get a plain 1-D vector
y[:,0]

array([ 5, 10, 15, 20, 25])