# The `numpy` library

Example. Get the distances between points for all pairs. 

- `ndarray` as vector calculation
- linear algebra, random variable generation

In [4]:
!pip install numpy
import numpy as np

Defaulting to user installation because normal site-packages is not writeable


## Creating an array


`numpy.array(object, dtype, ndmin=0)`
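- object: an array-like object (e.g. a list or a nested list)
- dtype: the desired data type of the elements (inferred from `object` if omitted)
- ndmin: the minimum number of dimensions the resulting array should have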

In [5]:
# Build a 1-D array from a Python list; the result is an ndarray, not a list
arr1 = np.array(
    [0.3, 0.5, 4.2]
)
print(f"{arr1} {type(arr1)}")

[0.3 0.5 4.2] <class 'numpy.ndarray'>


In [6]:
# A 2-D array built from a nested list: 2 rows, 3 columns
arr2 = np.array([
    [3, 1, 1],
    [1, 1, 1],
])

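Properties of an array:
- ndim : the number of dimensions (axes) of the array
- shape : a tuple giving the size along each axis, e.g. `(n, m)` for an $n \times m$ matrix
- size : the total number of elements ($n\times m$ for an $n \times m$ matrix)
- dtype : the data type of the elements
- itemsize : the size of one element in bytes (an `int`)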

## Creating special array

- `arr.shape = 4,3` reshapes the array in place to `(4, 3)`
- `arange(start, stop, step)` generates evenly spaced values over an interval, like `range` (but `step` may be a float)
- `linspace(start, stop, sample_number)` generates an arithmetic progression with `sample_number` points
- `logspace` generates a geometric progression
- `zeros` for all 0 array 
- `eye` for $I_n$, identity matrix


In [10]:
# Evenly spaced values from 0 up to (but excluding) 1 in steps of 0.1.
# Only the last expression of a cell is echoed, so this result is not shown.
np.arange(0, 1, 0.1)
# A 3 x 4 x 5 array filled with zeros
np.zeros((3, 4, 5))

array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]])

## Type system of array

In [11]:
# All inputs are ints, so the inferred dtype is an integer type
arr = np.array([1, 1, 4])
# A value stored into an integer array is cast to that dtype: 1.14 becomes 1
arr[0] = 1.14

print(arr)  # looks unchanged because the fractional part was dropped on assignment

[1 1 4]


In [13]:
# Ask for a float dtype explicitly so that fractional values can be stored
arr = np.array([1, 1, 4], dtype=float)
arr[0] = 1.14

print(arr)  # this time the first element really is 1.14

[1.14 1.   4.  ]


## Generate Random Number

We have `seed`, `permutation`, `shuffle`, `binomial`, `normal`, `beta`, `chisquare` (the $\chi^2$ distribution), `gamma` (the $\Gamma$ distribution), and `uniform`. 

In [14]:
np.random.random(size=10)  # 10 samples drawn uniformly from [0, 1)
np.random.rand(3, 4)       # 3 x 4 array, also uniform on [0, 1)

np.random.randn(10, 2, 4)  # 10 x 2 x 4 array of standard-normal samples

array([[0.06991414, 0.10876226, 0.43486834, 0.60629697],
       [0.76864276, 0.70051242, 0.36683188, 0.30411878],
       [0.05675549, 0.6227112 , 0.76454249, 0.71963441]])

## Index system

In [18]:
arr1 = np.array([0.3, 0.78, 0.21, 5, 3.2])

# An integer index returns one element; negative indices count from the end
print(arr1[0], arr1[-1])

# A slice returns an array, even when it selects a single element
print(arr1[1:2])

# Compare the shapes: an integer index gives a 0-d scalar, a slice a 1-D array
print(np.shape(arr1[3]))
print(np.shape(arr1[-4:-2]))

0.3 3.2
[0.78]
()
(2,)


In [22]:
# Boolean (mask) indexing
arr2 = np.array([2.3, 1.1, 4.5])
print(arr2)
# A boolean sequence of the same length keeps only the positions marked True
keep_last = [False, False, True]
print(arr2[keep_last])

# A comparison is evaluated element by element and yields a boolean array
arr2 > 2


[2.3 1.1 4.5]
[4.5]


array([ True, False,  True])

In [30]:
# Indexing an array with more than one dimension

arr3 = np.arange(1, 13).reshape(3, 4)
print(arr3)

# The value 12 sits at row index 2, column index 3
print(arr3[2, 3])

# Third row: a bare `:` (equivalent to `0:`) selects the whole axis
print(arr3[2, :])
# First column
print(arr3[:, 0])

# A 2 x 2 square: rows 1-2 and columns 1-2
print(arr3[1:3, 1:3])

# Slicing on both axes keeps the result 2-D: shape (1, 4) rather than (4,)
print(arr3[2:, :])

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
12
[ 9 10 11 12]
[1 5 9]
[[ 6  7]
 [10 11]]
[[ 9 10 11 12]]


In [31]:
# Boolean indexing on rows

# Keep every row whose first-column entry is greater than 4
first_col_gt4 = arr3[:, 0] > 4
print(arr3[first_col_gt4])

[[ 5  6  7  8]
 [ 9 10 11 12]]


Now we can solve the problem from the beginning: compute the distances between all pairs of points.

In [34]:
# Pairwise Euclidean distances between n points (x[i], y[i]).
n = 10
x = np.linspace(1, 100, n)
y = np.linspace(1, 100, n)

# Vectorized instead of a Python double loop:
# x[:, np.newaxis] has shape (n, 1) and x[np.newaxis, :] has shape (1, n),
# so their difference is an (n, n) array with dx[i, j] = x[i] - x[j].
dx = x[:, np.newaxis] - x[np.newaxis, :]
dy = y[:, np.newaxis] - y[np.newaxis, :]

# dist[i, j] is the distance between point i and point j
dist = np.sqrt(dx**2 + dy**2)

print(dist)

[[  0.          15.55634919  31.11269837  46.66904756  62.22539674
   77.78174593  93.33809512 108.8944443  124.45079349 140.00714267]
 [ 15.55634919   0.          15.55634919  31.11269837  46.66904756
   62.22539674  77.78174593  93.33809512 108.8944443  124.45079349]
 [ 31.11269837  15.55634919   0.          15.55634919  31.11269837
   46.66904756  62.22539674  77.78174593  93.33809512 108.8944443 ]
 [ 46.66904756  31.11269837  15.55634919   0.          15.55634919
   31.11269837  46.66904756  62.22539674  77.78174593  93.33809512]
 [ 62.22539674  46.66904756  31.11269837  15.55634919   0.
   15.55634919  31.11269837  46.66904756  62.22539674  77.78174593]
 [ 77.78174593  62.22539674  46.66904756  31.11269837  15.55634919
    0.          15.55634919  31.11269837  46.66904756  62.22539674]
 [ 93.33809512  77.78174593  62.22539674  46.66904756  31.11269837
   15.55634919   0.          15.55634919  31.11269837  46.66904756]
 [108.8944443   93.33809512  77.78174593  62.22539674  46.66904

## Changing shape



In [43]:
arr4 = np.arange(1, 13)
print(arr4)

# Arrange the 12 values as 3 rows x 4 columns
arr5 = arr4.reshape(3, 4)

# Back to 1-D, reading row by row (the default order) ...
print(arr5.ravel())
# ... and reading column by column ('F' order)
print(arr5.flatten(order='F'))

# Join two copies along a new leading axis -> shape (2, 3, 4)
arr6 = np.stack((arr5, arr5))
print(arr6)


[ 1  2  3  4  5  6  7  8  9 10 11 12]
[ 1  2  3  4  5  6  7  8  9 10 11 12]
[ 1  5  9  2  6 10  3  7 11  4  8 12]
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]]


# Matrix in `numpy`


In [48]:
# Two ways to build the same 3 x 3 matrix: from a string (spaces separate
# columns, semicolons separate rows) and from a nested list.
# np.matrix is used for both because the np.mat alias was removed in NumPy 2.0.
mat1 = np.matrix("1 2 3; 4 5 6; 7 8 9")
mat2 = np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(mat1)

# Make up a bigger one from blocks. Passing the matrices themselves avoids the
# string form, which has to look the names up in the calling scope.
big = np.bmat([[mat1, mat2], [mat2, mat1]])
print(big)

# Transpose
print(mat1.T)

# Inverse: only a full-rank square matrix has one. mat1 has rank 2
# (row1 - 2*row2 + row3 == 0), so check first instead of letting `.I` fail.
if np.linalg.matrix_rank(mat1) == mat1.shape[0]:
    print(mat1.I)
else:
    print("mat1 is singular, so it has no inverse")

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]
 [7 8 9 7 8 9]
 [1 2 3 1 2 3]
 [4 5 6 4 5 6]
 [7 8 9 7 8 9]]


## Universal function

This is also called a vectorized operation: the function is applied to every element of the array.

In [57]:
# Arithmetic operators act element by element between an array and a scalar
print(arr1, arr1 + 1, arr1 * 2, arr1 ** 2)

# ... and between an array and a sequence of the same length
arr2 = [4, 1, 1, 5, 1]
print(arr1 + arr2, arr1 * arr2, arr1 > arr2)

# Reduce a boolean array: does any / does every element equal the value?
print((arr1 == 0.3).any())
print((arr1 == 0.2).all())

[0.3  0.78 0.21 5.   3.2 ] [1.3  1.78 1.21 6.   4.2 ] [ 0.6   1.56  0.42 10.    6.4 ] [ 0.09    0.6084  0.0441 25.     10.24  ]
[ 4.3   1.78  1.21 10.    4.2 ] [ 1.2   0.78  0.21 25.    3.2 ] [False False False False  True]
True
False


## Broadcast mechanism

When two operands have different shapes, `numpy` will try to stretch (repeat) the smaller one so that its shape matches the operand with more elements.

In [None]:
arr3 = np.arange(1,13)
