# Numpy Primer

## 1. Introduction

- NumPy is a linear algebra library for Python
- Almost all of the libraries in the PyData ecosystem rely on NumPy as one of their main building blocks
- Fast and efficient

## 2. Data Structure

### 2.1 Numpy Arrays
- Numpy arrays are the main data structure of numpy
- Numpy arrays come in two flavors: vectors and matrices
- Vectors are strictly 1-d arrays
- Matrices are 2-d arrays

In [3]:
## Python List
my_list = [1, 2, 3]
my_list

[1, 2, 3]

In [6]:
## Load numpy library
import numpy as np
## Cast a normal Python list to a numpy array: vector
my_array = np.array(my_list)
## my_array is a vector now
my_array

array([1, 2, 3])

In [8]:
## Cast a list of lists to a numpy array: matrix
my_mat_list = [[1,2,3],[4,5,6],[7,8,9]]
my_matrix = np.array(my_mat_list)
my_matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [23]:
## Use Numpy built in functions to generate arrays
np.arange(1,10,1).reshape((3,3))
## arange(start, end, step): similar to Python's built-in range function; end is exclusive

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [15]:
## Arrays of all zeros
np.zeros(3) # a vector

array([ 0.,  0.,  0.])

In [18]:
np.zeros((3,3)) # a matrix, add a tuple

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [20]:
## Arrays of all ones
print(np.ones(3))
print(np.ones((4,5)))

[ 1.  1.  1.]
[[ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]


In [25]:
## Linspace
np.linspace(1,7,16).reshape((4,4))
## np.linspace(start, end, number of evenly spaced values); both start and end are included

array([[ 1. ,  1.4,  1.8,  2.2],
       [ 2.6,  3. ,  3.4,  3.8],
       [ 4.2,  4.6,  5. ,  5.4],
       [ 5.8,  6.2,  6.6,  7. ]])

In [37]:
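## Compare arange with linspace
## Note: arange excludes the end point, so x has 19 values (1.0 to 19.0) while y has 20 (1.0 to 20.0);
## zip stops at the shorter sequence, so only 19 comparisons are printed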
x = np.arange(1,20,1.0)
y = np.linspace(1,20,20)
for i,j in zip(x,y):
    print(i == j)

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [39]:
## Create an identity matrix; np.eye(n) returns an n x n (square) matrix
np.eye(4)

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

In [43]:
## Create random numbers
np.random.rand(3,4) # generate a random matrix

array([[ 0.85995264,  0.9644989 ,  0.31791386,  0.74911753],
       [ 0.11023859,  0.10578448,  0.274304  ,  0.83699805],
       [ 0.52763734,  0.78594664,  0.65480478,  0.13675752]])

In [47]:
## Draw random samples from the standard normal distribution
np.random.randn(3,3)

array([[-0.65832639,  1.07838899, -0.54504856],
       [-0.31292979,  0.272115  ,  1.28246085],
       [-0.06930708, -0.75758972,  0.2504943 ]])

In [51]:
## Create a random integer
np.random.randint(1,101)
np.random.randint(1,101,10)
np.random.randint(1,101,10).reshape((2,5))
## np.random.randint(lower bound (inclusive), upper bound (exclusive), number of items)

array([[24, 99, 23, 11, 55],
       [27, 14, 21, 33, 40]])

### 2.2 Numpy Array Features and Attributes

In [88]:
## Features of arrays
my_arr = np.arange(25)
print(my_arr)
ran_arr = np.random.randint(0,50,10)
print(ran_arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]
[29 18  6 46  8 28  0 30 22 48]


#### 2.2.1 Reshape

In [97]:
my_arr = my_arr.reshape(5,5)
print(my_arr)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


#### 2.2.2 Find max, min, mean, index location

In [90]:
print(ran_arr.mean())
print(ran_arr.min())
print(ran_arr.max())

23.5
0
48


In [91]:
## Find the index location
print(ran_arr.argmin())
print(ran_arr.argmax())

6
9


In [100]:
## Find the index location of a particular value in the array
print(np.argwhere(ran_arr==28))
print(np.argwhere(my_arr == 18))

[[5]]
[[3 3]]


In [103]:
## Find the shape of an array
print(my_arr.shape)
## Find the data type of an array
print(my_arr.dtype)

(5, 5)
int32


In [110]:
## Import randn directly so it can be called without the np.random prefix
from numpy.random import randn
my_rand_1 = randn(25).reshape(5,5)
print(my_rand_1)
print(my_rand_1.mean())
print(my_rand_1.std())

[[-2.84434776 -1.42162913 -0.13050442  0.92705157 -1.04349084]
 [-0.9754426  -1.97009916 -0.44349448 -2.57933421  0.97046743]
 [-0.51058344 -0.74087011  0.1685928  -1.39792689 -0.48305153]
 [ 1.21633641 -3.07954053  0.24123411  0.82310616 -0.649349  ]
 [-0.68134595  2.68412707  0.38178042 -0.14632804  2.65620017]]
-0.361137678089
1.43495895802


### 2.3 Using Arrays and Scalars

In [125]:
arr1 = np.array([[1,2,3,4],[5,6,7,8]])
arr1 * arr1
## element-wise multiplication

array([[ 1,  4,  9, 16],
       [25, 36, 49, 64]])

In [126]:
1 / arr1

array([[ 1.        ,  0.5       ,  0.33333333,  0.25      ],
       [ 0.2       ,  0.16666667,  0.14285714,  0.125     ]])

In [127]:
arr1 ** 3

array([[  1,   8,  27,  64],
       [125, 216, 343, 512]], dtype=int32)

### 2.4 Numpy Indexing and Selection

In [10]:
import numpy as np
arr_1 = np.arange(0,11)
print(arr_1)
## Selecting an element from array is exactly like python list
print(arr_1[0])
## variable name [index (starting 0)]
print(arr_1[1:5])
## Slicing [start:end]
print(arr_1[:6])
print(arr_1[2:])
print(arr_1[1:7:3])
## start at index 1, stop before index 7, with a step of 3

[ 0  1  2  3  4  5  6  7  8  9 10]
0
[1 2 3 4]
[0 1 2 3 4 5]
[ 2  3  4  5  6  7  8  9 10]
[1 4]


In [12]:
## A numpy array differs from a Python list because of its ability to broadcast
arr_1[0:5] = 100
arr_1

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [15]:
## But the above assignment cannot be used on a list
my_list = list(range(0,11))
my_list[0:5] = 100
my_list
## Will report an error

TypeError: can only assign an iterable

In [19]:
slice_of_arr = arr_1[0:6]
print(slice_of_arr)
slice_of_arr[:]=99
print(slice_of_arr)
print(arr_1) ## will change its original array as well
arr_copy = arr_1.copy()
arr_copy[:] = 88
print(arr_copy)
print(arr_1)

[99 99 99 99 99 99]
[99 99 99 99 99 99]
[99 99 99 99 99 99  6  7  8  9 10]
[88 88 88 88 88 88 88 88 88 88 88]
[99 99 99 99 99 99  6  7  8  9 10]


In [21]:
## numpy index on matrix
arr_2d = np.array([[5,10,15],[20,25,30],[35,40,45]])
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [24]:
## Two ways to index a 2d array, double bracket vs single bracket
print(arr_2d[2,1]) ## recommended
print(arr_2d[2][1])

40
40


## 3. Several Reference Resources
- [jmportilla](http://nbviewer.jupyter.org/github/jmportilla/Udemy-notes/tree/master/)
- [Reference for all of numpy](http://docs.scipy.org/doc/numpy/reference/)
- [Stanford Python Numpy Tutorial](http://cs231n.github.io/python-numpy-tutorial/)