
<a href='https://huntsman.usu.edu/directory/jahangiry-pedram'> <img src="logo.jpg" /></a>

___
## Pedram Jahangiry 

# NumPy 

Topics to be covered:

1. NumPy arrays
2. NumPy indexing and extraction
3. NumPy operations



In [4]:
import numpy as np

##  1. NumPy Arrays

In [5]:
my_list = [0,1,2,3]
my_list

[0, 1, 2, 3]

In [7]:
np.array(my_list)

array([0, 1, 2, 3])

In [8]:
my_matrix = [[1,2,3],
            [4,5,6]]
my_matrix

[[1, 2, 3], [4, 5, 6]]

In [9]:
np.array(my_matrix)

array([[1, 2, 3],
       [4, 5, 6]])

In [14]:
# Built in methods
# np.arange() :Return evenly spaced values within a given interval.
np.arange(0,4)

array([0, 1, 2, 3])

In [15]:
np.arange(0,10,3)

array([0, 3, 6, 9])

In [16]:
# Zeros and ones : Generate arrays of zeros or ones
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [17]:
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [18]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [19]:
np.ones((5,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [20]:
# linspace : Return evenly spaced numbers over a specified interval. 
np.linspace(0,20,5)

array([ 0.,  5., 10., 15., 20.])

In [21]:
# eye: Creates identity matrix
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [41]:
# rand : create random samples from a uniform distribution
np.random.rand(5)

array([0.32613635, 0.88048172, 0.93898008, 0.05392863, 0.66067952])

In [46]:
np.random.rand(5,5)

array([[0.85114585, 0.26695179, 0.29900062, 0.93167168, 0.45452633],
       [0.49860575, 0.06978202, 0.70481482, 0.32798734, 0.28773164],
       [0.51874088, 0.67907272, 0.16583814, 0.73558282, 0.21024087],
       [0.99847689, 0.28812465, 0.58540587, 0.63071907, 0.43926806],
       [0.68480646, 0.84086154, 0.07028768, 0.85478052, 0.56813703]])

In [47]:
# randn : create random samples from standard normal distribution
np.random.randn(5)

array([0.62170567, 1.49693271, 0.52672314, 0.69581018, 1.11628512])

In [48]:
np.random.randn(5,5)

array([[ 0.35435385, -1.23412651,  0.36084087,  1.81512712,  1.16268366],
       [-1.90904486, -2.08851433, -0.21877651,  1.44226625,  1.16664213],
       [-1.05975148,  2.96386075,  0.49396791,  0.56724879,  0.54333433],
       [ 0.19747384,  0.63606927,  1.60686857,  0.94781631, -1.56166699],
       [-1.15244108, -0.33564161, -1.24831741, -0.18515085,  0.14902445]])

In [49]:
# randint(a,b) : create a random sample of integers from a (inclusive) to b (exclusive)
np.random.randint(1,5)

2

In [50]:
np.random.randint(1,5,20)

array([1, 4, 1, 2, 1, 2, 1, 2, 4, 3, 1, 3, 4, 3, 1, 1, 2, 4, 3, 2])

In [65]:
# seed is used to fix the random state.
np.random.seed(100)
np.random.randn(5)

array([-1.74976547,  0.3426804 ,  1.1530358 , -0.25243604,  0.98132079])

In [67]:
np.random.seed(100)
np.random.randn(4)

array([-1.74976547,  0.3426804 ,  1.1530358 , -0.25243604])

In [69]:
# array methods

my_array = np.arange(1,10)
my_array


array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [70]:
# reshape
new_array = my_array.reshape(3,3)
new_array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [71]:
new_array.shape

(3, 3)

In [72]:
new_array.dtype

dtype('int32')

In [74]:
type(new_array)

numpy.ndarray

In [75]:
my_array = np.append(my_array, [100,-100])
my_array

array([   1,    2,    3,    4,    5,    6,    7,    8,    9,  100, -100])

In [76]:
my_array.max()

100

In [77]:
my_array.argmax()

9

In [78]:
my_array.min()

-100

In [79]:
my_array.argmin()

10

## 2. NumPy indexing and extraction

In [80]:
my_array

array([   1,    2,    3,    4,    5,    6,    7,    8,    9,  100, -100])

In [83]:
# extraction is very similar to list extraction
my_array[1]

2

In [84]:
my_array[6:9]

array([7, 8, 9])

In [85]:
# With NumPy arrays, you can broadcast a single value across a larger set of values. This is not possible using lists. 

my_array[0:5]=100
my_array

array([ 100,  100,  100,  100,  100,    6,    7,    8,    9,  100, -100])

In [86]:
my_list = list(range(1,10))
my_list

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [87]:
my_list[0:5]

[1, 2, 3, 4, 5]

In [88]:
my_list[0:2]=100

TypeError: can only assign an iterable

In [89]:
my_list[0:2]=[100,100,100]
my_list

[100, 100, 100, 3, 4, 5, 6, 7, 8, 9]

In [94]:
# matrix: Note that matrix indexing in Python is slightly different from R, MATLAB, and some other programming languages (indices start at 0).

my_matrix= np.arange(0,6).reshape(2,3)
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [97]:
# Format is matrix[row][col] or matrix[row,col]

# extracting the first row
my_matrix[0]

array([0, 1, 2])

In [98]:
# extracting the first column
my_matrix[:,0]

array([0, 3])

In [99]:
my_matrix[0][1] == my_matrix[0,1]

True

In [100]:
my_matrix[:2,1:]

array([[1, 2],
       [4, 5]])

Search Google Images for "numpy array indexing" to find helpful visual explanations.


### Extracting with conditional selection


In [101]:
my_array

array([ 100,  100,  100,  100,  100,    6,    7,    8,    9,  100, -100])

In [106]:
my_array > 50

array([ True,  True,  True,  True,  True, False, False, False, False,
        True, False])

In [105]:
my_array[my_array>50]

array([100, 100, 100, 100, 100, 100])

This is another advantage of using arrays instead of lists. You cannot apply a comparison operator element-wise to a list directly; you must use a list comprehension or the filter() function instead.

In [107]:
list(filter(lambda x: x>50, my_array))

[100, 100, 100, 100, 100, 100]

In [108]:
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [109]:
my_matrix > 1

array([[False, False,  True],
       [ True,  True,  True]])

In [110]:
my_matrix[my_matrix > 1]
# the shape is not preserved though!

array([2, 3, 4, 5])

## 3. NumPy operations

In [111]:
arr = np.arange(0,5)
arr

array([0, 1, 2, 3, 4])

In [112]:
arr + arr

array([0, 2, 4, 6, 8])

In [113]:
arr ** arr

array([  1,   1,   4,  27, 256], dtype=int32)

In [114]:
arr/arr

  """Entry point for launching an IPython kernel.


array([nan,  1.,  1.,  1.,  1.])

In [115]:
1/arr

  """Entry point for launching an IPython kernel.


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ])

In [116]:
# Square Roots
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ])

In [117]:
# Exponential
np.exp(arr)

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692, 54.59815003])

In [118]:
# Natural Logarithm
np.log(arr)

  


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436])

In [119]:
# summary statistics on arrays
arr

array([0, 1, 2, 3, 4])

In [120]:
arr.sum()

10

In [121]:
arr.mean()

2.0

In [122]:
arr.var()

2.0

In [123]:
arr.std()

1.4142135623730951

## Axis Logic
When working with 2-dimensional arrays (matrices) we have to consider rows and columns. This becomes very important when we get to the section on pandas. In array terms, axis 0 is the vertical axis (running down the rows), and axis 1 is the horizontal axis (running across the columns). These values (0,1) correspond to the order in which <tt>arr.shape</tt> values are returned.

Let's see how this affects our summary statistic calculations from above.

In [83]:
my_matrix

array([[0, 1, 2],
       [3, 4, 5]])

In [85]:
# axis 0 (zero) performs a vertical operation (down the rows), and axis 1 a horizontal one (across the columns).
# again, note that the logic is different from R, MATLAB, etc.
my_matrix.sum(axis=0)

array([3, 5, 7])

In [86]:
my_matrix.sum(1)

array([ 3, 12])

In [87]:
my_matrix * my_matrix  # note that this is not a matrix multiplication

array([[ 0,  1,  4],
       [ 9, 16, 25]])

In [88]:
# To do a matrix multiplication use np.dot (equivalent to %*% in R)
np.dot(my_matrix,my_matrix.T)

array([[ 5, 14],
       [14, 50]])

In [90]:
# inverse of a matrix
A = np.array([[1,2],[3,4]]) 
A

array([[1, 2],
       [3, 4]])

In [91]:
A_inv = np.linalg.inv(A)
A_inv

array([[-2. ,  1. ],
       [ 1.5, -0.5]])