# Introduction to NumPy

### Why use NumPy?
- NumPy arrays use less memory than normal python lists. 
    -  cf. A normal python list is a group of pointers to separate Python objects (e.g. the numbers inside the list).
- A NumPy array is designed to be an array of uniform values, without using space for type pointers.
- NumPy can also read in information faster than normal python and has lots of convenient broadcasting operations that can be performed across array dimensions. 

# NumPy Arrays

In [1]:
import numpy as np

In [2]:
mylist = [1,2,3]

In [3]:
np.array(mylist)

array([1, 2, 3])

In [4]:
nested_list = [[1,2],[3,4],[5,6]]

In [6]:
nested_list

[[1, 2], [3, 4], [5, 6]]

### np.array()

In [7]:
np.array(nested_list)

array([[1, 2],
       [3, 4],
       [5, 6]])

### np.arange()

In [8]:
np.arange(0,10) #arange([start,] stop[,step,], dtype=None)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
np.arange(0,10,2) #step size of 2

array([0, 2, 4, 6, 8])

### np.zeros()

In [10]:
np.zeros(3)

array([0., 0., 0.])

In [11]:
np.zeros((4,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

### np.ones()

In [12]:
np.ones(3)

array([1., 1., 1.])

In [13]:
np.ones((4,4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

### np.linspace()
Useful function which returns evenly spaced numbers over a specified interval.

_Not to be confused with what __np.arange__ is doing, which is setting a starting point, a stopping point and then taking some arbitrary step size._ 

We give it a starting point and a stopping point but then we ask for the number of actual elements we want evenly spaced between those two points. 

_Returns 'num' evenly spaced samples, calculated over the
interval ['start', 'stop']. The endpoint of the interval can optionally be excluded._

In [14]:
np.linspace(0,10,3)

array([ 0.,  5., 10.])

In [15]:
np.linspace(0,10,21)

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,
        5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. ])

In [16]:
np.linspace(0,10,20)

array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

### np.eye()

In [17]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### np.random.rand() 
Create an array of the given shape and populate it with random samples from a __uniform distribution__ over the half-open interval `[0, 1)` (0 is included, 1 is excluded).

<font color="orange">\* uniform distribution: everything has the same likelihood of being picked from 0 to 1.</font> 

In [18]:
np.random.rand(2)

array([0.63412393, 0.59425524])

In [19]:
np.random.rand(3,4)

array([[0.96997256, 0.19463455, 0.23248092, 0.24971864],
       [0.37558261, 0.69054728, 0.29881976, 0.75257113],
       [0.43052406, 0.38676131, 0.10875942, 0.13863031]])

### np.random.randn()
Return a sample (or samples) from the __standard normal distribution__.

<font color="orange">\* standard normal distribution: the mean is zero and the variance is 1 (so it will actually allow us to get some negative numbers if we do this).</font> 

In [20]:
np.random.randn(5,5)

array([[ 1.20271186,  1.12468834, -1.07065336,  1.12535232,  0.97932252],
       [ 0.3292685 ,  0.23825705,  0.25113645,  1.55289535, -0.2601217 ],
       [-1.12081536,  0.31069706, -0.29829114, -0.2932323 ,  0.11079301],
       [ 0.04280793,  1.35020233, -0.48675301, -1.23120018,  0.98369618],
       [ 1.79547721, -0.94841972, -1.2282643 , -0.93901535,  1.03721084]])

### np.random.randint()

In [21]:
np.random.randint(0,100)

98

In [22]:
np.random.randint(1,100,10) #how many

array([70, 22, 65, 50, 75, 37, 30,  2,  5, 26])

In [23]:
np.random.randint(1,100,(2,3)) #size

array([[ 4, 49, 69],
       [62, 99, 96]])

### np.random.seed()

In [25]:
np.random.seed(42)
np.random.rand(4)

array([0.37454012, 0.95071431, 0.73199394, 0.59865848])

In [26]:
np.random.rand(4)

array([0.15601864, 0.15599452, 0.05808361, 0.86617615])

### .reshape()

In [27]:
arr = np.arange(25)

In [29]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [31]:
arr.shape

(25,)

In [32]:
arr.reshape(5,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

However, this doesn't actually affect the original object permanently. If I ask for `arr` again, I still get back the original.

In [33]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

If I wanted this to be permanent, I could simply just reassign as follows:

In [34]:
arr = arr.reshape(5,5) 

In [35]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [36]:
arr.reshape(3,6)

ValueError: cannot reshape array of size 25 into shape (3,6)

### .max(), .min(), .argmax(), .argmin()

In [28]:
ranarr = np.random.randint(0,50,10)

In [30]:
ranarr

array([35, 39, 23,  2, 21,  1, 23, 43, 29, 37])

##### .max()/.min()  
max/min value

In [37]:
ranarr.max()

43

In [39]:
ranarr.min()

1

##### .argmax()/.argmin()
the index of that max/min value

In [38]:
ranarr.argmax()

7

In [40]:
ranarr.argmin()

5

### .dtype

In [42]:
ranarr.dtype

dtype('int64')

In [43]:
myarr = np.random.rand(4)

In [44]:
myarr

array([7.78765841e-04, 9.92211559e-01, 6.17481510e-01, 6.11653160e-01])

In [45]:
myarr.dtype

dtype('float64')

# NumPy Index Selection

In [46]:
arr = np.arange(0,11)

In [47]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [48]:
arr[8]

8

In [49]:
arr[1:5]

array([1, 2, 3, 4])

In [50]:
arr[0:5]

array([0, 1, 2, 3, 4])

In [51]:
arr[:5]

array([0, 1, 2, 3, 4])

In [52]:
arr[5:]

array([ 5,  6,  7,  8,  9, 10])

### Broadcasting
In normal Python, a slice of a list can only be reassigned from another sequence (e.g. `mylist[0:5] = [100] * 5`); assigning a single value to the slice, as in `mylist[0:5] = 100`, raises a `TypeError`.

However, NumPy can actually broadcast a single value across a larger set of values.

In [53]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [54]:
#you would not be able to do this with a normal Python list
arr[0:5] = 100 

In [55]:
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

<font color="red">__Slicing a section of an array and assigning it to a new variable does not copy the data: the new variable is only a view that points to the original array.__</font>

In [56]:
arr = np.arange(0,11)

In [57]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [58]:
slice_of_arr = arr[0:5]

In [59]:
slice_of_arr

array([0, 1, 2, 3, 4])

In [60]:
slice_of_arr[:] = 99

In [61]:
slice_of_arr

array([99, 99, 99, 99, 99])

What happens with my original array?

In [62]:
arr

array([99, 99, 99, 99, 99,  5,  6,  7,  8,  9, 10])

With NumPy, when `slice_of_arr` was assigned as `arr[0:5]`, it did not get its own data; it is basically just pointing to the original array, which means <font color="red">__this broadcasting operation has actually affected the original array__.</font>

<font color="red">__If you wanted to not have it affect the original array, you would need to explicitly set a copy.__</font>

In [63]:
arr = np.arange(0,11)

In [64]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [65]:
arr_copy = arr.copy()

In [66]:
arr_copy[:] = 100

In [67]:
arr_copy

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [68]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

### Indexing on to the arrays or matrices

In [69]:
arr_2d = np.array([[5,10,15], [20,25,30], [35,40,45]])

In [70]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [71]:
arr_2d.shape

(3, 3)

In [72]:
arr_2d[2]

array([35, 40, 45])

In [73]:
arr_2d[0,2]

15

In [74]:
arr_2d[:2]

array([[ 5, 10, 15],
       [20, 25, 30]])

In [75]:
arr_2d[:2, 1:]

array([[10, 15],
       [25, 30]])

### Broadcast a condition or a comparison

In [76]:
arr = np.arange(1,11)

In [77]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [78]:
arr > 4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [79]:
bool_arr = arr > 4

In [80]:
arr[bool_arr]

array([ 5,  6,  7,  8,  9, 10])

In [81]:
arr[arr > 4]

array([ 5,  6,  7,  8,  9, 10])

# NumPy Operations

In [82]:
arr = np.arange(0,10)

In [83]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [84]:
arr + 5

array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [85]:
arr - 2

array([-2, -1,  0,  1,  2,  3,  4,  5,  6,  7])

In [86]:
arr + arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [87]:
arr * arr

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

Normal Python won't let you do operations such as dividing by zero (it raises a `ZeroDivisionError`), but NumPy only reports a warning and puts `nan` or `inf` in the result.

In [88]:
arr / arr

  arr / arr


array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [89]:
1 / arr

  1 / arr


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111])

In [90]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [91]:
np.sin(arr)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [92]:
np.log(arr)

  np.log(arr)


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])

### Summary statistics

In [93]:
arr.sum()

45

In [94]:
arr.mean()

4.5

In [95]:
arr.max()

9

In [96]:
arr.var()

8.25

In [97]:
arr.std()

2.8722813232690143

In [98]:
arr2d = np.arange(0,25).reshape(5,5)

In [99]:
arr2d.shape

(5, 5)

In [100]:
arr2d

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [101]:
arr2d.sum()

300

Perform this operation across the rows
(= the sum of each column)

In [102]:
arr2d.sum(axis=0) 

array([50, 55, 60, 65, 70])

Perform this operation across the columns
(= the sum of each row)

In [103]:
arr2d.sum(axis=1) 

array([ 10,  35,  60,  85, 110])