Announcements:

- See the long email from yesterday
- Gradescope HW1 regrade request is turned on 

Monday: generators, modules (= Python superpower)

Today: NumPy (= Python superpower module for efficient data science and scientific computation)

Arrays 

https://jakevdp.github.io/PythonDataScienceHandbook/02.01-understanding-data-types.html

In [1]:
L = [1, 'harlin', 23.51, (9, 2)]

# easy to manipulate, iterable, contain different types of data
# downside: can be kind of slow

In [2]:
for i in L:
    print(id(i))

140580074166576
140580184265648
140580183808144
140580184575488


In [3]:
import numpy as np

In [4]:
x = np.array([1,2,3,4]) 

# a numpy array occupies a single contiguous chunk of memory
# downside: all elements in a numpy array have to be of the same data type

In [5]:
L1 = list(range(100)) # [0, 1, 2, ... 99]
L2 = list(range(100)) # same

In [6]:
def add_lists(l1, l2):
    """Return a new list holding the pairwise sums of ``l1`` and ``l2``.

    The result has one entry per element of ``l1``. ``l2`` is read by
    position, so it must be at least as long as ``l1``; any extra trailing
    items in ``l2`` are ignored.
    """
    sums = []
    for position, left in enumerate(l1):
        # Explicit Python-level loop: one addition per element.
        sums.append(left + l2[position])
    return sums

In [7]:
%timeit add_lists(L1, L2)

9.15 µs ± 213 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [8]:
A1 = np.array(L1) # you can create a numpy array by calling np.array(l) where l is a list
A2 = np.array(L2)

In [9]:
%timeit A1 + A2

437 ns ± 2.95 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


vectorization: apply an operation entrywise to all elements of an array at once, instead of writing a for-loop

In [10]:
a1 = np.array([1,2,3,4,5])
a2 = np.array([6,7,8,9,10])

In [11]:
a1 = np.array([1,2,3,4,5])
a2 = np.array([6,7,8,9,10, 11])

a1+ a2

ValueError: operands could not be broadcast together with shapes (5,) (6,) 

In [12]:
# for i in range(len(..)):
#     # do something...

In [13]:
a1*a2

ValueError: operands could not be broadcast together with shapes (5,) (6,) 

In [14]:
2*a1

array([ 2,  4,  6,  8, 10])

In [15]:
2 + a1

array([3, 4, 5, 6, 7])

In [16]:
# I have x values between 0 and 2pi
# I want to get sin(x)

In [17]:
import math

In [18]:
def list_sin(L):
    """Return a list with ``math.sin`` applied to each element of ``L``."""
    # map() calls math.sin once per item, in order; list() collects the results.
    return list(map(math.sin, L))

In [19]:
a = np.linspace(0, 2*np.pi, 101) # from 0 (inclusive) to 2*np.pi (inclusive), get 101 evenly spaced points

In [20]:
L = list(a)

In [21]:
%timeit list_sin(L)

12 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [22]:
%timeit np.sin(a)

1.49 µs ± 32.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


creating arrays

In [23]:
L = [1,2,3,4]
a = np.array(L)

print(type(L), type(a))

<class 'list'> <class 'numpy.ndarray'>


In [24]:
n = 5

a = np.zeros(n) # [0, 0, 0, 0, 0]

a

array([0., 0., 0., 0., 0.])

In [25]:
a = np.ones(n) # [1, 1, 1, 1, 1]

a


array([1., 1., 1., 1., 1.])

In [26]:
np.random.rand(n) # array of length n with random floats drawn uniformly from [0, 1)

array([0.10700103, 0.59676009, 0.7187307 , 0.87733595, 0.59501386])

In [27]:
# range(n)
np.arange(n)

array([0, 1, 2, 3, 4])

In [28]:
np.linspace(0, 1, 3) # 3 evenly-spaced points in linear scale between 0 and 1, inclusive
# np.logspace gives evenly spaced points in log scale

array([0. , 0.5, 1. ])

multidimensional arrays


`shape`

In [29]:
A = np.ones((n, n))

In [30]:
A # n by n matrix of all ones

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [31]:
A.shape

(5, 5)

In [32]:
a = np.zeros(n)
a

array([0., 0., 0., 0., 0.])

In [33]:
print(a)

[0. 0. 0. 0. 0.]


In [34]:
a.shape

(5,)

In [35]:
a1 = np.ones((n, 1))
print(a1)
print(a1.shape)

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]]
(5, 1)


In [36]:
a1 = np.ones((1, n))
print(a1)
print(a1.shape)

[[1. 1. 1. 1. 1.]]
(1, 5)


In [37]:
A = np.ones((3,4,5))
A

array([[[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]]])

In [38]:
A.shape

(3, 4, 5)

`reshape` method allows you to alter the dimensions of an array

In [39]:
a = np.arange(15) # [0, ..., 14]
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [40]:
A = a.reshape(3, 5) # change to 3 rows and 5 columns

A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [41]:
A.reshape(5,3) 

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [42]:
A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [43]:
A.T # transpose: an attribute (no parentheses), not a setter; A itself is unchanged

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [44]:
A.reshape(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

indexing, slicing, boolean indexing

In [45]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [46]:
a[8] # indexing

8

In [47]:
a[-1] # negative indexing

9

In [48]:
a[:4] # slicing

array([0, 1, 2, 3])

in boolean indexing, the boolean comparisons in numpy are vectorized

In [49]:
a > 5 # returns an array of bool - this is vectorized!

array([False, False, False, False, False, False,  True,  True,  True,
        True])

In [50]:
# [1, 6] > 5  # TypeError: a plain Python list does not support elementwise comparison

In [51]:
a[a>5] # only give elements greater than 5

array([6, 7, 8, 9])

In [52]:
a[[1,4]] # give elements of index 1 and 4

array([1, 4])

In [53]:
# if .... and, or, not

# elif ....

# else: 
#     ...

In [54]:
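# combine boolean arrays with the elementwise operators & (and) and | (or);
# Python's `and` / `or` do not work on arrays, and each comparison needs its own parentheses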

In [55]:
a[(a>5) & (a%2==0)] # give elements greater than 5 AND even

array([6, 8])

In [56]:
(a>5) 

array([False, False, False, False, False, False,  True,  True,  True,
        True])

In [57]:
(a%2==0)

array([ True, False,  True, False,  True, False,  True, False,  True,
       False])

In [58]:
(a>5) & (a%2==0)

array([False, False, False, False, False, False,  True, False,  True,
       False])

In [59]:
a[(a>5) | (a%2==0)] # give elements greater than  5 OR even

array([0, 2, 4, 6, 7, 8, 9])

In [60]:
(a>5) | (a%2==0)

array([ True, False,  True, False,  True, False,  True,  True,  True,
        True])

In [61]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [62]:
a[5] = 50 # modify a single value with indexing

In [63]:
a

array([ 0,  1,  2,  3,  4, 50,  6,  7,  8,  9])

In [64]:
a = np.arange(10)
a[a>5] = 50 # look up values greater than 5, modify all of them to 50


In [65]:
a

array([ 0,  1,  2,  3,  4,  5, 50, 50, 50, 50])

In [66]:
a = np.arange(10)

a[a > 5] = np.array([60,70,80,90]) # look up values greater than 5, replace them one-for-one with the values of this array

In [67]:
a

array([ 0,  1,  2,  3,  4,  5, 60, 70, 80, 90])

In [68]:
a = np.arange(10)

a[a > 5] = np.array([60,70,80])

ValueError: NumPy boolean array indexing assignment cannot assign 3 input values to the 4 output values where the mask is true