Let's talk more! about numpy
----------------------

**Going from lists to arrays and figuring out if that worked well...**

In [3]:
import numpy as np
import pickle as pkl

# how do I make a numpy array from a python list?
pylist = [[10, 1, 2021], [2, 9, 2022]]
nparray = np.array(pylist)

# how do I print a numpy array?
print(nparray)
print(nparray.shape)

with open('data.pkl', 'wb') as f:
    pkl.dump(nparray, f)

# and back to a list?

with open('data.pkl', 'rb') as f:
    nparray2 = pkl.load(f)
    print(nparray2)
    
backtolist = nparray.tolist()
print(backtolist)

[[  10    1 2021]
 [   2    9 2022]]
(2, 3)
[[  10    1 2021]
 [   2    9 2022]]
[[10, 1, 2021], [2, 9, 2022]]


In [2]:
# how do I make a numpy array from a python list?
pylist = [[10, 1, 2021], [2, 9]]
nparray = np.array(pylist)

# how do I print a numpy array?
print(nparray)
print(nparray.shape)

[list([10, 1, 2021]) list([2, 9])]
(2,)


  nparray = np.array(pylist)


In [4]:
# how do I figure out the type of a numpy array? 
print(nparray.dtype)

# hmm, will this work and why or why not?
print(backtolist.dtype)

int32


AttributeError: 'list' object has no attribute 'dtype'

In [6]:
# how do I change the type of a numpy array?
# this word is useful: coerce
nparrayFloat = nparray.astype(float)
print(nparrayFloat.dtype)
print(nparrayFloat)

nparrayStr = np.array(nparrayFloat, dtype=str)
print(nparrayStr.dtype)
print(nparrayStr)

nparrayFloat = nparrayStr.astype(float)
print(nparrayFloat.dtype)
print(nparrayFloat)

float64
[[1.000e+01 1.000e+00 2.021e+03]
 [2.000e+00 9.000e+00 2.022e+03]]
<U32
[['10.0' '1.0' '2021.0']
 ['2.0' '9.0' '2022.0']]
float64
[[1.000e+01 1.000e+00 2.021e+03]
 [2.000e+00 9.000e+00 2.022e+03]]


In [10]:
nparrayFloat = np.array([[10.45, 1.35, 2021.75], [2, 9, 2022]])
print(nparrayFloat.dtype)
print(nparrayFloat)

# be careful! this is lossy, and it cuts rather than rounding!
nparrayInt = nparray.astype(int)
print(nparrayInt.dtype)
print(nparrayInt)

float64
[[1.04500e+01 1.35000e+00 2.02175e+03]
 [2.00000e+00 9.00000e+00 2.02200e+03]]
int32
[[  10    1 2021]
 [   2    9 2022]]


In [13]:
# (review!) how do I see the number of dimensions, number of elements, and shape of a numpy array?
print(nparray.ndim)
print(nparray.size)
print(nparray.shape)

2
6
(2, 3)


**Making numpy arrays...**

In [19]:
# make an array of zeros
nparrayZero = np.zeros([3, 10])
print(nparrayZero)

# that's floats ... what if we want ints?
# either of these two ways
#nparrayZeroInt = nparrayZero.astype(int)
nparrayZeroInt = np.array(nparrayZero, dtype=int)
print(nparrayZeroInt)

# what if we want ones instead of zeros?
nparrayOnes = np.ones([3, 10])
print(nparrayOnes)

# what if we want sevens?
nparraySevens = np.ones([3, 10])*7
print(nparraySevens)


[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
[[7. 7. 7. 7. 7. 7. 7. 7. 7. 7.]
 [7. 7. 7. 7. 7. 7. 7. 7. 7. 7.]
 [7. 7. 7. 7. 7. 7. 7. 7. 7. 7.]]


In [23]:
# make an array of random values
nparrayRandomFloat = np.random.random([3, 10])
print(nparrayRandomFloat)
print(nparrayRandomFloat.dtype)

# what if we want random ints? let's see...
# that does not work!
# nparrayRandomInt = np.random.random([3, 10], dtype=int)

# hmm, if not that then what?
print((nparrayRandomFloat*10).astype(int))
# what is the other way?


[[0.92421174 0.96271017 0.71533742 0.30484619 0.98457859 0.45417741
  0.29235221 0.72986095 0.82274969 0.31591523]
 [0.94267646 0.19812957 0.48915982 0.844199   0.54939435 0.02671617
  0.74180409 0.05619306 0.71543221 0.3335045 ]
 [0.63892484 0.71049869 0.58302421 0.43262473 0.65692652 0.53698944
  0.86209276 0.61478125 0.71950601 0.94264986]]
float64
[[9 9 7 3 9 4 2 7 8 3]
 [9 1 4 8 5 0 7 0 7 3]
 [6 7 5 4 6 5 8 6 7 9]]


In [28]:
# what if we want random floats in an interval?
print(np.linspace(0, 10, 10))

# what if we want to shape that into a 2 by 5 array?
nplinArray = np.linspace(0, 10, 10)
np2dArray = np.reshape(nplinArray, (2, 5))
print(np2dArray)

# what if we want random ints in an interval?
print(np.linspace(0, 10, 10, dtype=int))

[ 0.          1.11111111  2.22222222  3.33333333  4.44444444  5.55555556
  6.66666667  7.77777778  8.88888889 10.        ]
[[ 0.          1.11111111  2.22222222  3.33333333  4.44444444]
 [ 5.55555556  6.66666667  7.77777778  8.88888889 10.        ]]
[ 0  1  2  3  4  5  6  7  8 10]


**Getting access to elements and "slices" of numpy arrays...**

In [30]:
# (review!) how do I access an element in an array?
print(nparrayRandomFloat)
print("\n")
print(nparrayRandomFloat[0][0])

# is there a prettier way?
print(nparrayRandomFloat[0, 0])

# this is only marginally prettier for a 2-d array but imagine a 10-d array!

[[0.92421174 0.96271017 0.71533742 0.30484619 0.98457859 0.45417741
  0.29235221 0.72986095 0.82274969 0.31591523]
 [0.94267646 0.19812957 0.48915982 0.844199   0.54939435 0.02671617
  0.74180409 0.05619306 0.71543221 0.3335045 ]
 [0.63892484 0.71049869 0.58302421 0.43262473 0.65692652 0.53698944
  0.86209276 0.61478125 0.71950601 0.94264986]]


0.9242117354884198
0.9242117354884198


In [41]:
print(nparrayRandomFloat)

# how do I access the whole second column?
#print("first column")
#print(nparrayRandomFloat[:, 1])

# of what type is the first column, hmm? leaving as an exercise

# what about the whole second row?
#print("first row")
#print(nparrayRandomFloat[1, :])

# what about the last two rows?
print("last two rows")
nparrayRandomFloat[1:, :]
# or this
nparrayRandomFloat[-2:, :]

# what about the first row and last two columns?
print("first row, last two columns")
print(nparrayRandomFloat[0, -2:])

[[0.92421174 0.96271017 0.71533742 0.30484619 0.98457859 0.45417741
  0.29235221 0.72986095 0.82274969 0.31591523]
 [0.94267646 0.19812957 0.48915982 0.844199   0.54939435 0.02671617
  0.74180409 0.05619306 0.71543221 0.3335045 ]
 [0.63892484 0.71049869 0.58302421 0.43262473 0.65692652 0.53698944
  0.86209276 0.61478125 0.71950601 0.94264986]]
last two rows
first row, last two columns
[0.82274969 0.31591523]


In [43]:
print(nparrayRandomFloat.shape[0])
print(np.arange(nparrayRandomFloat.shape[0]))
# how do I access the 1st and 3rd columns? Well, really the 2nd and the 4th
print("first and third columns")
print(nparrayRandomFloat[np.ix_(np.arange(nparrayRandomFloat.shape[0]), [1, 3])])

# whaaaat was that?

3
[0 1 2]
first and third columns
[[0.96271017 0.30484619]
 [0.19812957 0.844199  ]
 [0.71049869 0.43262473]]


**Modifying (slices of) arrays...**

In [None]:
# how do I *change* the element at 1, 1 of the array?

In [None]:
# and now for some magic! how do I assign the second row to 1s?
nparrayRandomFloat[1] = 1
print(nparrayRandomFloat)
print(nparrayRandomFloat.dtype)

# how do I assign the second row to increasing ints?
nparrayRandomFloat[1] = np.arange(nparrayRandomFloat.shape[1])
print(nparrayRandomFloat)
print(nparrayRandomFloat.dtype)

# how do I assign the second row to 3* itself?


**Copying numpy arrays...**

In [None]:
# let's try the obvious thing
nparrayRandomFloat2 = nparrayRandomFloat
print("nparrayRandomFloat")
print(nparrayRandomFloat)
print("nparrayRandomFloat2")
print(nparrayRandomFloat2)

In [None]:
nparrayRandomFloat2[0,0] = 0
print("nparrayRandomFloat")
print(nparrayRandomFloat)
print("nparrayRandomFloat2")
print(nparrayRandomFloat2)

# whaaat just happened??
# how do we stop that happening?? hint, what are we doing? we are *copying*

**Doing other things to a whole row or column...**

In [None]:
# (review!) how do we assign value(s) to a row or column?
nparrayRandomFloat[:1] = np.zeros(nparrayRandomFloat.shape[1])
print(nparrayRandomFloat)

In [None]:
# let's sum across each column
np.sum(nparrayRandomFloat, axis=0)

# how would we sum across each row?



In [None]:
# what if we had a tensor?
nptensorFloat = np.ones([3, 4, 5])
print(nptensorFloat)

np.sum(nptensorFloat, axis=2)

In [None]:
# what if we don't specify an axis?

In [None]:
# what other functions can we apply across axes?

In [None]:
# let's take it up a notch

nparrayRandomInt = np.random.randint(low=0, high=10, size=(3,4))
print(nparrayRandomInt)

print(nparrayRandomInt - np.min(nparrayRandomInt, axis=0))

# whaaat just happened? let's look at the shapes


In [None]:
# why is this cool?
import timeit

def sumLoop():
    '''Use for loop to sum a row vector'''
    longRow = np.array([i for i in range(1, 1000000000)])
    theSum = 0
    for i in range(len(longRow)):
        theSum += longRow[i]

def sumVectorized():
    '''Vectorized version of summing a row vector'''
    longRow = np.array([i for i in range(1, 1000000000)])
    theSum = np.sum(longRow)

print(timeit.timeit(lambda: sumLoop))
print(timeit.timeit(lambda: sumVectorized))

In [None]:
# what if we try to do the subtract-min thing across axis 1?
print(nparrayRandomInt - np.min(nparrayRandomInt, axis=1))


In [None]:
# how can we fix that? make the arrays shape-compatible!
print(nparrayRandomInt - np.min(nparrayRandomInt, axis=1)[:, np.newaxis])

In [None]:
# is there another way to achieve this?
print(nparrayRandomInt - np.min(nparrayRandomInt, axis=1, keepdims=True))


Five Jupyter tips
------------------

1. To run a cell, you can hit Ctrl+Enter 
2. Quite often, a Jupyter "mistake" happens if you forget that this cell has all the memory of every cell that was already run, 
3. and only those cells, 
4. and only the last time they were run
5. To go into "select mode", hit Esc, then you can easily navigate from cell to cell

Markdown
--------

These cells that look like fancy text are in Markdown. Markdown cheat sheet: https://www.markdownguide.org/cheat-sheet