# NumPy
If you want to type along with me, use [this notebook](https://humboldt.cloudbank.2i2c.cloud/hub/user-redirect/git-pull?repo=https%3A%2F%2Fgithub.com%2Fbethanyj0%2Fdata271_sp24&branch=main&urlpath=tree%2Fdata271_sp24%2Fdemos%2Fdata271_demo9_live.ipynb) instead. 
If you don't want to type and want to follow along just by executing the cells, stay in this notebook. 

In [1]:
# Whenever you want to use numpy import it with the following code
import numpy as np

In [9]:
# Build a 1-d ndarray from a plain Python list
np.array([1,2,3])

array([1, 2, 3])

In [10]:
# Check the type of the object np.array returns.
# The bare name `array` is never defined in this notebook, so the array is
# built inline to keep the cell runnable on a fresh kernel.
type(np.array([1,2,3]))

numpy.ndarray

In [26]:
# Build a 2-d array from a nested list: each inner list becomes one row
arr2d=np.array([[1,2,3],[4,5,6]])
arr2d

array([[1, 2, 3],
       [4, 5, 6]])

## Attributes

In [27]:
# number of dimensions (axes); arr2d was built from a list of lists, so it has 2
arr2d.ndim

2

In [28]:
# shape of the array: a tuple giving the length of each dimension,
# i.e. (number of rows, number of columns) for a 2-d array
arr2d.shape

(2, 3)

In [29]:
# size of the array (how many total elements): rows * columns for a 2-d array
arr2d.size

6

In [30]:
# type of the elements within the array (every element shares one dtype)
arr2d.dtype

dtype('int64')

## Why NumPy?

In [193]:
# Base Python data structures can't handle elementwise operations:
# `**` is not defined between a list and an int, so this raises a TypeError.
# The error is caught and printed so the demonstration stays visible without
# stopping a Restart & Run All of the notebook.
lst = [1,2,3]
try:
    lst**2
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

In [194]:
# NumPy can: ** is applied to each element of the array
arr = np.array([1,2,3])
arr**2

array([1, 4, 9])

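NumPy is also more computationally efficient than base Python lists. The two timing cells below square one million numbers, first with a list comprehension and then with a NumPy array.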

In [207]:
# How long to square every element in a big list (pure-Python list comprehension)
big_list = list(range(1000000))
%timeit [i**2 for i in big_list]

22.5 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [208]:
# How long to square every element in a big array (one vectorized expression)
big_array = np.arange(1000000)
%timeit big_array**2

529 µs ± 118 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Creating NumPy arrays

In [37]:
# Manually enter each element as a Python list passed to np.array
np.array([1,2,3])

array([1, 2, 3])

In [43]:
# Create sequential array with np.arange: np.arange(n) counts from 0 up to n-1
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [44]:
# Indicate start, stop, and step in np.arange (the stop value itself is excluded)
np.arange(2,10,2)

array([2, 4, 6, 8])

In [47]:
# Create a set number of equally spaced elements with np.linspace(start,stop,number)
# Here both start and stop appear in the result, and the values are floats
np.linspace(2,5,10)

array([2.        , 2.33333333, 2.66666667, 3.        , 3.33333333,
       3.66666667, 4.        , 4.33333333, 4.66666667, 5.        ])

In [48]:
# ndarray of ones; the shape is passed as a tuple (rows, columns)
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [49]:
# ndarray of zeros; a single integer gives a 1-d array of that length
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [50]:
# identity matrix: n x n with ones on the diagonal and zeros everywhere else
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [53]:
# Fill whole nd array with a specific value: np.full(shape, fill_value)
np.full((3,4),2)

array([[2, 2, 2, 2],
       [2, 2, 2, 2],
       [2, 2, 2, 2]])

### Converting other data structures to numpy arrays

In [216]:
# Start from a plain Python list
lst = [1,2,3]
type(lst)

list

In [217]:
# np.asarray converts the list into an ndarray
np.asarray(lst)

array([1, 2, 3])

In [218]:
# A tuple converts the same way a list does
tup = (1,2,3)
np.asarray(tup)

array([1, 2, 3])

In [219]:
# Not typically used: the dict is not unpacked into elements; it is wrapped
# whole as a single item in an array with dtype=object
dct = {1:2,3:4}
np.asarray(dct)

array({1: 2, 3: 4}, dtype=object)

In [222]:
# Not typically used: a set is also wrapped whole as a single object
# (the repeated 3 is dropped by the set literal itself, before NumPy sees it)
my_set = {1,2,3,3}
np.asarray(my_set)

array({1, 2, 3}, dtype=object)

## Converting data types

In [87]:
# Integer input produces an array with an integer dtype
arr2 = np.array((0,2,3))
arr2.dtype

dtype('int64')

In [88]:
# Convert the elements to floats with astype
arr2.astype('float64')

array([0., 2., 3.])

In [89]:
# Convert to booleans: 0 becomes False, nonzero values become True
arr2.astype('bool')

array([False,  True,  True])

## Arithmetic with NumPy Arrays

In [223]:
# Performs elementwise arithmetic: two arrays of the same shape are combined
# element by element. The final expression displays the difference, so the
# cell actually produces an output.
arr1 = np.array([[2,3],[4,5]])
arr2 = np.array([[3,4],[5,6]])
arr1-arr2

array([[-1, -1],
       [-1, -1]])

In [224]:
# Show arr1 for reference
arr1

array([[2, 3],
       [4, 5]])

In [225]:
# Show arr2 for reference
arr2

array([[3, 4],
       [5, 6]])

In [226]:
# Subtraction: each element of arr2 is subtracted from the matching element of arr1
arr1-arr2

array([[-1, -1],
       [-1, -1]])

In [96]:
# Addition: matching elements are added together
arr1+arr2

array([[ 5,  7],
       [ 9, 11]])

In [98]:
# * multiplies matching elements (this is not matrix multiplication)
arr1*arr2

array([[ 6, 12],
       [20, 30]])

In [99]:
# / divides matching elements; the result is a float array
arr1/arr2

array([[0.66666667, 0.75      ],
       [0.8       , 0.83333333]])

In [100]:
# A scalar is combined with every element of the array
1/arr1

array([[0.5       , 0.33333333],
       [0.25      , 0.2       ]])

In [105]:
# Comparisons are elementwise too and give a boolean array
arr1 > arr2

array([[False, False],
       [False, False]])

## Indexing and slicing

In [131]:
# 1-d array 0..9 for the indexing examples (rebinds the name arr used earlier)
arr = np.arange(10)

In [120]:
# indexing: a single integer returns one element (positions start at 0)
arr[3]

3

In [121]:
# slicing: start is included, stop is excluded, so 3:5 gives positions 3 and 4
arr[3:5]

array([3, 4])

In [133]:
# to update elements with slicing: the scalar is written to every position in the slice
arr[3:5]=0
arr

array([0, 1, 2, 0, 0, 5, 6, 7, 8, 9])

In [137]:
# Keep a slice of arr under its own name
arr_slice = arr[3:5]
arr_slice

array([0, 0])

In [138]:
# Change one element through the slice
arr_slice[0]=20
arr_slice

array([20,  0])

In [139]:
# arr shows the 20 as well: a slice shares its data with the original array
# (it is a view, not a copy), so writing through arr_slice writes into arr
arr

array([ 0,  1,  2, 20,  0,  5,  6,  7,  8,  9])

### Indexing/slicing 2d arrays

In [182]:
# The 2-d array defined near the top of the notebook
arr2d

array([[1, 2, 3],
       [4, 5, 6]])

In [183]:
# elements can be accessed recursively: arr2d[0] picks the first row,
# then [1] picks the second element within that row
arr2d[0][1]

2

In [184]:
# Or with a comma: arr2d[row, column]
arr2d[0,1]

2

In [240]:
# access a "row": a single integer index on the first axis
arr2d[0]

array([1, 2, 3])

In [241]:
# access a single "row" gives a 1d array: the integer index drops the row axis
arr2d[0].shape

(3,)

In [235]:
# access a "row" another way: a length-1 slice (0:1) instead of an integer
arr2d[0:1,]

array([[1, 2, 3]])

In [236]:
# access a "row" another way gives 2d array: slicing keeps the row axis (length 1)
arr2d[0:1,].shape

(1, 3)

In [242]:
# access a "column": take every row (:) at column 0
arr2d[:,0]

array([1, 4])

In [243]:
# access a "column" gives a 1d array: the integer index drops the column axis
arr2d[:,0].shape

(2,)

In [244]:
# access a "column" another way: np.newaxis adds a length-1 axis, turning the
# 1-d column into a 2-d array with one element per row
new_arr = arr2d[:,0]
new_arr[:,np.newaxis]

array([[1],
       [4]])

In [245]:
# access a "column" another way gives 2d array: 2 rows, 1 column
new_arr[:,np.newaxis].shape

(2, 1)

## Functions

In [248]:
# With no axis given, np.mean averages over every element of the array
np.mean(arr2d)

3.5

In [249]:
# Get the mean of each column "compute the mean across rows"
# axis=0 collapses the row axis, leaving one value per column
np.mean(arr2d,axis=0)

array([2.5, 3.5, 4.5])

In [250]:
# Get the mean of each row "compute the mean across columns"
# axis=1 collapses the column axis, leaving one value per row
np.mean(arr2d,axis=1)

array([2., 5.])