In [None]:
#numpy

#homogeneous multidimensional arrays
# a way of having grids (or matrices) of elements; all elements are the same data type

## Arrays vs lists

| `numpy` arrays | Python `list` |
|---|---|
| all elements must be the same type | elements can be different types |
| fixed size | can change size |
| n-dimensional | 1-dimensional |
| faster to process | slower to process |
| consumes less memory | consumes more memory |

In [1]:
import numpy as np

In [2]:
# a 2 x 3 array built from a nested list: one inner list per row
a = np.array([
    [1, 2, 3],
    [3, 2, 1],
])
a

array([[1, 2, 3],
       [3, 2, 1]])

In [3]:
a2 = np.array([1,2,3])
a2

array([1, 2, 3])

In [4]:
# number of dimensions
print(a.ndim)

2


In [5]:
print(a2.ndim)

1


In [6]:
# the shape will be rows x columns --> for a matrix
print(a.shape)

(2, 3)


In [7]:
print(a2.shape)

(3,)


In [8]:
# the size will be rows * columns --> for a matrix
print(a.size)

6


In [9]:
print(a2.size)

3


In [10]:
# check the data type of all the elements
print(a.dtype)

int64


In [11]:
print(a2.dtype)

int64


In [12]:
# create 2x3x4 array of zeros
np.zeros((2,3,4))

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [13]:
# create a 2x3x2 array of zeros
np.zeros((2,3,2))

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [14]:
np.ones_like(a)

array([[1, 1, 1],
       [1, 1, 1]])

In [15]:
# create a 1D array that goes from 1 to 10 in steps of 2
np.arange(1, 10, 2)

array([1, 3, 5, 7, 9])

In [16]:
# create a 1D array that goes from 0 up to (but not including) 1 in steps of 0.1
np.arange(0, 1, 0.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [32]:
# you can set a seed to create a reproducible random example
np.random.seed(50)

# creating a 3x4 array of random integers that range from 1 to 9 (the upper bound, 10, is exclusive)
np.random.randint(1,10,(3,4))

array([[1, 1, 2, 5],
       [7, 6, 7, 7],
       [6, 3, 8, 5]])

In [34]:
# repeat values and arrays to create bigger ones

# creating a 1D array through repetition
np.repeat(10, 5)

array([10, 10, 10, 10, 10])

In [36]:
# creating a 2D array through repetition
onedim_arr = np.arange(1, 6)  # 1, 2, 3, 4, 5
# reps=(5, 1): stack the row 5 times vertically, once horizontally
multidim_arr = np.tile(onedim_arr, reps=(5, 1))
multidim_arr

array([[1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5]])

In [38]:
# np.repeat repeats each *element* 5 times in place, so the result is still a flat 1D array
multidim_arr = np.repeat(onedim_arr, repeats=5)
multidim_arr

array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5,
       5, 5, 5])

In [40]:
# reps=(1, 5): a single row containing the whole array 5 times side by side
multidim_arr = np.tile(onedim_arr, reps=(1, 5))
multidim_arr

array([[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2,
        3, 4, 5]])

In [42]:
# reps=(3, 3): 3 copies down and 3 copies across
multidim_arr = np.tile(onedim_arr, reps=(3, 3))
multidim_arr

array([[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]])

In [43]:
#simulating data

# build a seeded Generator so the simulated values are reproducible
rng = np.random.default_rng(seed=1)

# draw a 5 x 4 array from a normal distribution
# loc is the mean (10), scale is the standard deviation (2.5)
randrng = rng.normal(loc=10, scale=2.5, size=(5, 4))
randrng

array([[10.86396048, 12.05404536, 10.82609269,  6.74210692],
       [12.26338967, 11.11593643,  8.65761691, 11.45279526],
       [10.91143099, 10.73533124, 10.0710556 , 11.36678247],
       [ 8.15886478,  9.59272513,  8.79470172, 11.49711553],
       [10.09930527,  9.26885812,  8.04522884,  9.3570194 ]])

In [44]:
# reuse the same generator to draw a random sample from a Poisson distribution
# lam (the expected rate) is 1 and the result is a 5 x 6 array
poissonrng = rng.poisson(lam=1, size=(5, 6))
poissonrng

array([[1, 3, 1, 0, 2, 2],
       [1, 1, 0, 2, 0, 2],
       [1, 3, 1, 0, 3, 0],
       [0, 3, 0, 3, 2, 1],
       [4, 0, 1, 1, 1, 1]])

In [45]:
b = np.array([[5, 4, 3, 2, 1, 0],
              [10, 8, 6, 4, 2, 0]])

#b is a 2 x 6 matrix
b

array([[ 5,  4,  3,  2,  1,  0],
       [10,  8,  6,  4,  2,  0]])

In [46]:
#after we transpose we have a 6 x 2 matrix
b.T

array([[ 5, 10],
       [ 4,  8],
       [ 3,  6],
       [ 2,  4],
       [ 1,  2],
       [ 0,  0]])

In [47]:
# change dimensions
b.reshape(4, 3)

array([[ 5,  4,  3],
       [ 2,  1,  0],
       [10,  8,  6],
       [ 4,  2,  0]])

In [48]:
# collapse an array to one dimension
b.flatten()

array([ 5,  4,  3,  2,  1,  0, 10,  8,  6,  4,  2,  0])

In [49]:
a

array([[1, 2, 3],
       [3, 2, 1]])

In [50]:
b

array([[ 5,  4,  3,  2,  1,  0],
       [10,  8,  6,  4,  2,  0]])

In [51]:
# stack a and b horizontally (b first, then a)
# make sure that you pass the arrays to stack as a tuple
np.hstack((b, a))

array([[ 5,  4,  3,  2,  1,  0,  1,  2,  3],
       [10,  8,  6,  4,  2,  0,  3,  2,  1]])

In [53]:
# stack vertically
np.vstack((b.reshape(4,3), a))

array([[ 5,  4,  3],
       [ 2,  1,  0],
       [10,  8,  6],
       [ 4,  2,  0],
       [ 1,  2,  3],
       [ 3,  2,  1]])

In [54]:
#basic operations

arr1 = np.array([5, 10, 15, 20])
arr1

array([ 5, 10, 15, 20])

In [56]:
arr2 = np.arange(5, 9)
arr2

array([5, 6, 7, 8])

In [57]:
np.sin(np.arange(0, 360, 45)*np.pi/180)

array([ 0.00000000e+00,  7.07106781e-01,  1.00000000e+00,  7.07106781e-01,
        1.22464680e-16, -7.07106781e-01, -1.00000000e+00, -7.07106781e-01])

In [58]:
arr1 - arr2

array([ 0,  4,  8, 12])

In [59]:
# the same element-by-element subtraction done in plain Python,
# for comparison with the vectorised `arr1 - arr2`
results = [x - y for x, y in zip(arr1, arr2)]

results

[0, 4, 8, 12]

In [60]:
# multiplying each element in arr1 by 2
arr1 * 2

array([10, 20, 30, 40])

In [61]:
arr3 = np.array([1, 2])
arr3

array([1, 2])

In [62]:
# arr2 has 4 elements and arr3 has 2, so NumPy cannot line them up
# element by element and raises ValueError.
# The error is the point of this cell, so catch and display it rather than
# letting it stop a "Restart & Run All".
try:
    arr2 + arr3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: ignored

In [63]:
# zip() stops as soon as the shorter input runs out, so only the first
# two pairs are added and no error is raised
short_output = [x + y for x, y in zip(arr2, arr3)]

short_output

[6, 8]

In [64]:
# helpful for assignment 1
arr1

array([ 5, 10, 15, 20])

In [65]:
#we can summarize the values in an array using methods
print(f'''arr1 sums to {arr1.sum()}.
Its max value is {arr1.max()}, and its mean is {arr1.mean()}.''')

arr1 sums to 50.
Its max value is 20, and its mean is 12.5.


In [66]:
arr1.sum()

50

In [67]:
# we can also get descriptive statistics using functions
np.median(arr1)

12.5

In [68]:
# arrays have no .median() method (median is only available as the function
# np.median), so this raises AttributeError.
# Catch and display the error so the notebook still runs top to bottom.
try:
    arr1.median()
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: ignored

In [71]:
# operations in multiple dimensions

# the multiples of 10 from 0 to 110, laid out row by row in a 3 x 4 grid
tens = (np.arange(12) * 10).reshape(3, 4)
tens

array([[  0,  10,  20,  30],
       [ 40,  50,  60,  70],
       [ 80,  90, 100, 110]])

In [74]:
horizontal = np.array([-5, -10, -15, -20])
horizontal

array([ -5, -10, -15, -20])

In [75]:
tens + horizontal 

array([[-5,  0,  5, 10],
       [35, 40, 45, 50],
       [75, 80, 85, 90]])

In [76]:
# a column vector: one value per row, shape (3, 1)
vertical = np.array([100, 200, 300]).reshape(-1, 1)
vertical

array([[100],
       [200],
       [300]])

In [77]:
tens + vertical

array([[100, 110, 120, 130],
       [240, 250, 260, 270],
       [380, 390, 400, 410]])

In [78]:
#calculate statistics for multidimensional arrays
#difference is that now we have to specify the axis
#if your axis is 0 --> calculating values for each column
#if your axis is 1 --> calculating values for each row

tens

array([[  0,  10,  20,  30],
       [ 40,  50,  60,  70],
       [ 80,  90, 100, 110]])

In [79]:
tens.mean(axis=0)

array([40., 50., 60., 70.])

In [81]:
(10 + 50 + 90)/3

50.0

In [82]:
tens.mean(axis=1)

array([15., 55., 95.])

In [83]:
(0 + 10 + 20 + 30)/4

15.0

In [84]:
#indexing, slicing, and iterating
arr1

array([ 5, 10, 15, 20])

In [85]:
#Python starts indexing from 0
arr1[1]

10

In [86]:
#slicing
arr1[1:3]

array([10, 15])

In [87]:
#iterating
for i in arr1:
  print(i)

5
10
15
20


In [88]:
tens

array([[  0,  10,  20,  30],
       [ 40,  50,  60,  70],
       [ 80,  90, 100, 110]])

In [89]:
#indexing

#tens[row, column]
tens[1, 2]

60

In [90]:
tens[0]

array([ 0, 10, 20, 30])

In [91]:
tens[:,0]

array([ 0, 40, 80])

In [92]:
tens[0:2,1:3]

array([[10, 20],
       [50, 60]])

In [99]:
#mutations and copies

# draw 12 random integers between 1 and 10 (high=11 is exclusive) as a 3 x 4 array
matrix = np.random.randint(low=1, high=11, size=(3, 4))

# plain assignment only gives the SAME array a second name; no data is copied
matrix2 = matrix

# an explicit copy is an independent array that later edits to `matrix` will not touch
matrix3 = np.copy(matrix)

matrix

array([[4, 6, 4, 7],
       [9, 7, 1, 9],
       [3, 2, 2, 7]])

In [105]:
# `matrix4` is never defined in this notebook, so this assignment raises
# NameError before `matrix` is rebound. Catch and display the error so a
# "Restart & Run All" keeps going and `matrix` still refers to the original
# array used by the alias/copy demonstration that follows.
try:
    matrix = matrix4
except NameError as err:
    print(f"NameError: {err}")


NameError: ignored

In [100]:
matrix2[1] = [0, 0, 0, 0]
matrix2

array([[4, 6, 4, 7],
       [0, 0, 0, 0],
       [3, 2, 2, 7]])

In [101]:
matrix

array([[4, 6, 4, 7],
       [0, 0, 0, 0],
       [3, 2, 2, 7]])

In [102]:
matrix3

array([[4, 6, 4, 7],
       [9, 7, 1, 9],
       [3, 2, 2, 7]])

In [106]:
# logic and filtering

tens

array([[  0,  10,  20,  30],
       [ 40,  50,  60,  70],
       [ 80,  90, 100, 110]])

In [107]:
tens % 3 == 0

array([[ True, False, False,  True],
       [False, False,  True, False],
       [False,  True, False, False]])

In [108]:
tens == 20

array([[False, False,  True, False],
       [False, False, False, False],
       [False, False, False, False]])

In [None]:
# these resulting arrays can be called masks because they are used to mask, or filter, data

In [112]:
# rebuild the boolean mask by hand with plain Python, one row at a time,
# to show what `tens % 3 == 0` does in a single vectorised step
masked = []

for row in tens:
    masked_row = [value % 3 == 0 for value in row]
    print(masked_row)
    masked.append(masked_row)


[True, False, False, True]
[False, False, True, False]
[False, True, False, False]


In [113]:
masked

[[True, False, False, True],
 [False, False, True, False],
 [False, True, False, False]]

In [114]:
tens % 3 == 0

array([[ True, False, False,  True],
       [False, False,  True, False],
       [False,  True, False, False]])

In [115]:
tens[tens % 3 == 0]

array([ 0, 30, 60, 90])

In [116]:
mask = tens % 3 == 0
tens[mask]

array([ 0, 30, 60, 90])

In [120]:
# build a new array with the same shape as `tens`:
# 1 wherever the element is divisible by 3, 8 everywhere else
np.where(tens % 3 == 0, #condition
         1, # value used where the condition is True
         8) # value used where the condition is False

array([[1, 8, 8, 1],
       [8, 8, 1, 8],
       [8, 1, 8, 8]])