In [1]:
import numpy as np

### Array Fundamentals

In [2]:
# create a new list
my_list = [1, 2, 3, 4, 5]
my_list

[1, 2, 3, 4, 5]

In [3]:
type(my_list)

list

In [4]:
# create an ndarray using list 
my_array = np.array(my_list)
my_array

array([1, 2, 3, 4, 5])

In [5]:
type(my_array)

numpy.ndarray

In [6]:
# access array elements
my_array[1]

2

In [7]:
# ndarray is mutable, same as list
my_array[2] = 100
my_array

array([  1,   2, 100,   4,   5])

In [8]:
# like the original list, Python slice notation can be used for indexing
my_array[:3]

array([  1,   2, 100])

In [9]:
# One major difference is that slice indexing of a list "copies" the elements into a new list, 
# but slicing an array returns a "view": an object that refers to the data in the original array. 
# The original array can be mutated using the view.

array2 = my_array[3:]
array2

array([4, 5])

In [10]:
array2[0] = 200
array2

array([200,   5])

In [11]:
# original array also gets modified

my_array

array([  1,   2, 100, 200,   5])

In [12]:
# 2D list
my_list_2d = [[1,2,3,4], [5,6,7,8], [9,10,11,12]]
my_list_2d

[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]

In [13]:
# 2D ndarray
my_array_2d = np.array(my_list_2d)
my_array_2d

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [14]:
# Elements of a list of lists are accessed by chaining two index operations,
# one pair of square brackets per nesting level.

# A single comma-separated index, my_list_2d[1,3], does not work on a list:
# it raises "TypeError: list indices must be integers or slices, not tuple".
my_list_2d[1][3]

8

In [15]:
# Difference between an array and a list of lists is that an element of the array can be accessed 
# by specifying the index along each axis within a single set of square brackets, separated by commas. 
# For instance, the element 8 is in row 1 and column 3:

my_array_2d[1, 3]

8

In [16]:
# We might hear of a 0-D (zero-dimensional) array referred to as a “scalar”, 
# a 1-D (one-dimensional) array as a “vector”, 
# a 2-D (two-dimensional) array as a “matrix”, 
# or an N-D (N-dimensional, where “N” is typically an integer greater than 2) array as a “tensor”. 

# For clarity, it is best to avoid the mathematical terms when referring to an array 
# because the mathematical objects with these names behave differently than arrays 
# (e.g. “matrix” multiplication is fundamentally different from “array” multiplication), 
# and there are other objects in the scientific Python ecosystem that have these names 
# (e.g. the fundamental data structure of PyTorch is the “tensor”).

### Array attributes

In [17]:
# Let's discuss the array attributes: ndim, shape, size, dtype

In [18]:
# ndim
# The number of dimensions of an array is contained in the ndim attribute.

my_array_2d.ndim

2

In [19]:
my_array.ndim

1

In [20]:
# shape
# The shape of an array is a tuple of non-negative integers that specify the number of elements along each dimension.

my_array_2d.shape

(3, 4)

In [21]:
my_array.shape

(5,)

In [22]:
len(my_array_2d.shape) == my_array_2d.ndim

True

In [23]:
len(my_array.shape) == my_array.ndim

True

In [24]:
# size
# The fixed, total number of elements in an array is contained in the size attribute.

my_array_2d.size

12

In [25]:
my_array.size

5

In [26]:
import math

In [27]:
my_array_2d.size == math.prod(my_array_2d.shape)

True

In [28]:
my_array.size == math.prod(my_array.shape)

True

In [29]:
# dtype
# Arrays are typically “homogeneous”, meaning that they contain elements of only one “data type”. 
# The data type is recorded in the dtype attribute.

my_array_2d.dtype

dtype('int32')

In [30]:
my_array.dtype

dtype('int32')

### Create a basic array

In [31]:
# Let's discuss np.zeros(), np.ones(), np.empty(), np.arange(), np.linspace()

In [32]:
# np.zeros()
# Besides creating an array from a sequence of elements, you can easily create an array filled with 0’s:

np.zeros(3)

array([0., 0., 0.])

In [33]:
# np.ones()
# Besides creating an array from a sequence of elements, you can easily create an array filled with 1’s:

np.ones(3)

array([1., 1., 1.])

In [34]:
# np.empty()
# The function empty creates an array without initializing its entries: the initial content is arbitrary
# (whatever happens to be in that memory), not deliberately random values.
# The reason to use empty over zeros (or something similar) is speed - just make sure to fill every element afterwards!

np.empty(3)

array([1., 1., 1.])

In [35]:
# np.arange()
# We can create an array with a range of elements:

np.arange(5)

array([0, 1, 2, 3, 4])

In [36]:
# And even an array that contains a range of evenly spaced values.
# To do this, we will specify the first number, the stop value, and the step size.
# The stop value itself is excluded, so a stop of 11 is needed here for 10 to be included.

np.arange(2, 11, 2)

array([ 2,  4,  6,  8, 10])

In [37]:
# np.linspace()
# We can use np.linspace() to create an array with values that are spaced linearly in a specified interval.
# Both endpoints are included by default, and num sets how many values are generated:

np.linspace(0, 10, num=5)

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [38]:
np.linspace(0, 10, num=8)

array([ 0.        ,  1.42857143,  2.85714286,  4.28571429,  5.71428571,
        7.14285714,  8.57142857, 10.        ])

In [39]:
# While the default data type is floating point (np.float64), 
# we can explicitly specify which data type we want using the dtype keyword.

x = np.ones(2, dtype=np.int64)
x

array([1, 1], dtype=int64)

### Adding, removing, and sorting elements

In [40]:
# Let's discuss np.sort() and np.concatenate()

In [41]:
# np.sort()
# Sorting the elements of an array is simple with np.sort(), which returns a sorted copy
# and leaves the original array unchanged.
# We can specify the axis, kind, and order when we call the function.

arr = np.array([2, 1, 5, 3, 7, 4, 6, 8])
arr

array([2, 1, 5, 3, 7, 4, 6, 8])

In [42]:
np.sort(arr)

array([1, 2, 3, 4, 5, 6, 7, 8])

In [43]:
# np.concatenate()
# Joins a sequence of arrays along an existing axis (axis=0 by default)

a = np.array([1, 2, 3, 4])
a

array([1, 2, 3, 4])

In [44]:
b = np.array([5, 6, 7, 8])
b

array([5, 6, 7, 8])

In [45]:
np.concatenate((a,b))

array([1, 2, 3, 4, 5, 6, 7, 8])

In [46]:
x = np.array([[1, 2], [3, 4]])
x

array([[1, 2],
       [3, 4]])

In [47]:
y = np.array([[5, 6]])
y

array([[5, 6]])

In [48]:
np.concatenate((x, y), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [49]:
np.concatenate((x, y))

array([[1, 2],
       [3, 4],
       [5, 6]])

### Reshape an array

In [50]:
# reshape()
# Using arr.reshape() will give a new shape to an array without changing the data. 
# Just remember that when we use the reshape method, the array we want to produce needs to have 
# the same number of elements as the original array. 
# If we start with an array with 12 elements, we’ll need to make sure that
# our new array also has a total of 12 elements.

In [51]:
a = np.arange(6)
a

array([0, 1, 2, 3, 4, 5])

In [52]:
# we can reshape this array to an array with three rows and two columns:
b = a.reshape(3, 2)
b

array([[0, 1],
       [2, 3],
       [4, 5]])

### Convert 1D array to 2D array

In [53]:
# Let's discuss np.newaxis and np.expand_dims

In [54]:
# np.newaxis
# This will increase the dimensions of our array by one dimension when used once.
# This means that a 1D array will become a 2D array, a 2D array will become a 3D array, and so on.

In [55]:
a = np.arange(1,7)
a

array([1, 2, 3, 4, 5, 6])

In [56]:
a.shape

(6,)

In [57]:
# We can convert a 1D array to a row vector by inserting an axis along the first dimension:

row_vector = a[np.newaxis, :]
row_vector

array([[1, 2, 3, 4, 5, 6]])

In [58]:
row_vector.shape

(1, 6)

In [59]:
# for a column vector, we can insert an axis along the second dimension:

col_vector = a[:, np.newaxis]
col_vector

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

In [60]:
col_vector.shape

(6, 1)

In [61]:
# np.expand_dims
# We can also expand an array by inserting a new axis at a specified position

In [62]:
a = np.arange(1,7)
a

array([1, 2, 3, 4, 5, 6])

In [63]:
a.shape

(6,)

In [64]:
# We can use np.expand_dims to add an axis at index position 0 with:

b = np.expand_dims(a, axis=0)
b

array([[1, 2, 3, 4, 5, 6]])

In [65]:
b.shape

(1, 6)

In [66]:
# similarly for index position 1:

c = np.expand_dims(a, axis=1)
c

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

In [67]:
c.shape

(6, 1)

### Indexing and Slicing

In [68]:
# We can index and slice NumPy arrays in the same ways we can slice Python lists.

In [69]:
data = np.array([1, 2, 3])
data

array([1, 2, 3])

In [70]:
data[1]

2

In [71]:
data[0:2]

array([1, 2])

In [72]:
data[1:]

array([2, 3])

In [73]:
data[-2:]

array([2, 3])

In [74]:
# If we want to select values from our array that fulfill certain conditions, it’s straightforward with NumPy.

In [75]:
# A 3x4 array holding the integers 1 through 12, written one row per line
# so the row/column layout is visible in the source as well as in the output.
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [76]:
a[a<5]

array([1, 2, 3, 4])

In [77]:
a[a>5]

array([ 6,  7,  8,  9, 10, 11, 12])

In [78]:
# divisible by 2
a[a%2 == 0]

array([ 2,  4,  6,  8, 10, 12])

In [79]:
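# Conditions can be combined with the logical operators & (and) and | (or).
# Each comparison needs its own parentheses because & and | bind more tightly than comparisons.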

a[(a > 2) & (a < 11)]

array([ 3,  4,  5,  6,  7,  8,  9, 10])

In [80]:
# | combines the two boolean masks element-wise (logical OR); the result is the same mask as a >= 5
(a > 5) | (a == 5)

array([[False, False, False, False],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [81]:
# np.nonzero()
# we can use np.nonzero() to select elements or indices from an array.

In [82]:
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [83]:
# We can use np.nonzero() to print the indices of elements that are, for example, less than or equal to 6:

b = np.nonzero(a <= 6)
b

(array([0, 0, 0, 0, 1, 1], dtype=int64),
 array([0, 1, 2, 3, 0, 1], dtype=int64))

In [84]:
type(b)

tuple

In [85]:
# In this example, a tuple of arrays was returned: one for each dimension. 
# The first array represents the row indices where these values are found, and 
# the second array represents the column indices where the values are found.

In [86]:
# np.nonzero() returned one index array per dimension, so zipping those arrays
# together pairs the indices up into a list of coordinates where the elements exist.
# Unpacking the tuple with * zips however many index arrays there are, so the
# same expression works for arrays with any number of dimensions.
# For example:

list(zip(*b))

[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1)]

In [87]:
# We can also use np.nonzero() to print the elements in an array that are less than or equal to 6:

a[b]

array([1, 2, 3, 4, 5, 6])

In [88]:
# If the element we’re looking for doesn’t exist in the array, then the returned array of indices will be empty.

np.nonzero(a == 42)

(array([], dtype=int64), array([], dtype=int64))

### Create an array from existing data

In [90]:
# Let's discuss slicing and indexing, np.vstack(), np.hstack(), np.hsplit(), view(), copy()

In [91]:
a = np.array([1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
a

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [93]:
# We can create a new array from a section of our array any time by 
# specifying where we want to slice our array.

arr1 = a[3:8]
arr1

array([4, 5, 6, 7, 8])

In [94]:
# We can also stack two existing arrays, both vertically and horizontally.

a1 = np.array([[1, 1],
               [2, 2]])
a1

array([[1, 1],
       [2, 2]])

In [95]:
a2 = np.array([[3, 3],
               [4, 4]])
a2

array([[3, 3],
       [4, 4]])

In [96]:
# We can stack them vertically with vstack:

np.vstack((a1, a2))

array([[1, 1],
       [2, 2],
       [3, 3],
       [4, 4]])

In [97]:
# Or stack them horizontally with hstack:

np.hstack((a1, a2))

array([[1, 1, 3, 3],
       [2, 2, 4, 4]])

In [98]:
# We can split an array into several smaller arrays using hsplit.

np.arange(1, 25)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24])

In [99]:
# Lay the integers 1..24 out as 2 rows of 12 columns, giving enough columns
# to split in the hsplit examples.
x = np.reshape(np.arange(1, 25), (2, 12))
x

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
       [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]])

In [100]:
# If we wanted to split this array into three equally shaped arrays, we would run:

np.hsplit(x, 3)

[array([[ 1,  2,  3,  4],
        [13, 14, 15, 16]]),
 array([[ 5,  6,  7,  8],
        [17, 18, 19, 20]]),
 array([[ 9, 10, 11, 12],
        [21, 22, 23, 24]])]

In [104]:
# If we wanted to split our array after the third and fourth column, we’d run:

np.hsplit(x, (3, 4))

[array([[ 1,  2,  3],
        [13, 14, 15]]),
 array([[ 4],
        [16]]),
 array([[ 5,  6,  7,  8,  9, 10, 11, 12],
        [17, 18, 19, 20, 21, 22, 23, 24]])]

In [105]:
# We can use the view method to create a new array object that 
# looks at the same data as the original array (a shallow copy).

# Views are an important NumPy concept! 
# NumPy functions, as well as operations like indexing and slicing, will return views whenever possible. 
# This saves memory and is faster (no copy of the data has to be made). 

# However it’s important to be aware of this - modifying data in a view also modifies the original array!

In [106]:
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [107]:
# Now we create an array b1 by slicing a and modify the first element of b1. 
# This will modify the corresponding element in a as well!

b1 = a[0, :]
b1

array([1, 2, 3, 4])

In [108]:
b1[0] = 99
b1

array([99,  2,  3,  4])

In [109]:
a

array([[99,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [110]:
# Using the copy method will make a complete copy of the array and its data (a deep copy). 
# To use this on our array, we could run:

b2 = a.copy()
b2

array([[99,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [111]:
# Assigning a scalar to b2[0] sets every element of the first row of the copy to 1.
# Because b2 was made with copy(), this does not touch the original array a.
b2[0] = 1
b2

array([[ 1,  1,  1,  1],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [112]:
a

array([[99,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

### Basic array operations

In [113]:
# Let's discuss addition, subtraction, multiplication, division, and more

In [114]:
data = np.array([1,2])
data

array([1, 2])

In [117]:
np.ones(2)

array([1., 1.])

In [115]:
ones = np.ones(2, dtype=int)
ones

array([1, 1])

In [118]:
# add
data + ones

array([2, 3])

In [119]:
# subtract
data - ones

array([0, 1])

In [120]:
# multiply

data * ones

array([1, 2])

In [121]:
data * data

array([1, 4])

In [122]:
# division

data / ones

array([1., 2.])

In [123]:
data / data

array([1., 1.])

In [124]:
# If we want to find the sum of the elements in an array, we’d use sum(). 

a = np.array([1,2,3,4])
a

array([1, 2, 3, 4])

In [125]:
a.sum()

10

In [126]:
# To add the rows or the columns in a 2D array, we would specify the axis.

b = np.array([[1, 1], [2, 2]])
b

array([[1, 1],
       [2, 2]])

In [127]:
# We can sum over the axis of rows (axis=0); this adds the rows together and gives one total per column:

b.sum(axis=0)

array([3, 3])

In [128]:
# We can sum over the axis of columns (axis=1); this adds the columns together and gives one total per row:

b.sum(axis=1)

array([2, 4])

### Broadcasting

In [129]:
# There are times when we might want to carry out an operation between an array and a single number 
# (also called an operation between a vector and a scalar) or between arrays of two different sizes. 

# For example, our array (we’ll call it “data”) might contain information about distance in miles 
# but we want to convert the information to kilometers. 

# We can perform this operation with:

data = np.array([1.0, 2.0])
data

array([1., 2.])

In [130]:
data * 1.6

array([1.6, 3.2])

In [131]:
# NumPy understands that the multiplication should happen with each cell. 
# That concept is called broadcasting. 

# Broadcasting is a mechanism that allows NumPy to perform operations on arrays of different shapes.
# The shapes are compared dimension by dimension, starting from the trailing (rightmost) one;
# two dimensions are compatible when they are equal or when one of them is 1.

# If the dimensions are not compatible, we will get a ValueError.

### Other useful array operations