# Machine Learning Zoomcamp


## 1.7 Introduction to NumPy


Plan:

* Creating arrays
* Multi-dimensional arrays
* Randomly generated arrays
* Element-wise operations
    * Comparison operations
    * Logical operations
* Summarizing operations

In [3]:
import numpy as np

In [4]:
# Inspect the imported module; its repr shows where NumPy is installed
np

<module 'numpy' from '/home/codespace/.local/lib/python3.12/site-packages/numpy/__init__.py'>

## Creating arrays


In [5]:
# Create an array of size 10 filled with zeros
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
# Create an array of size 10 filled with ones
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [7]:
# Create an array of size 10 filled with 2.5
np.full(10, 2.5)

array([2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])

In [8]:
# Create an array from a Python list
a = np.array([1, 2, 3, 5, 7, 12])
a

array([ 1,  2,  3,  5,  7, 12])

In [9]:
# Check the data type of the array's elements
a.dtype

dtype('int64')

In [10]:
# Access the third element of the array (its value is 3)
# Indexes in Python start at zero, so to access the third element we use index 2
# We get 3, but wrapped in NumPy's type system: in this case, an object of type numpy.int64
a[2] 

np.int64(3)

In [11]:
# To get the standard Python integer, we can convert it using int()
int(a[2])

3

In [12]:
# Modify the third element of the array to be 10
a[2] = 10

In [13]:
# Display the modified array: now we have 10 instead of 3 in the third position
a

array([ 1,  2, 10,  5,  7, 12])

In [14]:
# Create an array with a range of values from 3 to 9 (the stop value, 10, is not included)
np.arange(3, 10)

array([3, 4, 5, 6, 7, 8, 9])

In [15]:
# Create an array of 11 evenly spaced values from 0 to 100 (both endpoints included)
np.linspace(0, 100, 11)

array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

## Multi-dimensional arrays


In [16]:
# Create a 2D array (matrix) of size 5x2 filled with zeros
np.zeros((5, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [17]:
# Create a 3x3 array (matrix) from a nested Python list
n = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

In [18]:
# Access the element in the first row and second column, converted to a standard Python int
int(n[0, 1])

2

In [None]:
# Modify the element in the first row and second column to be 20
n[0, 1] = 20
n

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [25]:
# Access the entire second row of the array
n[1]

array([4, 5, 6])

In [26]:
# Modify the entire third row to be [1, 1, 1]
n[2] = [1, 1, 1]

In [27]:
n

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 1,  1,  1]])

In [28]:
# Access the entire first column of the array
# (":" selects every row, 0 selects the first column)
n[:, 0]

array([1, 4, 1])

In [29]:
# Modify the entire third column to be [0, 1, 2]
n[:, 2] = [0, 1, 2]

In [30]:
n

array([[ 1, 20,  0],
       [ 4,  5,  1],
       [ 1,  1,  2]])

## Randomly generated arrays


In [31]:
# Set the random seed so the generated numbers are reproducible
np.random.seed(2)
# Generate a 5x2 array of random values drawn uniformly from [0, 100)
100 * np.random.rand(5, 2)

array([[43.59949021,  2.59262318],
       [54.96624779, 43.53223926],
       [42.03678021, 33.0334821 ],
       [20.4648634 , 61.92709664],
       [29.96546737, 26.68272751]])

In [33]:
# Generate a 5x2 array with random values from a standard NORMAL distribution
# Instead of rand we use randn
np.random.seed(2)
np.random.randn(5, 2) 

array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737],
       [ 0.50288142, -1.24528809],
       [-1.05795222, -0.90900761]])

In [34]:
# Generate a 5x2 array with random integers from 0 to 99
# (`low` is inclusive, `high` is exclusive, so 100 itself is never generated)
np.random.seed(2)
np.random.randint(low=0, high=100, size=(5, 2))

array([[40, 15],
       [72, 22],
       [43, 82],
       [75,  7],
       [34, 49]])

## Element-wise operations


In [35]:
# Create an array with the integers from 0 to 4
# Note: this rebinds `a`, replacing the array created in the "Creating arrays" section
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [37]:
# Add 1 to each element in the array
# The same idea applies to other arithmetic operations: subtraction, multiplication, division, etc.
a + 1

array([1, 2, 3, 4, 5])

In [38]:
# Multiply each element by 2, then add 10, raise to the power of 2, and finally divide by 100
b = (10 + (a * 2)) ** 2 / 100

In [39]:
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [None]:
# Add two arrays element-wise
a + b

array([1.  , 2.44, 3.96, 5.56, 7.24])

In [42]:
# Divide 'a' by 'b' element-wise, then add 10 to each result
a / b + 10

array([10.        , 10.69444444, 11.02040816, 11.171875  , 11.2345679 ])

### Comparison operations

In [43]:
a

array([0, 1, 2, 3, 4])

In [44]:
# Compare each element with 2; the result is a boolean array of the same size
a >= 2

array([False, False,  True,  True,  True])

In [45]:
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [47]:
# Compare two arrays element-wise
a > b

array([False, False,  True,  True,  True])

In [48]:
# Get elements from 'a' where the corresponding element in 'a' is greater than in 'b'
a[a > b]

array([2, 3, 4])

## Summarizing operations

In [49]:
a

array([0, 1, 2, 3, 4])

In [51]:
# Get the smallest element of the array
a.min()

np.int64(0)

In [None]:
# Get the standard deviation of the array
# (by default NumPy computes the population standard deviation, i.e. ddof=0)
a.std()

np.float64(1.4142135623730951)

In [None]:
# Summarizing operations also work on multi-dimensional arrays:
# with no axis given, the minimum is taken over all elements of the matrix
n.min()

np.int64(0)

### Next

Linear algebra refresher