# Machine Learning Zoomcamp


## 1.7 Introduction to NumPy


Plan:

* Creating arrays
* Multi-dimensional arrays
* Randomly generated arrays
* Element-wise operations
    * Comparison operations
    * Logical operations
* Summarizing operations

In [1]:
import numpy as np

In [2]:
np

<module 'numpy' from '.../site-packages/numpy/__init__.py'>

## Creating arrays


In [3]:
# Length-10 1-D array filled with 0.0.
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [4]:
# Length-10 1-D array filled with 1.0.
np.ones(10)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [5]:
# Length-10 1-D array in which every element is the given fill value, 2.5.
np.full(10, 2.5)

array([2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])

In [6]:
# Build an array from a Python list; all-integer input gives an integer array.
a = np.array([1, 2, 3, 5, 7, 12])
a

array([ 1,  2,  3,  5,  7, 12])

In [7]:
# Arrays are mutable: overwrite the element at index 2 in place.
a[2] = 10

In [8]:
a

array([ 1,  2, 10,  5,  7, 12])

In [9]:
# np.arange(n) mirrors Python's built-in range(n) (0 up to, but excluding, n)
# and returns the values as an array.
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
# Two-argument form: start at 3 (inclusive) and stop before 10.
np.arange(3, 10)

array([3, 4, 5, 6, 7, 8, 9])

In [11]:
# 11 evenly spaced values from 0 to 100, both endpoints included.
np.linspace(0, 100, 11)

array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

## Multi-dimensional arrays


In [12]:
# A tuple shape gives a multi-dimensional array: 5 rows by 2 columns of zeros.
np.zeros((5, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [13]:
# A list of equal-length lists becomes a 2-D array (here 3x3), one inner list per row.
n = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

In [14]:
# Index with [row, column]: set the element in row 0, column 1.
n[0, 1] = 20

In [15]:
n

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [16]:
# A single index selects a whole row; assigning a list replaces row 2.
n[2] = [1, 1, 1]

In [17]:
n

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 1,  1,  1]])

In [18]:
# ':' selects every row, so this overwrites column 2 from top to bottom.
n[:, 2] = [0, 1, 2]

In [19]:
n

array([[ 1, 20,  0],
       [ 4,  5,  1],
       [ 1,  1,  2]])

In [20]:
# Read column 1 across all rows; the result is a 1-D array.
n[:, 1]

array([20,  5,  1])

## Randomly generated arrays


In [21]:
# Fix the seed so the draw is repeatable, then scale the uniform samples
# from [0, 1) up to [0, 100).
np.random.seed(2)
100 * np.random.rand(5, 2)

array([[43.59949021,  2.59262318],
       [54.96624779, 43.53223926],
       [42.03678021, 33.0334821 ],
       [20.4648634 , 61.92709664],
       [29.96546737, 26.68272751]])

In [22]:
# Re-seed so this cell produces the same 5x2 draw every time it runs.
np.random.seed(2)
np.random.randn(5, 2) # samples from the standard normal distribution (mean 0, variance 1)

array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737],
       [ 0.50288142, -1.24528809],
       [-1.05795222, -0.90900761]])

In [23]:
# Re-seed so this cell produces the same 5x2 draw every time it runs.
np.random.seed(2)
np.random.randint(low=0, high=100, size=(5, 2)) # random integers from 0 to 99; `high` is exclusive

array([[40, 15],
       [72, 22],
       [43, 82],
       [75,  7],
       [34, 49]])

In [24]:
# No seed is set in this cell: the values continue the global generator stream left by
# the preceding seeded cells, so running it on its own or out of order gives different numbers.
np.random.rand(100)

array([0.20464863, 0.61927097, 0.29965467, 0.26682728, 0.62113383,
       0.52914209, 0.13457995, 0.51357812, 0.18443987, 0.78533515,
       0.85397529, 0.49423684, 0.84656149, 0.07964548, 0.50524609,
       0.0652865 , 0.42812233, 0.09653092, 0.12715997, 0.59674531,
       0.226012  , 0.10694568, 0.22030621, 0.34982629, 0.46778748,
       0.20174323, 0.64040673, 0.48306984, 0.50523672, 0.38689265,
       0.79363745, 0.58000418, 0.1622986 , 0.70075235, 0.96455108,
       0.50000836, 0.88952006, 0.34161365, 0.56714413, 0.42754596,
       0.43674726, 0.77655918, 0.53560417, 0.95374223, 0.54420816,
       0.08209492, 0.3663424 , 0.8508505 , 0.40627504, 0.02720237,
       0.24717724, 0.06714437, 0.99385201, 0.97058031, 0.80025835,
       0.60181712, 0.76495986, 0.16922545, 0.29302323, 0.52406688,
       0.35662428, 0.04567897, 0.98315345, 0.44135492, 0.50400044,
       0.32354132, 0.25974475, 0.38688989, 0.8320169 , 0.73674706,
       0.37921057, 0.01301734, 0.79740494, 0.2693888 , 0.58268

## Element-wise operations


In [25]:
# Rebind a (previously the six-element example array) to 0..4 for the arithmetic below.
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [26]:
# Element-wise arithmetic: double and shift each entry of a, square it, then divide by 100.
b = (2 * a + 10) ** 2 / 100

In [27]:
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [28]:
# Arrays of equal shape combine element by element; the scalar 10 is added to every entry.
a / b + 10

array([10.        , 10.69444444, 11.02040816, 11.171875  , 11.2345679 ])

## Comparison operations

In [29]:
a

array([0, 1, 2, 3, 4])

In [30]:
# Comparing against a scalar yields a boolean array, one result per element.
a >= 2

array([False, False,  True,  True,  True])

In [31]:
b

array([1.  , 1.44, 1.96, 2.56, 3.24])

In [32]:
# Array-to-array comparison pairs up the elements at the same position.
a > b

array([False, False,  True,  True,  True])

In [33]:
# Boolean-mask indexing: keep only the elements of a where the condition is True.
a[a > b]

array([2, 3, 4])

## Summarizing operations

In [34]:
a

array([0, 1, 2, 3, 4])

In [35]:
# With default arguments this is the population standard deviation (divide by N, not N - 1),
# which is why 0..4 gives sqrt(2).
a.std()

1.4142135623730951

In [36]:
# Minimum over every element of the 2-D array n (a single scalar, not per row or column).
n.min()

0

### Next

Linear algebra refresher