# NumPy introduction

This notebook is a very brief introduction to the very basics of NumPy.

Import NumPy under its conventional alias `np` (make sure you have installed it in your conda environment).

In [1]:
import numpy as np

Creating an ndarray

In [2]:
data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

Or an array of the first 10 non-negative integers (0 through 9):

In [3]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
np.arange(9).reshape((3, 3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [5]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
[3, 5, 7]   # A plain built-in Python list (not a NumPy array)

[3, 5, 7]

In [7]:
np.array([3, 5, 7])   # A list can be turned into an ndarray with np.array

array([3, 5, 7])

For the rest of the notebook, whenever we talk about arrays we will be talking about ndarrays!

## Operations on arrays

In [8]:
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [9]:
data * 10

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [10]:
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [11]:
data * data   # Note: * is element-wise multiplication, not matrix multiplication

array([[2.250e+00, 1.000e-02, 9.000e+00],
       [0.000e+00, 9.000e+00, 4.225e+01]])

In [12]:
data ** 2    # Element-wise exponentiation (same result as data * data above)

array([[2.250e+00, 1.000e-02, 9.000e+00],
       [0.000e+00, 9.000e+00, 4.225e+01]])

## Shape and types of arrays

In [13]:
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [14]:
data.shape

(2, 3)

In [15]:
np.arange(9).reshape((3, 3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [16]:
np.arange(9).reshape((3, 3)).shape

(3, 3)

In [17]:
np.arange(18).reshape((2, 3, 3))

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]]])

In [18]:
np.arange(18).reshape((2, 3, 3)).shape

(2, 3, 3)

In [19]:
data.ndim   # Number of dimensions

2

In [20]:
np.arange(9).reshape((3, 3)).ndim

2

In [21]:
np.arange(18).reshape((2, 3, 3)).ndim

3

In [22]:
data.dtype   # Type of the data in the array

dtype('float64')

In [23]:
np.arange(9).reshape((3, 3)).dtype

dtype('int64')

## Basic Indexing and Slicing

In [24]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [25]:
arr[0]

np.int64(0)

In [26]:
arr[5]

np.int64(5)

In [27]:
arr[5:8]   # Elements at indices 5, 6 and 7 (the stop index 8 is excluded)

array([5, 6, 7])

In [28]:
arr[:4]

array([0, 1, 2, 3])

In [29]:
arr[4:]

array([4, 5, 6, 7, 8, 9])

In [30]:
arr[-1]   # The last element

np.int64(9)

In [31]:
arr[-3]

np.int64(7)

In [32]:
arr[4:-1]   # From index 4 up to, but not including, the last element

array([4, 5, 6, 7, 8])

In [33]:
# Assigning a scalar to a slice writes that value into every selected element
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

NOTE: An important first distinction from Python's built-in lists is that array slices are views on the original array. This means that the data is not copied, and any modifications to the view will be reflected in the source array. If you want a copy of a slice of an ndarray instead of a view, you will need to explicitly copy the array—for example, `arr[5:8].copy()`. As you will see, pandas works this way, too.

In [34]:
# arr_slice is a view onto elements 5..7 of arr; no data is copied
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [35]:
# Writing through the view also changes the original array (arr[6] here)
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [36]:
# The bare slice [:] assigns to every element of the view, i.e. to arr[5:8]
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

Indexing in multidimensional arrays:

In [37]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]

array([7, 8, 9])

In [38]:
arr2d[0][2]   # Select row 0 first, then element 2 of that row

np.int64(3)

In [39]:
arr2d[0, 2]   # Same element, selected with a single comma-separated (row, column) index

np.int64(3)

## Pseudo random numbers

In [40]:
np.random.standard_normal(6)

array([ 0.46025741, -0.84002522, -0.25258644,  0.2523535 ,  0.95309008,
        0.98347051])

In [41]:
samples = np.random.standard_normal(size=(4, 4))
samples

array([[ 0.13649548, -0.56193523,  0.22753476, -0.48452861],
       [-0.98568233,  0.98228694, -0.69784819,  0.2509601 ],
       [ 0.91248887, -0.74737261, -1.30011478, -0.3464783 ],
       [-1.24538753,  0.38358363,  1.41078112, -0.13906282]])

In [42]:
samples = np.random.standard_normal(size=(4, 4))
samples

array([[ 0.76112381,  1.54618684, -0.63433583, -0.2624301 ],
       [-1.68617678, -0.79218615, -0.07575092,  0.21452631],
       [-1.23325   , -0.85365608,  0.45589441,  0.909926  ],
       [ 0.06731866, -1.80258948, -0.06871729,  1.18302358]])

In [43]:
# Seed NumPy's global random state so that the draw below is reproducible
np.random.seed(7543)
samples = np.random.standard_normal(size=(4, 4))
samples

array([[ 1.27140175, -0.04332327,  1.10938361,  0.6228281 ],
       [ 1.13558614,  2.84856201, -0.75810951, -1.42308195],
       [ 0.53978651, -0.57886598, -0.14440006,  1.05849781],
       [ 0.62437829,  1.39862453,  0.41538407, -1.73959407]])

In [44]:
# Re-seeding with the same value reproduces exactly the same samples as the previous cell
np.random.seed(7543)
samples = np.random.standard_normal(size=(4, 4))
samples

array([[ 1.27140175, -0.04332327,  1.10938361,  0.6228281 ],
       [ 1.13558614,  2.84856201, -0.75810951, -1.42308195],
       [ 0.53978651, -0.57886598, -0.14440006,  1.05849781],
       [ 0.62437829,  1.39862453,  0.41538407, -1.73959407]])

In [45]:
?np.random.standard_normal

[31mSignature:[39m np.random.standard_normal(size=[38;5;28;01mNone[39;00m)
[31mDocstring:[39m
standard_normal(size=None)

Draw samples from a standard Normal distribution (mean=0, stdev=1).

.. note::
    New code should use the
    `~numpy.random.Generator.standard_normal`
    method of a `~numpy.random.Generator` instance instead;
    please see the :ref:`random-quick-start`.

Parameters
----------
size : int or tuple of ints, optional
    Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
    ``m * n * k`` samples are drawn.  Default is None, in which case a
    single value is returned.

Returns
-------
out : float or ndarray
    A floating-point array of shape ``size`` of drawn samples, or a
    single sample if ``size`` was not specified.

See Also
--------
normal :
    Equivalent function with additional ``loc`` and ``scale`` arguments
    for setting the mean and standard deviation.
random.Generator.standard_normal: which should be used for new code.

Notes
-

In [46]:
?np.random.normal

[31mSignature:[39m np.random.normal(loc=[32m0.0[39m, scale=[32m1.0[39m, size=[38;5;28;01mNone[39;00m)
[31mDocstring:[39m
normal(loc=0.0, scale=1.0, size=None)

Draw random samples from a normal (Gaussian) distribution.

The probability density function of the normal distribution, first
derived by De Moivre and 200 years later by both Gauss and Laplace
independently [2]_, is often called the bell curve because of
its characteristic shape (see the example below).

The normal distributions occurs often in nature.  For example, it
describes the commonly occurring distribution of samples influenced
by a large number of tiny, random disturbances, each with its own
unique distribution [2]_.

.. note::
    New code should use the `~numpy.random.Generator.normal`
    method of a `~numpy.random.Generator` instance instead;
    please see the :ref:`random-quick-start`.

Parameters
----------
loc : float or array_like of floats
    Mean ("centre") of the distribution.
scale : float or ar

In [47]:
np.random.normal(loc=0, scale=1, size=(4, 4))   # loc is mean and scale is standard deviation

array([[-1.81884292, -0.55703011,  0.31198285, -0.38893175],
       [-0.37512346, -0.97726323, -1.32877957,  1.40425757],
       [ 0.43319899,  1.39928187,  0.38719385, -0.49198852],
       [ 0.02135958, -1.30883377,  1.43643771,  0.16148611]])

In [48]:
np.random.normal(loc=10, scale=3, size=(4, 4))

array([[ 8.89042101,  9.38605894,  7.52072832,  9.6227371 ],
       [14.26204971,  9.89353353, 10.92666206, 12.38517339],
       [ 7.73782441, 15.48614439, 12.68835077, 13.15828441],
       [15.65332629, 10.90604289,  3.60942003,  7.43037449]])

In [49]:
np.random.uniform(size=20)

array([0.13817286, 0.6504811 , 0.04545496, 0.63376064, 0.37648109,
       0.04688349, 0.22374641, 0.03068383, 0.99147247, 0.47615766,
       0.68514384, 0.24624942, 0.57083332, 0.96671957, 0.89088139,
       0.61213768, 0.65863131, 0.69876043, 0.06900028, 0.24650304])

See the book for a list of more distributions.

## Element wise application of functions

In [50]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [51]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [52]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [53]:
x = np.random.standard_normal(8)
x

array([ 0.02404029,  0.08646241, -0.84280157, -1.29068063,  0.49297634,
       -0.73377302, -1.3750421 , -0.35488264])

In [54]:
y = np.random.standard_normal(8)
y

array([-1.92994006, -0.52149893,  0.47312506, -0.18097657, -1.9904029 ,
       -1.09839481, -0.79309888,  1.18626625])

In [55]:
np.maximum(x, y)   # Element-wise maximum: for each position, the larger of x[i] and y[i]

array([ 0.02404029,  0.08646241,  0.47312506, -0.18097657,  0.49297634,
       -0.73377302, -0.79309888,  1.18626625])