### Why Numpy and not conventional Python lists and so on?
Comparing the computation times of an explicit for-loop with vectorized implementation in numpy.

In [2]:
import time
import math
import numpy as np

iter = 10000000

x = np.zeros((iter,1))
v = np.random.randn(iter,1)

before = time.time()

for i in range(iter):
    x[i] = math.exp(v[i])
after = time.time()
print(x)
print("Regular for loop= " + str((after-before)*1000) + "ms")
print('\n')
time1 = (after-before)*1000

before = time.time()
x = np.exp(v)
after = time.time()
print(x)
print("Numpy operation= " + str((after-before)*1000) + "ms")
time2 = (after-before)*1000
print('\n')
print("Numpy is "+ str(round(time1/time2,2)) + " times faster than for loop in Python.")

[[1.0826363 ]
 [0.25659038]
 [2.25713811]
 ...
 [2.87728888]
 [0.99124821]
 [1.0183269 ]]
Regular for loop= 5360.964059829712ms


[[1.0826363 ]
 [0.25659038]
 [2.25713811]
 ...
 [2.87728888]
 [0.99124821]
 [1.0183269 ]]
Numpy operation= 77.69274711608887ms


Numpy is 69.0 times faster than for loop in Python.


### Let's explore breast cancer data

In [3]:
from sklearn.datasets import load_breast_cancer

In [4]:
cancer_data = load_breast_cancer()

In [5]:
cancer_data

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

### How can we access the "data" key of `cancer_data`?

In [5]:
cancer_data['data']

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

### Data `type`?

In [6]:
type(cancer_data['data'])

numpy.ndarray

### Let's find the shape

In [7]:
cancer_data['data'].shape

(569, 30)

In [8]:
import pandas as pd

In [9]:
pd.DataFrame.from_dict(cancer_data, orient='index')

  values = np.array([convert(v) for v in values])


Unnamed: 0,0
data,"[[17.99, 10.38, 122.8, 1001.0, 0.1184, 0.2776,..."
target,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
frame,
target_names,"[malignant, benign]"
DESCR,.. _breast_cancer_dataset:\n\nBreast cancer wi...
feature_names,"[mean radius, mean texture, mean perimeter, me..."
filename,breast_cancer.csv
data_module,sklearn.datasets.data


In [10]:
cancer_data['feature_names']

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [11]:
cancer_data_array = cancer_data['data']
cancer_data_array

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [12]:
cancer_data_array[0]

array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])

In [6]:
cancer_data['feature_names']

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

### The Basics

**Why use NumPy?**

NumPy (Numerical Python) is an open source Python library that’s used in almost every field of science and engineering. It’s the universal standard for working with numerical data in Python, and it’s at the core of the scientific Python and PyData ecosystems.

- NumPy arrays are faster and more compact than Python lists.
- An array consumes less memory and is convenient to use.
- NumPy uses much less memory to store data.
- Code optimization.

In [8]:
a = np.array([1, 2, 3, 4, 5, 6]) # single dim

In [10]:
a.shape

(6,)

In [12]:
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) # multi dim

In [14]:
type(a)

numpy.ndarray

In [15]:
a[0]

array([1, 2, 3, 4])

In [16]:
a_list = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]

In [17]:
a_list[0]

[1, 2, 3, 4]

### `np.zeroes`, `np.ones` and `np.random`

In [15]:
np.zeros((10,1))

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [19]:
np.zeros((10,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [20]:
np.ones((10,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [21]:
np.random.randn(10)

array([-0.31186011, -0.0037786 , -1.33495218,  0.20313432, -0.23260946,
       -0.57894193,  0.56104383,  0.24747842, -0.89909379, -1.44040145])

In [17]:
np.random.randn(10,3)

array([[ 0.29889114, -0.99605393,  0.76720866],
       [-1.4245937 , -0.29882994,  0.14910226],
       [-2.1355849 , -0.23947445, -0.15493525],
       [-2.56144897,  0.1751449 , -0.53122829],
       [ 1.52610503, -0.25185814,  0.08403143],
       [-0.22598887, -0.287076  ,  0.05473635],
       [ 0.27469905, -0.41947125,  0.56374027],
       [ 1.48374634,  0.03408127,  0.88323197],
       [-0.75108722, -1.74802516,  0.59962162],
       [-1.12345901,  0.94205602, -0.2082371 ]])

### `np.arange`

In [18]:
np.arange(4)

array([0, 1, 2, 3])

In [20]:
np.arange(0,9,2)

array([0, 2, 4, 6, 8])

### Adding, removing and sorting

**Sorting**

In [25]:
arr = np.array([2, 1, 5, 3, 7, 4, 6, 8])

In [26]:
np.sort(arr)

array([1, 2, 3, 4, 5, 6, 7, 8])

**Concatenate**

In [27]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])

In [28]:
np.concatenate((a, b))

array([1, 2, 3, 4, 5, 6, 7, 8])

**Shape**

In [29]:
array_example = np.array([[0, 1, 2, 3],[4, 5, 6, 7]])

In [30]:
np.shape(array_example)

(2, 4)

In [31]:
array_example.ndim

2

In [32]:
array_example.size

8

**Reshape**

In [21]:
a = np.arange(6)
a

array([0, 1, 2, 3, 4, 5])

In [24]:
a.reshape(3,2)

array([[0, 1],
       [2, 3],
       [4, 5]])

### Indexing and slicing

![Numpy](https://numpy.org/doc/stable/_images/np_indexing.png)

In [35]:
data = np.array([1, 2, 3])

In [36]:
data[1]

2

In [37]:
data[0:2]

array([1, 2])

**Conditions**

In [26]:
a = np.array([[1 , 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

In [29]:
a[a < 5]

array([1, 2, 3, 4])

In [30]:
five_up = (a >= 5)
a[five_up]

array([ 5,  6,  7,  8,  9, 10, 11, 12])

**Divisible by two**

In [41]:
divisible_by_2 = a[a%2==0]
divisible_by_2

array([ 2,  4,  6,  8, 10, 12])

**Greater than 2 and less than 11?**

In [42]:
c = a[(a > 2) & (a < 11)]
c

array([ 3,  4,  5,  6,  7,  8,  9, 10])

**Broadcasting**

In [31]:
data = np.array([1, 2, 3, 4 ,5])
data * 20

array([ 20,  40,  60,  80, 100])

**`max`, `min`, `sum`**

In [33]:
data

array([1, 2, 3, 4, 5])

In [32]:
data.max()

5

In [45]:
data.min()

1

**Create an np array from 1 to 100 and compute the sum.**

In [34]:
x = np.arange(101)
x

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100])

In [35]:
x.sum()

5050

### Now, can you explain this code?

In [46]:
import time
import math
import numpy as np

iter = 1000000

x = np.zeros((iter,1))
v = np.random.randn(iter,1)

before = time.time()

for i in range(iter):
    x[i] = math.exp(v[i])
after = time.time()
print(x)
print("Regular for loop= " + str((after-before)*1000) + "ms")
print('\n')
time1 = (after-before)*1000

before = time.time()
x = np.exp(v)
after = time.time()
print(x)
print("Numpy operation= " + str((after-before)*1000) + "ms")
time2 = (after-before)*1000
print('\n')
print("Numpy is "+ str(round(time1/time2,2)) + " times faster than for loop in Python.")

[[1.41647042]
 [2.4156572 ]
 [0.44888528]
 ...
 [0.53005892]
 [0.2405328 ]
 [0.8418609 ]]
Regular for loop= 734.9960803985596ms


[[1.41647042]
 [2.4156572 ]
 [0.44888528]
 ...
 [0.53005892]
 [0.2405328 ]
 [0.8418609 ]]
Numpy operation= 10.236978530883789ms


Numpy is 71.8 times faster than for loop in Python.
