In [None]:
%%HTML
<link rel="stylesheet" type="text/css" href="rise.css">

# Python for Data Scientists
## From data manipulation to machine learning

# NumPy
## https://numpy.org/

**NumPy** (**Num**erical **Py**thon)

Libreria per il calcolo numerico in Python

<div class="gallery">
    <img src=images/pandas-logo.png style="max-width: 30%">
    <img src=images/pytorch-logo.png style="max-width: 30%">
    <img src=images/pillow-logo.png style="max-width: 30%">
</div>

In [None]:
import numpy as np

# Installed NumPy release, read from the public top-level attribute
# rather than from the internal `np.version` module.
np.__version__

* Struttura dati efficiente (_ndarray_)
* operazioni di calcolo su array
* algebra lineare, numeri random
* Efficienza di C/FORTRAN

# ndarray

* dati omogenei
* array vs liste
* efficienti

<img src=images/ndarray/array.svg class="center" />

<img src=images/ndarray/list.svg class="center" />

In [None]:
# A list can hold homogeneous data (here: only ints)
list_a = [1, 2, 3, 4, 5]

# ... or not: this one mixes ints, a str, a range object and a float
list_b = [1, "2", range(3), 5.236, 5]

# Tuples are the better choice (same mixed items, stored in a tuple)
tuple_b = (1, "2", range(3), 5.236, 5)

In [None]:
# Build an ndarray from a Python list of ints
np.array([1, 2, 3, 4, 5])

# Alternative to try: a list that mixes ints and a str
# np.array([1, 2, "1"])

## Efficienza

In [None]:
# One million consecutive integers, once as an ndarray and once as a plain list
my_arr = np.arange(1_000_000)
my_list = list(range(1_000_000))

In [None]:
# Time ten repetitions of doubling every element with one vectorized expression
%time for _ in range(10): result = my_arr * 2 

In [None]:
# Same work on the plain list: ten repetitions of an element-by-element comprehension
%time for _ in range(10): result = [x*2 for x in my_list]

## shape

**tupla**, indica la misura di ogni dimensione

<div class="gallery">
    <img src=images/ndarray/array.svg width=30% />
    <img src=images/ndarray/matrix.svg width=30% />
    <img src=images/ndarray/pexels-miguel-á-padriñán-19677.jpg width=30%>
</div>
<div class="gallery">
    <div style="width: 30%">(5,)</div>
    <div style="width: 30%">(5, 5)</div>
    <div style="width: 30%">(3, 3, 3)</div>
</div>

In [None]:
# Integers 0..14 in a one-dimensional array
np.arange(15)

# Alternative to try: 15 random integers drawn from [0, 10)
# np.random.randint(10, size=15)

In [None]:
# 3x3 matrix of random integers; `low` is inclusive, `high` is exclusive
np.random.randint(low=0, high=10, size=(3, 3))

## dtype

Descrive il tipo di dato dell'array

* **int**(8, 16, 32, 64)
* **uint**(8, 16, 32, 64)
* **float**(16, 32, 64, 128)
* **complex**(64, 128, 256)
* bool, object, bytes_, str_

In [None]:
# Array of five ones stored with the integer dtype
display(np.ones(5, dtype=int))

# Alternatives to try with other dtypes:
# display(np.ones(5, dtype=float))

# display(np.ones(5, dtype=np.float64))

# display(np.ones(5, dtype=str))

## Conversione tra tipi

In [None]:
# 4x5 array of integer ones
arr = np.ones((4, 5), dtype=int)

# Uncomment one of these to convert; astype returns a new array, hence the reassignment
# arr = arr.astype(float)
# arr = arr.astype('int32')

arr

## Creazione array

In [None]:
length = 10

# Build the array from a Python range
np.array(range(length))

# Alternative: let NumPy generate the same integer sequence directly
# np.arange(length)

# Alternative: `length` evenly spaced values from 0 to 18 (both ends included)
# np.linspace(0, 18, length)

In [None]:
shape = (3, 4)
dtype = np.float16

# A cell only renders its last expression, so the two arrays are paired in a tuple;
# as two separate statements the ones array would be built and silently dropped.
np.ones(shape, dtype=dtype), np.zeros(shape, dtype=dtype)

In [None]:
shape = (3, 3)

# Identity matrix sized from `shape` (rows, columns) instead of a separate hard-coded 3.
# `dtype` is not assigned in this cell; it must still be bound from an earlier cell.
np.eye(*shape, dtype=dtype)

# Operazioni tra array

_Le classiche operazioni matematiche avvengono punto per punto_

In [None]:
# 3x4 array of ones (default float dtype)
arr = np.ones((3, 4))
# `+` adds the two operands element by element
arr + arr

In [None]:
# Comparisons are element-wise as well and produce boolean arrays.
# Both results go into one tuple so the `==` result is displayed too,
# instead of being computed and discarded before the `<` result is shown.
(arr == arr), (arr < arr)

In [None]:
arr = np.array([True, False, True])
arr2 = np.array([True, True, True])

# Element-wise logical AND of the two boolean arrays
arr & arr2
# Alternatives to try: element-wise OR, and element-wise NOT
# arr | arr2
# ~arr.astype(bool)

### Prodotto tra matrici

In [None]:
A = np.eye(3)  # 3x3 identity matrix
B = np.ones((3, 3))  # 3x3 matrix of ones

# Matrix product of A and B
np.dot(A, B)
# Equivalent spellings for these 2-D operands:
# A.dot(B)
# A @ B

## Accedere agli elementi

In [None]:
arr = np.arange(10)
# Indexing is zero-based, so index 3 picks the fourth element
arr[3]

In [None]:
# Values 0..8 laid out as a 3x3 matrix
matrix = np.arange(9).reshape((3, 3))

matrix
# Two ways to read the top-left element: chained indexing or a single (row, column) index
# matrix[0][0]
# matrix[0, 0]

In [None]:
# 10x10 grid of random integers in [0, 256), three values per grid position
rgb = np.random.randint(256, size=(10, 10, 3))

from matplotlib import pyplot as plt
# Draw the array as an image using nearest-neighbour interpolation
plt.imshow(rgb, interpolation='nearest')
plt.show()

# The three values stored at row 0, column 0
rgb[0, 0]

### Slicing

In [None]:
# 10x10x3 float array in which every entry equals 127
image = np.full((10, 10, 3), 127.0)

# All values along the last axis at row 0, column 3
image[0, 3]

# Other slices to try:
# image[:3, :3, :]

# image[:, : , 0]

_Per effettuare una copia di uno **slice** bisogna utilizzare `.copy()`_

In [None]:
matrix = np.ones((5, 5))
# `matrix[0, :]` is row 0 (`matrix[:, 0]` would be column 0, contradicting the name).
# Basic slicing returns a view that shares memory with `matrix`.
first_row = matrix[0, :]
# first_row = matrix[0, :].copy()

In [None]:
# Show the matrix followed by the slice taken from it
display(matrix, first_row)

In [None]:
# Overwrite the element at index 2 of `first_row` (bound in an earlier cell).
# Re-display `matrix` afterwards to check whether the write reached it.
first_row[2] = 25

### Slicing indexing

In [None]:
arr = np.arange(20)
# start:stop slice: elements at indices 3, 4 and 5 (the stop index is excluded)
arr[3:6]

In [None]:
# Values 0..15 laid out as a 4x4 matrix
matrix = np.arange(16).reshape(4, 4)
display(matrix)
# One slice per axis: rows 0-1, columns 1-3
matrix[:2, 1:4]

In [None]:
arr = np.arange(20)

# Explicit loop over every second index below 10 ...
for i in range(0, 10, 2):
    print(arr[i], end=" ")

# ... and the same selection written as a start:stop:step slice
arr[0:10:2]

### Boolean indexing o maschere

In [None]:
matrix = np.arange(16).reshape(4, 4)

# The mask is an explicit boolean ndarray with the same shape as `matrix`.
# A bare nested list used as an index is not interpreted as a mask by every
# NumPy release (older ones treat it as a tuple of per-axis indices).
indices = np.array([
    [True, False, True, True],
    [True, True, False, True],
    [True, True, True, False],
    [False, True, True, True],
])
print(matrix)
# Keeps the entries where the mask is True, returned as a flat 1-D array
matrix[indices]

In [None]:
# 10x10 grid of random integers in [0, 256) with a trailing axis of length 1
image = np.random.randint(256, size=(10, 10, 1))

In [None]:
from matplotlib import pyplot as plt

# Draw `image` (bound in an earlier cell) and add a colour bar for the value scale
plt.imshow(image, interpolation='nearest')
plt.colorbar()
plt.show()

In [None]:
# Boolean mask of the entries greater than 128, computed once and reused
above_128 = image > 128
display(above_128[:5, :5, 0])  # top-left 5x5 corner of the mask
image[above_128] = 0  # zero the selected entries in place

### Fancy indexing

In [None]:
matrix = np.arange(16).reshape(4, 4)
display(matrix)

# A list of column indices picks the columns in the listed order: 0, 2, 1, 3
matrix[:, [0, 2, 1, 3]]

### Trasposta di un vettore/matrice

In [None]:
# Values 0..26 laid out as a 3x3x3 array
matrix = np.arange(27).reshape(3, 3, 3)

display(matrix)

# .T reverses the order of the axes
matrix.T

# Alternatives to try: give the axis order explicitly, or swap just two axes
# matrix.transpose(1, 0, 2)

# matrix.swapaxes(1, 0)

## Funzioni

### Unary function

In [None]:
# 13 evenly spaced float16 values from -3 to 3
x = np.linspace(-3, 3, 13, dtype=np.float16)

unary_ops = (np.abs, np.sign, np.square, np.ceil, np.floor)
for op in unary_ops:
    # One tab-separated row per function: its name, then every transformed value,
    # followed by an empty line
    print(f"np.{op.__name__}:", *op(x), sep="\t", end="\n\n")

# np.square(x)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 10000 sample points from 0 to 6*pi, i.e. three full periods of the cosine
x = np.linspace(0, 6 * np.pi, 10000)
y = np.cos(x)

# The trailing semicolon suppresses the textual repr of the value returned by plot
plt.plot(x, y);

### Binary function

In [None]:
x1 = np.ones(4)
x2 = np.ones_like(x1)  # same shape and dtype as x1, filled with ones
# Function form of the element-wise sum
np.add(x1, x2)

In [None]:
x = np.linspace(0, 1, 10)
y = np.array([1, 2, 2, 1, 2, 3, 0, 0, 1, 1])

# Raise each element of x to the exponent at the same position in y
np.power(x, y)

$y = 3\cdot e^{\cos(x)}$

In [None]:
# 1000 sample points between -20 and 20
x = np.linspace(-20, 20, 1000)

# y = 3 * e**cos(x), evaluated element by element
y = 3 * np.power(np.e, np.cos(x))

In [None]:
# Plot y against x; `plt`, `x` and `y` are bound in earlier cells
plt.plot(x, y);

## Programmazione vettoriale

In [None]:
# Print every result with its label: a cell only echoes its last expression, so a bare
# list of calls would show np.cumprod alone and silently drop the other eight results.
# `arr` is not created here; it is whatever an earlier cell left bound to that name.
for name in ("sum", "mean", "std", "min", "max", "argmin", "argmax", "cumsum", "cumprod"):
    print(f"np.{name}:", getattr(np, name)(arr))

In [None]:
# The same operations are available as ndarray methods.
# Returned as one tuple so that all three values are shown, not just the last one.
arr.sum(), arr.min(), arr.argmax()

## metodi booleani

In [None]:
# any(): is at least one element truthy?  all(): is every element truthy?
# Returned as one tuple so that both answers are shown, not just the last one.
arr.any(), arr.all()

In [None]:
# 4x4 sample drawn from the standard normal distribution
arr = np.random.normal(size=(4, 4))

# ndarray.sort works in place. Without an argument it sorts along the last axis;
# with 0 it sorts along the first axis, i.e. within each column.
# arr.sort()
arr.sort(0)

arr

In [None]:
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 2, 3])

# Each result is printed with its label; as bare expressions only the last one
# would be echoed and the others computed for nothing.
print("np.unique:", np.unique(x))
# np.isin is used for the membership test; it supersedes the deprecated np.in1d.
for name in ("intersect1d", "union1d", "isin", "setdiff1d", "setxor1d"):
    print(f"np.{name}:", getattr(np, name)(x, y))

## Algebra Lineare (numpy.linalg)

In [None]:
m = np.random.random((3, 3))

# Print these results: as bare expressions they would be computed and discarded,
# and since the cell ends with an assignment nothing would be displayed at all.
print("diag:", np.diag(m))
print("det:", np.linalg.det(m))
print("inv:", np.linalg.inv(m), sep="\n")

q, r = np.linalg.qr(m)  # QR factorisation of m
x = np.linalg.solve(m, [1, 2, 3])  # solution of m @ x = [1, 2, 3]

## Numeri pseudocasuali

In [None]:
# Pick ONE distribution. With both assignments active the uniform sample is a dead
# store: it is drawn and then immediately overwritten by the normal one.
# points = np.random.random((2, 1000))
points = np.random.normal(size=(2, 1000))

# Row 0 supplies the x coordinates and row 1 the y coordinates; the trailing
# semicolon suppresses the textual repr of the object returned by scatter.
plt.scatter(points[0, :], points[1, :]);
# plt.hist(points[0], bins=20)

In [None]:
import pandas as pd
from pandas import DataFrame as df

In [None]:
# Build the frame through the `pd` namespace: `df` conventionally names a DataFrame
# *instance*, so calling an alias spelled `df` reads like calling a frame.
# Transposing `points` (bound in an earlier cell) gives one row per point.
pd.DataFrame(points.T, columns=("x", "y"))