In [None]:
%%HTML
<link rel="stylesheet" type="text/css" href="rise.css">

# Python for Data Scientists
## From data manipulation to machine learning


<div style="display: flex; justify-content: space-between; 
            align-items: center;">
    <img src="images/me.jpg">
    <div style="text-align: center; vertical-align: middle; width: 100%">
        <br>
        <h1>Elia Giacobazzi</h1>
        elia.giacobazzi@euler.it<br>
        <h5>
            Matematico<br>
            Libero Professionista<br>
            Data Scientist
        </h5>
    </div>
</div>

<div class="gallery">
    <img src=images/clinux-logo.png>
    <img src=images/euler-logo.png>
</div>

## Premessa

Ringrazio **Alessandro Grandi** di **Digital Dreamers** per le loro slides

# Data Science

_Scopo del Data Scientist è quello di organizzare e analizzare grandi quantità di dati_

* Big Data
* Analytics
* Automazione
* Machine Learning

### E Python cosa c'entra?

* [numfocus.org](https://numfocus.org)
* [numpy.org](https://numpy.org)
* [pandas.pydata.org](https://pandas.pydata.org)
* [scipy.org](https://scipy.org)
* [scikit-learn.org](https://scikit-learn.org)

#### Tutto molto bello, ma come si ottiene questa roba?

Tramite `pip`
```sh
> pip install numpy
> pip install pandas 
> pip install scipy
...
```

Tramite [Anaconda](https://www.anaconda.com/products/distribution)
```bash
> conda create --name science 
> conda activate science
> conda install numpy
> conda install pandas
...

```

# NumPy
## https://numpy.org/

**NumPy** (**Num**erical **Py**thon)

Libreria per il calcolo numerico in Python

<div class="gallery">
    <img src=images/pandas-logo.png>
    <img src=images/pytorch-logo.png>
    <img src=images/pillow-logo.png>
</div>

In [None]:
import numpy as np

# Report the NumPy release used by this kernel (same string as np.version.version).
np.__version__

* Struttura dati efficiente (_ndarray_)
* Operazioni di calcolo su array
* Algebra lineare, numeri random
* Efficienza di C/FORTRAN

## ndarray

### ndarray
* dati omogenei
* array vs liste
* efficienti



<img src=images/ndarray/array.svg class="center" />

<img src=images/ndarray/list.svg class="center" />

In [None]:
# A list can hold homogeneous data...
list_a = [1, 2, 3, 4, 5]

# ...or not: here an int, a str, a range object and a float share one list
list_b = [1, "2", range(3), 5.236, 5]

# Better to use a tuple (immutable) for mixed values like these
tuple_b = (1, "2", range(3), 5.236, 5)

In [None]:
# A homogeneous list maps directly onto an integer ndarray.
int_arr = np.array([1, 2, 3, 4, 5])

# An ndarray has a single dtype, so mixed input is coerced to a common one:
# here the integers are converted to strings.
str_arr = np.array([1, 2, "1"])

# A cell echoes only its last expression, so both arrays are displayed explicitly.
display(int_arr, str_arr)

## Efficienza

In [None]:
# The same one million consecutive integers, once as an ndarray and once as a
# plain Python list, used by the two timing cells that follow.
my_arr = np.arange(1_000_000)
my_list = list(range(1_000_000))

In [None]:
# Time ten doublings of the whole ndarray, each done by a single vectorised expression.
%time for _ in range(10): result = my_arr * 2

In [None]:
# Time the same ten doublings on the list, element by element in a comprehension.
%time for _ in range(10): result = [x*2 for x in my_list]

### shape

**tupla**, indica la misura di ogni dimensione

<div class="gallery">
    <img src=images/ndarray/array.svg width=30% />
    <img src=images/ndarray/matrix.svg width=30% />
    <img src=images/ndarray/pexels-miguel-á-padriñán-19677.jpg width=30%>
</div>
<div class="gallery">
    <div style="width: 30%">(5,)</div>
    <div style="width: 30%">(5, 5)</div>
    <div style="width: 30%">(3, 3, 3)</div>
</div>

In [None]:
# Integers 0..14 as a 1-D array: its shape is (15,).
np.arange(15)

# Alternative to try: 15 random integers drawn from [0, 10)
# np.random.randint(10, size=15)

In [None]:
# 3x3 matrix of random integers in [0, 10) (upper bound excluded): its shape is (3, 3).
np.random.randint(0, 10, size=(3, 3))

## dtype

Descrive il tipo di dato dell'array

* **int**(8, 16, 32, 64)
* **uint**(8, 16, 32, 64)
* **float**(16, 32, 64, 128)
* **complex**(64, 128, 256)
* bool, object, bytes_ (ex string_), str_ (ex unicode_)

In [None]:
# Build the same 5-element array of ones with four dtype spellings.
# `float` and `np.float64` both give float64; `str` gives the text "1" in every slot.
for dt in (int, float, np.float64, str):
    display(np.ones(5, dtype=dt))

### Conversione tra tipi

In [None]:
# Start from a 4x5 integer array of ones.
arr = np.ones((4, 5), dtype=int)

# Uncomment one line to convert: astype returns a new array with the requested
# dtype, which is why the result is assigned back to arr.
# arr = arr.astype(float)
# arr = arr.astype('int32')

# Echo the (possibly converted) array.
arr

## Creazione array

In [None]:
length = 10

# From any Python iterable: integers 0..9.
np.array(range(length))

# Same values, built directly by NumPy:
# np.arange(length)

# `length` evenly spaced floats from 0 to 18, both endpoints included:
# np.linspace(0, 18, length)

In [None]:
shape = (3, 4)
dtype = np.float16

# A cell echoes only its last expression, so both arrays are displayed explicitly:
# first the array of ones, then the array of zeros, same shape and dtype.
display(
    np.ones(shape, dtype=dtype),
    np.zeros(shape, dtype=dtype),
)

In [None]:
shape = (3, 3)

# Identity matrix sized from `shape` instead of a repeated literal;
# `dtype` is the np.float16 set in the previous cell.
np.eye(*shape, dtype=dtype)

### Operazioni tra array

Le classiche operazioni matematiche avvengono elemento per elemento (_element-wise_)

In [None]:
# 3x4 array of ones (float64 by default).
arr = np.ones((3, 4))
# `+` is applied element by element: every entry of the result is 2.0.
arr + arr

In [None]:
# Comparisons are element-wise too and return boolean arrays. For the array of
# ones from the previous cell, `==` is True everywhere and `<` is False everywhere.
# A cell echoes only its last expression, so both results are displayed explicitly.
display(arr == arr, arr < arr)

In [None]:
arr = np.array([True, False, True])
arr2 = np.array([True, True, True])

# Element-wise logical AND -> [True, False, True]
arr & arr2
# Element-wise logical OR:
# arr | arr2
# Element-wise logical NOT:
# ~arr.astype(bool)

### Prodotto tra matrici

In [None]:
# A is the 3x3 identity and B a 3x3 matrix of ones, so the product equals B.
A = np.eye(3)
B = np.ones((3, 3))

# Matrix (not element-wise) product; the commented lines are equivalent spellings.
np.dot(A, B)
# A.dot(B)
# A @ B

### Indexing

### Slicing

### Slicing indexing

### Boolean indexing

### Fancy indexing

### Trasposta di un vettore/matrice

## Funzioni

### Unary function

### Binary function

### Un esempio con grafico

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 10000 evenly spaced samples over [0, 6*pi], i.e. three full periods of the cosine.
x = np.linspace(0, 6 * np.pi, 10000)
y = np.cos(x)

# The trailing semicolon suppresses the echo of plot()'s return value.
plt.plot(x, y);

## Programmazione vettoriale

In [None]:
# 4x5 array of uniform random floats in [0, 1).
arr = np.random.random(size=(4, 5))

# Vectorised if/else: 1 where the element is greater than 0.5, 0 elsewhere.
np.where(arr>.5, 1, 0)

In [None]:
# Reductions over the whole array (no axis is given, so all elements are used).
# A cell echoes only its last expression, so every result is displayed explicitly.
display(
    np.sum(arr),      # sum of all elements
    np.mean(arr),     # arithmetic mean
    np.std(arr),      # standard deviation
    np.min(arr),      # smallest element
    np.max(arr),      # largest element
    np.argmin(arr),   # flat index of the smallest element
    np.argmax(arr),   # flat index of the largest element
    np.cumsum(arr),   # running sum, returned as a 1-D array
    np.cumprod(arr),  # running product, returned as a 1-D array
)

In [None]:
# The same reductions are also available as ndarray methods.
# A cell echoes only its last expression, so all three results are displayed explicitly.
display(arr.sum(), arr.min(), arr.argmax())

In [None]:
# any(): at least one element is truthy (non-zero); all(): every element is.
# A cell echoes only its last expression, so both results are displayed explicitly.
display(arr.any(), arr.all())

In [None]:
# 4x4 sample drawn from the standard normal distribution.
arr = np.random.normal(size=(4, 4))

# ndarray.sort works in place and returns None.
# With no argument it sorts along the last axis (within each row):
# arr.sort()
# With axis 0 it sorts along the first axis (within each column):
arr.sort(0)

arr

In [None]:
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 2, 3])

# Set-style operations on 1-D arrays. A cell echoes only its last expression,
# so every result is displayed explicitly.
display(
    np.unique(x),          # sorted distinct values of x
    np.intersect1d(x, y),  # values present in both arrays
    np.union1d(x, y),      # values present in either array
    np.isin(x, y),         # boolean mask of the elements of x found in y (replaces deprecated np.in1d)
    np.setdiff1d(x, y),    # values of x that are not in y
    np.setxor1d(x, y),     # values present in exactly one of the two arrays
)

## Algebra Lineare (numpy.linalg)

In [None]:
# Random 3x3 matrix with uniform entries in [0, 1).
m = np.random.random((3, 3))

# A cell echoes only its last expression (and nothing when it ends with an
# assignment), so every result is displayed explicitly.
display(
    np.diag(m),        # main diagonal
    np.linalg.det(m),  # determinant
    np.linalg.inv(m),  # inverse matrix
)

# QR factorisation (m = q @ r) and the solution of the linear system m @ x = [1, 2, 3].
q, r = np.linalg.qr(m)
x = np.linalg.solve(m, [1, 2, 3])

display(q, r, x)

## Numeri pseudocasuali

In [None]:
# Two ways to draw 1000 random 2-D points (row 0 holds x, row 1 holds y).
# Uniform on [0, 1): kept as a commented alternative, because assigning it and
# then overwriting it straight away only wasted a draw.
# points = np.random.random((2, 1000))
# Standard normal: this is the sample that gets plotted.
points = np.random.normal(size=(2, 1000))

# The trailing semicolon suppresses the echo of scatter()'s return value.
plt.scatter(points[0, :], points[1, :]);
# plt.hist(points[0], bins=20)

In [None]:
import pandas as pd
from pandas import DataFrame as df

In [None]:
# Transpose to one row per point and label the two coordinates.
# pd.DataFrame is spelled out because the `df` alias reads like a DataFrame instance.
pd.DataFrame(points.T, columns=["x", "y"])