# Importando numpy

In [1]:
import numpy as np

# Data types and attributes

## El principal tipo de dato de NumPy es ndarray.
Todo lo que usemos en numpy va a ser un ndarray (n dimensional array)

In [2]:
a1 = np.array([1,2,3])
a1

array([1, 2, 3])

In [3]:
type(a1)

numpy.ndarray

In [4]:
# Two-dimensional array: 2 rows x 3 columns. Because ints and floats are
# mixed, every element is stored as a float.
a2 = np.array([
    [1, 2.0, 3.0],
    [4, 5, 6.5],
])

# Three-dimensional array: 2 blocks, each holding 3 rows x 3 columns.
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
])

In [5]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [6]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

![](anatomia-de-nparray.png)

In [7]:
# shape gives the number of elements along each dimension of the array
a1.shape

(3,)

In [8]:
a2.shape

(2, 3)

In [9]:
a3.shape

(2, 3, 3)

In [10]:
# ndim gives the number of dimensions of the ndarray
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [11]:
# dtype gives the type of the elements the array holds
# NOTE(review): the default integer dtype is platform-dependent (int32 in this
# run); expect int64 on many other systems.
a1.dtype, a2.dtype, a3.dtype

(dtype('int32'), dtype('float64'), dtype('int32'))

In [12]:
# size gives the total number of elements in the array
a1.size, a2.size, a3.size

(3, 6, 18)

# IMPORTANTE!!! Todo se trabaja con ndarray.
Los tipos de los elementos que contiene pueden cambiar, pero las manipulaciones siempre se hacen sobre ndarrays.

## Ej: Create a DataFrame from a NumPy array
pandas está construido sobre NumPy, por lo que un ndarray se puede mostrar como una tabla convirtiéndolo en un DataFrame.

In [13]:
import pandas as pd

df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,5.0,6.5


## 2. Creating arrays

In [14]:
sample_array = np.array([1,2,3])
sample_array

array([1, 2, 3])

In [15]:
# NOTE: pressing Shift+Tab in Jupyter opens the function's docstring
ones = np.ones((2,3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [16]:
ones.dtype

dtype('float64')

In [17]:
zeros = np.zeros((2,3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [18]:
# arange(start, stop, step): the stop value is excluded, so 10 itself is not produced
range_array = np.arange(0,10,2)
range_array

array([0, 2, 4, 6, 8])

In [19]:
random_array = np.random.randint(0,10,size=(3,5))
random_array

array([[5, 5, 0, 9, 0],
       [0, 7, 4, 7, 6],
       [5, 7, 8, 3, 5]])

In [20]:
np.random.random(size=(3,5))

array([[0.44042866, 0.58372282, 0.9917102 , 0.92317111, 0.44169782],
       [0.97737518, 0.56608365, 0.00916803, 0.70830043, 0.23000159],
       [0.50710289, 0.09254444, 0.14473554, 0.6398718 , 0.5710181 ]])

In [21]:
random_array_3 = np.random.rand(3,5)
random_array_3

array([[0.39005824, 0.61593637, 0.65224598, 0.34843792, 0.85818923],
       [0.28186296, 0.08779069, 0.87232289, 0.8066366 , 0.34793255],
       [0.591885  , 0.1219024 , 0.62579258, 0.71549701, 0.14766709]])

### Los numeros random de np son pseudo-random. Si le seteamos una semilla podemos generar numeros aleatorios reproducibles. Por si queremos reproducir las pruebas con los mismos numeros

In [22]:
# Fix the seed of NumPy's legacy global generator so the draw below is
# identical on every run.
np.random.seed(0)

# 5 x 3 grid of integers drawn from [0, 10).
random_array_4 = np.random.randint(0, 10, size=(5, 3))
random_array_4

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

# 3. Viewing arrays and matrices

In [23]:
# Find the unique values in an array
np.unique(random_array_4)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
a1

array([1, 2, 3])

In [25]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [26]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [27]:
a1[0]

1

In [28]:
a2[0]

array([1., 2., 3.])

In [29]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Se puede usar slicing con arrays

In [30]:
# Take the first two elements along every axis (outermost to innermost)
a3[:2, :2, :2]

array([[[ 1,  2],
        [ 4,  5]],

       [[10, 11],
        [13, 14]]])

In [31]:
a4 = np.random.randint(10,size=(2,3,4,5))
a4

array([[[[6, 7, 7, 8, 1],
         [5, 9, 8, 9, 4],
         [3, 0, 3, 5, 0],
         [2, 3, 8, 1, 3]],

        [[3, 3, 7, 0, 1],
         [9, 9, 0, 4, 7],
         [3, 2, 7, 2, 0],
         [0, 4, 5, 5, 6]],

        [[8, 4, 1, 4, 9],
         [8, 1, 1, 7, 9],
         [9, 3, 6, 7, 2],
         [0, 3, 5, 9, 4]]],


       [[[4, 6, 4, 4, 3],
         [4, 4, 8, 4, 3],
         [7, 5, 5, 0, 1],
         [5, 9, 3, 0, 5]],

        [[0, 1, 2, 4, 2],
         [0, 3, 2, 0, 7],
         [5, 9, 0, 2, 7],
         [2, 9, 2, 3, 3]],

        [[2, 3, 4, 1, 2],
         [9, 1, 4, 6, 8],
         [2, 3, 0, 0, 6],
         [0, 6, 3, 3, 8]]]])

In [32]:
a4.shape,a4.ndim

((2, 3, 4, 5), 4)

### NumPy arma los arrays de afuera hacia adentro.
#### El primer valor de la forma (2) corresponde al corchete más externo: habrá dos elementos, y cada uno contiene la cantidad de elementos indicada por el siguiente valor (3).
#### El valor de más a la derecha es la dimensión del elemento más interno; en este caso, un array de 5 números.

In [33]:
# Get the first 4 numbers of each innermost array:
a4[:,:,:,:4]

array([[[[6, 7, 7, 8],
         [5, 9, 8, 9],
         [3, 0, 3, 5],
         [2, 3, 8, 1]],

        [[3, 3, 7, 0],
         [9, 9, 0, 4],
         [3, 2, 7, 2],
         [0, 4, 5, 5]],

        [[8, 4, 1, 4],
         [8, 1, 1, 7],
         [9, 3, 6, 7],
         [0, 3, 5, 9]]],


       [[[4, 6, 4, 4],
         [4, 4, 8, 4],
         [7, 5, 5, 0],
         [5, 9, 3, 0]],

        [[0, 1, 2, 4],
         [0, 3, 2, 0],
         [5, 9, 0, 2],
         [2, 9, 2, 3]],

        [[2, 3, 4, 1],
         [9, 1, 4, 6],
         [2, 3, 0, 0],
         [0, 6, 3, 3]]]])

# Manipulating and comparing arrays

### Arithmetic

In [34]:
a1

array([1, 2, 3])

In [35]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [36]:
# Addition (element-wise)
a1 + ones

array([2., 3., 4.])

In [37]:
# Subtraction (element-wise)
a1 - ones

array([0., 1., 2.])

In [38]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [39]:
# Multiplication (element-wise): a1 has shape (3,), so it is applied to each row of a2
a1 * a2

array([[ 1. ,  4. ,  9. ],
       [ 4. , 10. , 19.5]])

In [40]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [41]:
# a2 has shape (2, 3) and a3 has shape (2, 3, 3). Broadcasting lines shapes up
# from the last axis, so a2's leading 2 meets a3's middle 3 and the product
# fails. The error is the point of this cell, so catch and print it instead of
# letting it stop a top-to-bottom run of the notebook.
try:
    a2 * a3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes. Broadcasting provides a means of vectorizing array operations so that looping occurs in C instead of Python

In [43]:
# How can you reshape a2 to be compatible with a3?
# Search: how to reshape numpy array?
# A reshape must keep the number of elements: a2 holds 6 values, so it cannot
# become a (2, 3, 3) array (18 values). Show that error without stopping the
# notebook.
try:
    a5 = np.reshape(a2, (2, 3, 3))
except ValueError as err:
    print(f"ValueError: {err}")

# One shape that does work: (2, 3, 1) keeps the 6 values, and the trailing axis
# of length 1 is stretched to match a3's last axis, so a5 * a3 is valid.
a5 = np.reshape(a2, (2, 3, 1))

ValueError: cannot reshape array of size 6 into shape (2,3,3)

In [44]:
# Division
a1 / ones

array([1., 2., 3.])

In [45]:
# Floor division removes the decimals (round down)
a2 // a1

array([[1., 1., 1.],
       [4., 2., 2.]])

In [46]:
# Squared (element-wise power)
a2 ** 2

array([[ 1.  ,  4.  ,  9.  ],
       [16.  , 25.  , 42.25]])

In [47]:
np.square(a2)

array([[ 1.  ,  4.  ,  9.  ],
       [16.  , 25.  , 42.25]])

In [48]:
# Modulo
a1 % 2

array([1, 0, 1], dtype=int32)

## Aggregation
Aggregation = performing the same operation on a number of things

In [50]:
listy_list = [1,2,3]
type(listy_list)

list

In [51]:
sum(listy_list)

6

In [52]:
# The same built-in also works on NumPy arrays
sum(a1)

6

In [53]:
# NumPy also has its own implementation of this
np.sum(a1)

6

### Reglas
#### Use Python's functions (`sum()`) on Python datatypes and NumPy's functions (`np.sum()`) on NumPy arrays

In [54]:
# create a massive Numpy array
massive_array = np.random.random(100000)
massive_array.size

100000

In [56]:
# Show the first 10 elements:
massive_array[:10]

array([0.16494046, 0.36980809, 0.14644176, 0.56961841, 0.70373728,
       0.28847644, 0.43328806, 0.75610669, 0.39609828, 0.89603839])

#### `%timeit`: función mágica de Jupyter Notebook para mostrarte cuánto tarda en ejecutarse una línea de código

In [57]:
%timeit sum(massive_array) # python's sum
%timeit np.sum(massive_array) # numpy's sum

13.9 ms ± 691 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
38.3 µs ± 4.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [58]:
a2

array([[1. , 2. , 3. ],
       [4. , 5. , 6.5]])

In [59]:
# Mean
np.mean(a2)

3.5833333333333335

In [60]:
# Maximum
np.max(a2)

6.5

In [61]:
# Minimum
np.min(a2)

1.0

In [62]:
# Standard deviation = a measure of how spread out a group of numbers is from the mean
np.std(a2)

1.8352262954621035

In [63]:
# Variance = measure of the average degree to which each number differs from the mean
# higher variance = wider range of numbers
# lower variance = narrower range of numbers
np.var(a2)

3.368055555555556

In [64]:
# The standard deviation is the square root of the variance
np.sqrt(np.var(a2))

1.8352262954621035