## Procesamiento de matrices (arreglos) con Numpy
NumPy es la biblioteca fundamental para el trabajo con computación científica en Python. Provee arreglos multidimensionales de altísima eficiencia y las herramientas para trabajar con estos arreglos. Para los familiarizados con Matlab, NumPy se utiliza de forma muy natural.

In [86]:
import numpy as np
np.__version__

'1.21.5'

### Crear arreglos desde listas

In [87]:
a = np.array([2, 3, 4])
print(a)
print(a.shape)
a.__class__

[2 3 4]
(3,)


numpy.ndarray

In [88]:
# acceder y cambiar valor
print(a[2])
a[2] = 9
print(a)

4
[2 3 9]


In [89]:
# arreglos de dos dimensiones
b = np.array([[2, 3, 4], [5, 6, 7]])
print(b)
print(b.shape)

[[2 3 4]
 [5 6 7]]
(2, 3)


In [90]:
# cambiar valor
b[1, 2] = 9
print(b)

[[2 3 4]
 [5 6 9]]


In [91]:
# o con 3 ...
c = np.array([[[2, 1, 2, 5], [4,2, 5, 2], [9, 8, 7, 6]], [[3,5, 4, 1], [8, 9, 2, 6], [0,1, 8, 8]]])
print(c)
print(c.shape)

[[[2 1 2 5]
  [4 2 5 2]
  [9 8 7 6]]

 [[3 5 4 1]
  [8 9 2 6]
  [0 1 8 8]]]
(2, 3, 4)


In [92]:
# cambiar valor
c[0, 0, 0] = 11
print(c)

[[[11  1  2  5]
  [ 4  2  5  2]
  [ 9  8  7  6]]

 [[ 3  5  4  1]
  [ 8  9  2  6]
  [ 0  1  8  8]]]


In [93]:
# hay que respetar las dimensiones
c[2, 3] = 8

IndexError: index 2 is out of bounds for axis 0 with size 2

## constructores de arreglos especiales

In [94]:
# arreglo de ceros
a1 = np.zeros((3, 2))
print(a1)

[[0. 0.]
 [0. 0.]
 [0. 0.]]


In [95]:
# unos
a2 = np.ones((2, 3))
print(a2)

[[1. 1. 1.]
 [1. 1. 1.]]


In [96]:
# constante
a3 = np.full((3, 3), 3.14)
print(a3)

[[3.14 3.14 3.14]
 [3.14 3.14 3.14]
 [3.14 3.14 3.14]]


In [97]:
# identidad
a4 = np.eye(3)
print(a4)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [98]:
a5 = np.random.random((3, 2))
print(a5)

[[0.15604947 0.33328508]
 [0.84206897 0.90528784]
 [0.38797313 0.87984242]]


In [99]:
# Rangos
print(np.arange(2, 14))
print(np.arange(0, 50, 5))

[ 2  3  4  5  6  7  8  9 10 11 12 13]
[ 0  5 10 15 20 25 30 35 40 45]


## Indexado de arreglos

In [102]:
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
a

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [101]:
# subarreglo
a[:2,1:3]

array([[2, 3],
       [6, 7]])

In [103]:
# Importante: los slices no crean nuevos arreglos, sino vistas (views) sobre los datos del arreglo original
print(a)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [105]:
b = a[:2,1:3]
print(b)

[[2 3]
 [6 7]]


In [106]:
b[0, 0] = 55
print(a)
print(b)

[[ 1 55  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[55  3]
 [ 6  7]]


In [107]:
# Se puede mezclar el indexado entero con los rangos: se obtiene arreglo de menor rango
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(a)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [108]:
print(a[1, :])
print(a[1])

[5 6 7 8]
[5 6 7 8]


In [109]:
print(a[:,2])

[ 3  7 11]


In [110]:
print(a.shape)
print(a[2, :].shape)
print(a[:,2].shape)

(3, 4)
(4,)
(3,)


In [111]:
# Seleccion por arreglo de indices
a = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
print(a)
print("***")
idxs = np.array([0, 2, 0, 1])
print(a[:,idxs])

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
***
[[ 1  3  1  2]
 [ 4  6  4  5]
 [ 7  9  7  8]
 [10 12 10 11]]


In [112]:
# Idem con filas
print(a[idxs, :])

[[1 2 3]
 [7 8 9]
 [1 2 3]
 [4 5 6]]


In [113]:
# Elementos por sus indices
a = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
print(a)
idx1 = [0, 1, 2, 1]
idx2 = [2, 0, 1, 0]
a[idx1, idx2]

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


array([3, 4, 8, 4])

In [114]:
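# También se pueden modificar los elementos indexados: la asignación con arreglos de índices escribe sobre el arreglo original.
# Ojo: un índice repetido, como (1, 0) aquí, se actualiza una sola vez; para acumular repeticiones usar np.add.at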
a[idx1, idx2] += 10
print(a)

[[ 1  2 13]
 [14  5  6]
 [ 7 18  9]
 [10 11 12]]


In [115]:
# Seleccion por el uso de arreglos booleanos
a = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
print(a)
bool_idx = a % 2 == 0 
print(bool_idx)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[False  True False]
 [ True False  True]
 [False  True False]
 [ True False  True]]


In [116]:
# y ahora se utiliza el arreglo booleano para seleccionar
print(a[bool_idx])

[ 2  4  6  8 10 12]


In [117]:
# o para modificar
a[bool_idx] *= 2
print(a)

[[ 1  4  3]
 [ 8  5 12]
 [ 7 16  9]
 [20 11 24]]


In [118]:
# Noten que el arreglo booleano puede usarse en operaciones en otros arreglos
a = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
b = np.random.random((4, 3))
print(a)
print(b)
a[b < 0.5] += 19
print("***")
print(a)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[0.02915987 0.59711651 0.09187591]
 [0.78058787 0.2640035  0.73398992]
 [0.15758067 0.22500392 0.73121036]
 [0.95071642 0.71549728 0.82279257]]
***
[[20  2 22]
 [ 4 24  6]
 [26 27  9]
 [10 11 12]]


In [119]:
# Para utilizar multiples condiciones usar el operador & 
a[(b < 0.5) & (b > 0.2)]

array([24, 27])

In [120]:
# A los arreglos se les puede declarar el tipo de datos a utilizar. 
# Si no se pone, numpy elige uno compatible con los datos de inicializacion
x = np.array([1, 2])
print(x.dtype)
y = np.array([1.0, 2.0])
print(y.dtype)
z = np.array([True, False])
print(z.dtype)

int64
float64
bool


In [121]:
# Tambien se puede especificar el tipo
z = np.array([1, 2], dtype=np.float64)
print(z.dtype)
print(z)

float64
[1. 2.]


### Operaciones con arreglos

In [122]:
x = np.array([[1,2],[3,4]], dtype=np.float64)
y = np.array([[5,6],[7,8]], dtype=np.float64)
print(x)
print(y)

[[1. 2.]
 [3. 4.]]
[[5. 6.]
 [7. 8.]]


In [123]:
print(x+y)
print(np.add(x,y))

[[ 6.  8.]
 [10. 12.]]
[[ 6.  8.]
 [10. 12.]]


In [124]:
# Element-wise operations
print(x-y)  # np.subtract
print(x*y)  # np.multiply
print(x/y)  # np.divide
print(np.sqrt(x))
print(np.power(x, y))

# Many more at: https://numpy.org/doc/stable/reference/routines.math.html

[[-4. -4.]
 [-4. -4.]]
[[ 5. 12.]
 [21. 32.]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]
[[1.         1.41421356]
 [1.73205081 2.        ]]
[[1.0000e+00 6.4000e+01]
 [2.1870e+03 6.5536e+04]]


In [125]:
# Producto interno: dot
x = np.array([[1,2],[3,4]])
y = np.array([[5,6],[7,8]])

v = np.array([9,10])
w = np.array([11, 12])

v.dot(w)

219

In [126]:
print(x)
print(v)
x.dot(v)

[[1 2]
 [3 4]]
[ 9 10]


array([29, 67])

In [127]:
print(x)
print(y)
x.dot(y)

[[1 2]
 [3 4]]
[[5 6]
 [7 8]]


array([[19, 22],
       [43, 50]])

In [128]:
# Operaciones por ejes
x = np.array([[1,2],[3,4]])
print(x)
print(np.sum(x))
print(np.sum(x, axis=0))
print(np.sum(x, axis=1))

[[1 2]
 [3 4]]
10
[4 6]
[3 7]


In [129]:
# funcionan en matrices de mas dimensiones
x = np.array([[[2, 1, 2, 5], [4,2, 5, 2], [9, 8, 7, 6]], [[3,5, 4, 1], [8, 9, 2, 6], [0,1, 8, 8]]])
print(np.sum(x))
print(np.sum(x, axis=0))
print(np.sum(x, axis=1))
print(np.sum(x, axis=2))

108
[[ 5  6  6  6]
 [12 11  7  8]
 [ 9  9 15 14]]
[[15 11 14 13]
 [11 15 14 15]]
[[10 13 30]
 [13 25 17]]


In [130]:
# Y se puede hacer por conjuntos de axis
print(np.sum(x, axis=(0,1)))
print(np.sum(x, axis=(0,2)))
print(np.sum(x, axis=(1, 2)))

[26 26 28 28]
[23 38 47]
[53 55]


In [131]:
# Traspuesta
x = np.array([[1,2,3], [4, 5, 6]])
print(x)
print(x.T)

[[1 2 3]
 [4 5 6]]
[[1 4]
 [2 5]
 [3 6]]


In [132]:
# 'Stackeado' horizontal y vertical
a = np.array([[2, 3], [5, 6]])
b = np.array([[4, 3], [7, 9], [1, 6]])
c = np.array([[4, 5, 6], [7, 6, 5]])
print(np.vstack([a,b]))
print(np.hstack([a,c]))

[[2 3]
 [5 6]
 [4 3]
 [7 9]
 [1 6]]
[[2 3 4 5 6]
 [5 6 7 6 5]]


In [133]:
# np.where(cond, p, q) picks element-wise from p where cond is True and from q
# otherwise (similar to the C++ ternary operator ?:).
a = np.arange(5, 15)  # same values as np.array(range(5, 15)), built directly
print(a)
np.where(a > 7, a, a - 5)

[ 5  6  7  8  9 10 11 12 13 14]


array([ 0,  1,  2,  8,  9, 10, 11, 12, 13, 14])

In [134]:
# Si no se ponen valores para retornar, where retorna el indice de los True
np.where(a % 2 == 0)

(array([1, 3, 5, 7, 9]),)

### Cambiando el shape de un arreglo
Nota: no cambia los valores ni su orden!

In [135]:
# Build a flat array with the 24 integers 0..23 and view it as a 2x12 matrix.
a = np.arange(24)
print(a)
print(a.shape)
# reshape keeps the values and their order; only the shape changes.
b = a.reshape((2, 12))
print(b)
print(b.shape)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
(24,)
[[ 0  1  2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21 22 23]]
(2, 12)


In [136]:
# El total no puede tener mas ni menos elementos
a.reshape((3, 2))

ValueError: cannot reshape array of size 24 into shape (3,2)

In [137]:
# -1 calcula el tamaño correcto
a.reshape((3,-1))

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21, 22, 23]])

In [138]:
a.reshape((-1, 6))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [None]:
# -1 puede estar solamente en una dimension
a.reshape((-1, 2, -1))

In [None]:
a.reshape((2, -1, 3))

### Broadcasting
Este es un mecanismo para operar con arreglos de diferentes formas (shapes).

In [139]:
# Example: add a vector to every row of an array using an explicit loop.
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
v = np.array([1, 0, 1])
# Uninitialized array with the same shape and dtype as x; every row is
# overwritten by the loop below before it is read.
y = np.empty_like(x)

# Take the row count from x itself instead of hard-coding 4, so the loop
# stays correct if the example array changes size.
for i in range(x.shape[0]):
    y[i, :] = x[i, :] + v
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [140]:
# Solucion 2. Armar una nueva matriz repitiendo (v) en varias filas
vv = np.tile(v, (4, 1))
print(vv)
y = x + vv
print(y)

[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [None]:
# Broadcasting hace esto último, de forma automática
x+v

**Reglas para el broadcasting**
- Si dos arreglos no tienen el mismo rango (número de dimensiones), se anteponen 1s al shape del arreglo de menor rango hasta que ambos rangos se igualen

In [141]:
# Eso paso en el ejemplo anterior, pues
print(x.shape)
print(v.shape)

(4, 3)
(3,)


- Dos arreglos son compatibles en una dimensión si tienen el mismo tamaño, o uno de los arreglos tiene 1 en esa dimensión. 
En el ejemplo anterior, al adicionar 1 al shape de v, quedan compatibles: (4,3) y (1,3)
- Los arreglos pueden recibir broadcasting, si son compatibles en todas las dimensiones

In [142]:
# Compatibles
a = np.random.random((3, 4, 1))
b = np.random.random((3, 1, 4))
c = a+b

In [143]:
# Incompatibles
a = np.random.random((3, 4, 1))
b = np.random.random((3, 2, 4))
c = a+b

ValueError: operands could not be broadcast together with shapes (3,4,1) (3,2,4) 

- Luego del broadcasting, cada arreglo se comporta como si tuviera un shape igual al máximo, dimensión a dimensión, de los shapes de ambos arreglos

In [None]:
a = np.random.random((3, 4, 1))
b = np.random.random((1, 1, 5))
(a+b).shape

- Si en una dimensión un arreglo tiene un tamaño 1 y el otro arreglo mayor que 1, el primer arreglo se comporta como si se copiara múltiples veces en esa dimensión

In [144]:
a = np.ones((3, 4))
b = np.ones((3, 1))*3
print(a)
print(b)
a+b

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[3.]
 [3.]
 [3.]]


array([[4., 4., 4., 4.],
       [4., 4., 4., 4.],
       [4., 4., 4., 4.]])

In [145]:
a = np.ones((1, 4))
b = np.ones((3, 4))*5
print(a)
print(b)
a+b

[[1. 1. 1. 1.]]
[[5. 5. 5. 5.]
 [5. 5. 5. 5.]
 [5. 5. 5. 5.]]


array([[6., 6., 6., 6.],
       [6., 6., 6., 6.],
       [6., 6., 6., 6.]])

In [146]:
a = np.ones((1, 4))
b = np.ones((3, 1))*6
print(a)
print(b)
a+b

[[1. 1. 1. 1.]]
[[6.]
 [6.]
 [6.]]


array([[7., 7., 7., 7.],
       [7., 7., 7., 7.],
       [7., 7., 7., 7.]])

In [147]:
# Para saber el resultado de un broadcast (muy util para buscar errores):
print(np.broadcast_to(a, ((3,4))))
print(np.broadcast_to(b, ((3, 4))))

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[6. 6. 6. 6.]
 [6. 6. 6. 6.]
 [6. 6. 6. 6.]]


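Las funciones que operan elemento a elemento y soportan broadcasting se conocen como *funciones universales* (ufuncs). Estas incluyen las operaciones aritméticas y matemáticas elemento a elemento que hemos visto hasta ahora (`np.add`, `np.multiply`, `np.divide`, `np.sqrt`, `np.power`, ...) y muchas más.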

In [148]:
# Ejemplo, sumar una constante a un arreglo
a = np.random.random((4, 3))
print(a)
print(a + 4)

[[0.27019905 0.24566008 0.98544512]
 [0.85282415 0.30958531 0.34253595]
 [0.89387806 0.71918225 0.9912054 ]
 [0.59136345 0.33790484 0.27921305]]
[[4.27019905 4.24566008 4.98544512]
 [4.85282415 4.30958531 4.34253595]
 [4.89387806 4.71918225 4.9912054 ]
 [4.59136345 4.33790484 4.27921305]]


In [149]:
np.broadcast_to(4, (4, 3))

array([[4, 4, 4],
       [4, 4, 4],
       [4, 4, 4],
       [4, 4, 4]])

In [150]:
np.array(4).shape

()

In [151]:
# normalizar quitando la media a vectores
a = np.random.randint(1, 10, size=(4,3))
print(a)
mean = np.average(a, axis=0)
print(mean)
print(a - mean)
print(np.sum(a-mean))

[[5 6 6]
 [3 5 8]
 [1 2 6]
 [6 2 7]]
[3.75 3.75 6.75]
[[ 1.25  2.25 -0.75]
 [-0.75  1.25  1.25]
 [-2.75 -1.75 -0.75]
 [ 2.25 -1.75  0.25]]
0.0


In [152]:
# uniformizar una matriz de numeros con (x - min)/(max-min)
a = np.random.randint(1, 10, size=(4,3))
mx = np.max(a)
mn = np.min(a)
print(a)
print((a-mn)/(mx-mn))

[[6 3 8]
 [4 7 4]
 [3 6 8]
 [2 8 7]]
[[0.66666667 0.16666667 1.        ]
 [0.33333333 0.83333333 0.33333333]
 [0.16666667 0.66666667 1.        ]
 [0.         1.         0.83333333]]


In [153]:
# Example: multiplication table.
x = np.arange(1, 11)  # the integers 1..10
print(x)
# Append a new axis so x becomes a (10, 1) column; its transpose is then a
# (1, 10) row.
x = x[:, np.newaxis]
print(x)
print(x.T)

[ 1  2  3  4  5  6  7  8  9 10]
[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]]
[[ 1  2  3  4  5  6  7  8  9 10]]


In [154]:
# y la tabla
x*x.T

array([[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
       [  2,   4,   6,   8,  10,  12,  14,  16,  18,  20],
       [  3,   6,   9,  12,  15,  18,  21,  24,  27,  30],
       [  4,   8,  12,  16,  20,  24,  28,  32,  36,  40],
       [  5,  10,  15,  20,  25,  30,  35,  40,  45,  50],
       [  6,  12,  18,  24,  30,  36,  42,  48,  54,  60],
       [  7,  14,  21,  28,  35,  42,  49,  56,  63,  70],
       [  8,  16,  24,  32,  40,  48,  56,  64,  72,  80],
       [  9,  18,  27,  36,  45,  54,  63,  72,  81,  90],
       [ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100]])

In [155]:
# Ejemplo: matriz de distancias
coords = np.array([(2, 3), (4, 5), (-1,8), (0, 3)])
x1 = coords[:, np.newaxis, :]
x2 = coords[np.newaxis, :, :]
print(x1)

[[[ 2  3]]

 [[ 4  5]]

 [[-1  8]]

 [[ 0  3]]]


In [None]:
print(x2)

In [None]:
print(x1.shape)
print(x2.shape)

In [156]:
np.broadcast_to(x1, (4, 4, 2))

array([[[ 2,  3],
        [ 2,  3],
        [ 2,  3],
        [ 2,  3]],

       [[ 4,  5],
        [ 4,  5],
        [ 4,  5],
        [ 4,  5]],

       [[-1,  8],
        [-1,  8],
        [-1,  8],
        [-1,  8]],

       [[ 0,  3],
        [ 0,  3],
        [ 0,  3],
        [ 0,  3]]])

In [157]:
np.broadcast_to(x2, (4, 4, 2))

array([[[ 2,  3],
        [ 4,  5],
        [-1,  8],
        [ 0,  3]],

       [[ 2,  3],
        [ 4,  5],
        [-1,  8],
        [ 0,  3]],

       [[ 2,  3],
        [ 4,  5],
        [-1,  8],
        [ 0,  3]],

       [[ 2,  3],
        [ 4,  5],
        [-1,  8],
        [ 0,  3]]])

In [159]:
np.sqrt(np.sum((x1 - x2)**2, axis=-1))

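array([[0.        , 2.82842712, 5.83095189, 2.        ],
       [2.82842712, 0.        , 5.83095189, 4.47213595],
       [5.83095189, 5.83095189, 0.        , 5.09901951],
       [2.        , 4.47213595, 5.09901951, 0.        ]])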