# Librería **NumPy** (Numerical Python)

### Librería estándar para trabajar con datos numéricos. Permite generar y manipular datos de manera eficiente. 

### Incorpora la clase **array**.

----------

#### arrays (numpy) vs listas (python nativo)

Las listas son buenos contenedores para cualquier tipo de objeto, es fácil insertar o eliminar elementos. Sin embargo, el hecho de permitir diferentes tipos de elementos requiere espacio en la memoria para cada uno. Además, las operaciones entre elementos son limitadas.

Los arrays son contenedores de **un solo tipo de variables**, ocupan menos memoria y se vuelve más eficiente su manipulación. Su estructura facilita las operaciones matemáticas entre elementos.

----------

In [1]:
import numpy as np

a = np.array([1,2,3,4])

lista = ['a','b','c']
b = np.array(lista)

print(type(a))
print(type(b))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [2]:
print(a.dtype)
print(b.dtype)

int64
<U1


In [3]:
c = np.array(['c',2.9,3,4])
print(c.dtype)

<U32


In [4]:
c

array(['c', '2.9', '3', '4'], dtype='<U32')

In [5]:
# array de una dimension: vector

d1 = np.array([1,2,3,4])
print('1D ', d1)


1D  [1 2 3 4]


In [6]:
# array de dos dimensiones: matriz
d2 = np.array([[1,2,3,4],[1,2,3,4]]) #2x4
print('2D ', d2)

2D  [[1 2 3 4]
 [1 2 3 4]]


In [7]:

# three-dimensional array: cube
d3 = np.array([[[1,2,3,4],[1,2,3,4]],[[1,2,3,4],[1,2,3,4]]]) # shape (2, 2, 4): 2 blocks x 2 rows x 4 columns
print('3D ', d3)

3D  [[[1 2 3 4]
  [1 2 3 4]]

 [[1 2 3 4]
  [1 2 3 4]]]


In [8]:
d3[0][1][0]

1

In [9]:
lista=[1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7]
lista[3:9:2] # elements from index 3 up to (not including) index 9, in steps of two

[4, 6, 8]

## Atributos de un array

Características

In [10]:
d3.dtype

dtype('int64')

In [11]:
d3_float = d3.astype('float')
d3_float.dtype

dtype('float64')

In [12]:
d3.ndim # numero de dimensiones

3

In [13]:
d3.shape

(2, 2, 4)

In [14]:
d3.size # numero de elementos

16

## Funciones generadoras de arrays

In [15]:
np.arange(3,20,1.5) # (start, stop, step); the stop value is not included in the result

array([ 3. ,  4.5,  6. ,  7.5,  9. , 10.5, 12. , 13.5, 15. , 16.5, 18. ,
       19.5])

In [16]:
np.linspace(3,20,25) #(inicio,fin, numero de valores en el intervalo)

array([ 3.        ,  3.70833333,  4.41666667,  5.125     ,  5.83333333,
        6.54166667,  7.25      ,  7.95833333,  8.66666667,  9.375     ,
       10.08333333, 10.79166667, 11.5       , 12.20833333, 12.91666667,
       13.625     , 14.33333333, 15.04166667, 15.75      , 16.45833333,
       17.16666667, 17.875     , 18.58333333, 19.29166667, 20.        ])

In [17]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [18]:
np.ones(10)*23

array([23., 23., 23., 23., 23., 23., 23., 23., 23., 23.])

In [19]:
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [20]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

#### Números aleatorios

* rand :  entre 0 y 1

* randn : distribución normal estándar

* randint : enteros



In [21]:
np.random.rand(5) #0-1

array([0.65951542, 0.29658295, 0.24822566, 0.60240725, 0.0752476 ])

In [22]:
np.random.rand(20)*4 + 4 # any range: scale by the width (4) and shift by the lower bound (4) -> values in [4, 8)

array([5.3604329 , 4.25923858, 4.74322609, 4.55313115, 4.75420216,
       4.57409618, 5.2768493 , 6.67961744, 4.41618381, 7.29669485,
       6.50526345, 5.14735005, 4.19382365, 6.96429498, 6.35942005,
       6.2798419 , 7.97915556, 5.77438571, 6.0567909 , 7.47001194])

In [23]:
np.random.randn(3) # distribucion normal

array([-0.79014628, -0.67827934, -0.74278496])

In [24]:
np.random.randn(3,3) # distribucion normal

array([[ 0.93628776,  1.68713001,  0.22467678],
       [ 0.3851496 , -1.04814012, -0.56447966],
       [ 0.38903197, -2.41303031,  0.22029064]])

In [25]:
np.random.randint(20,80,5) # (low, high, size); high is excluded, so values fall in 20..79

array([62, 63, 36, 43, 79])

## Manipulación de arrays

[Array manipulation routines](https://numpy.org/doc/stable/reference/routines.array-manipulation.html)

append, insert, concatenate, reshape

In [26]:
print(d3)
print(d3.shape)

[[[1 2 3 4]
  [1 2 3 4]]

 [[1 2 3 4]
  [1 2 3 4]]]
(2, 2, 4)


In [27]:
d3_a_d2 = d3.reshape(4,4)
print(d3_a_d2)
print(d3_a_d2.shape)

[[1 2 3 4]
 [1 2 3 4]
 [1 2 3 4]
 [1 2 3 4]]
(4, 4)


In [28]:
x = np.array([2, 4, -4, 9, -1])

np.concatenate((x, [1000, 2222], d3_a_d2[0], d3_a_d2[:,3]))

array([   2,    4,   -4,    9,   -1, 1000, 2222,    1,    2,    3,    4,
          4,    4,    4,    4])

In [29]:
np.append(x, 100) 

array([  2,   4,  -4,   9,  -1, 100])

In [30]:
np.append(x, np.nan)

array([ 2.,  4., -4.,  9., -1., nan])

In [31]:
np.insert(x, 2, 200) # insertar 200 en la posicion con indice 2


array([  2,   4, 200,  -4,   9,  -1])

In [32]:
x = np.arange(20)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [33]:
np.insert(x, [2, 4, -1], [222, 444, -111])

array([   0,    1,  222,    2,    3,  444,    4,    5,    6,    7,    8,
          9,   10,   11,   12,   13,   14,   15,   16,   17,   18, -111,
         19])

In [34]:
d2[0]

array([1, 2, 3, 4])

In [35]:
np.insert(d2[0], 0,5)

array([5, 1, 2, 3, 4])

In [36]:
d2

array([[1, 2, 3, 4],
       [1, 2, 3, 4]])

In [37]:
np.append(d2, [[7, 8, 9, 10]], axis=0)

array([[ 1,  2,  3,  4],
       [ 1,  2,  3,  4],
       [ 7,  8,  9, 10]])

In [38]:
d2

array([[1, 2, 3, 4],
       [1, 2, 3, 4]])

In [39]:
#np.append(d2, [[7, 8]], axis=1) # raises ValueError: [[7, 8]] has 1 row but d2 has 2, so they cannot be joined along axis=1

In [40]:
a_to_add = np.reshape([7,8], (2,1)) # array de dos filas y una columna

np.append(d2, a_to_add, axis=1)

array([[1, 2, 3, 4, 7],
       [1, 2, 3, 4, 8]])

In [41]:
np.append(d2, [[7],[8]], axis=1)

array([[1, 2, 3, 4, 7],
       [1, 2, 3, 4, 8]])

In [42]:
np.insert(d2, 1, 10, axis=1)

array([[ 1, 10,  2,  3,  4],
       [ 1, 10,  2,  3,  4]])

In [43]:
np.insert(d2, 1, [[7,8]], axis=1)

array([[1, 7, 2, 3, 4],
       [1, 8, 2, 3, 4]])

In [44]:
np.insert(d2, 1, 10, axis=0)

array([[ 1,  2,  3,  4],
       [10, 10, 10, 10],
       [ 1,  2,  3,  4]])

## Operaciones con arrays

Operaciones elemento a elemento

In [45]:
x = np.array([2.0, 4.6, -4.6, 9.3, -1.2])

y = np.arange(len(x))
y

array([0, 1, 2, 3, 4])

In [46]:
y*y

array([ 0,  1,  4,  9, 16])

In [47]:
y*y + 2*x

array([ 4. , 10.2, -5.2, 27.6, 13.6])

In [48]:
z = y*y + 2*x
z**3 - 5

array([   59.   ,  1056.208,  -145.608, 21019.576,  2510.456])

In [49]:
np.floor(z) # rounds down
np.ceil(z) # rounds up
w = np.absolute(z)
np.sqrt(w)
np.min(z)
np.max(z)
np.mean(z)
np.median(z)
np.sum(z) # only the value of this last expression is displayed as the cell output

50.2

In [50]:
z

array([ 4. , 10.2, -5.2, 27.6, 13.6])

In [51]:
z*0.1234

array([ 0.4936 ,  1.25868, -0.64168,  3.40584,  1.67824])

In [52]:
np.round(z*0.1234, 1)

array([ 0.5,  1.3, -0.6,  3.4,  1.7])

In [53]:
np.random.seed(2)

m1 = np.random.randint(10,30,(4,5))
m1

array([[18, 25, 23, 18, 21],
       [28, 21, 18, 17, 12],
       [27, 21, 25, 15, 17],
       [13, 16, 14, 20, 21]])

In [54]:
m2 = np.random.randint(0,5,(4,5))
m2

array([[3, 2, 1, 2, 4],
       [3, 0, 4, 3, 1],
       [2, 0, 4, 4, 2],
       [4, 2, 1, 0, 2]])

In [55]:
m1 + m2 

array([[21, 27, 24, 20, 25],
       [31, 21, 22, 20, 13],
       [29, 21, 29, 19, 19],
       [17, 18, 15, 20, 23]])

## Filtros y máscaras

array[condicion]

El módulo [ma](https://numpy.org/doc/stable/reference/maskedarray.generic.html) (*masked arrays*) permite la generación y manipulación de máscaras

In [56]:
x = np.arange(5, 10)
x

array([5, 6, 7, 8, 9])

In [57]:
b = np.array([True, False, False, True, False])
b

array([ True, False, False,  True, False])

In [58]:
b = np.array([True, False, False, True, False], dtype=bool) # buenas practicas
b

array([ True, False, False,  True, False])

In [59]:
x[b]

array([5, 8])

In [60]:
x>7

array([False, False, False,  True,  True])

In [61]:
x%2 == 1

array([ True, False,  True, False,  True])

In [62]:
m2 == 0 # mascara

array([[False, False, False, False, False],
       [False,  True, False, False, False],
       [False,  True, False, False, False],
       [False, False, False,  True, False]])

In [63]:
mask = m2 == 0 # mascara
mask

array([[False, False, False, False, False],
       [False,  True, False, False, False],
       [False,  True, False, False, False],
       [False, False, False,  True, False]])

In [64]:
mask = m2 >= 2
mask

array([[ True,  True, False,  True,  True],
       [ True, False,  True,  True, False],
       [ True, False,  True,  True,  True],
       [ True,  True, False, False,  True]])

In [65]:
m2[mask]

array([3, 2, 2, 4, 3, 4, 3, 2, 4, 4, 2, 4, 2, 2])

In [66]:
m2*mask # los False los pone como 0

array([[3, 2, 0, 2, 4],
       [3, 0, 4, 3, 0],
       [2, 0, 4, 4, 2],
       [4, 2, 0, 0, 2]])

In [67]:
import numpy.ma as ma

x = np.arange(5, 10)

ma.masked_array(x, mask=[1, 1, 0, 1, 0])

masked_array(data=[--, --, 7, --, 9],
             mask=[ True,  True, False,  True, False],
       fill_value=999999)

In [68]:
x_masked = ma.masked_array(x, mask=[1, 1, 0, 1, 0])

x_masked.mean()

8.0

In [69]:
ma.masked_array(m1, mask = m1%2 == 0)

masked_array(
  data=[[--, 25, 23, --, 21],
        [--, 21, --, 17, --],
        [27, 21, 25, 15, 17],
        [13, --, --, --, 21]],
  mask=[[ True, False, False,  True, False],
        [ True, False,  True, False,  True],
        [False, False, False, False, False],
        [False,  True,  True,  True, False]],
  fill_value=999999)

In [70]:
x = [-9999.,1.,-9999.,3.,4.]
mx = ma.masked_values (x, -9999.)
mx

masked_array(data=[--, 1.0, --, 3.0, 4.0],
             mask=[ True, False,  True, False, False],
       fill_value=-9999.0)

In [71]:
mx.filled(mx.mean()) #rellenar los valores vacios

array([2.66666667, 1.        , 2.66666667, 3.        , 4.        ])

## Algebra lineal

submódulo **linalg** para operaciones algebraicas de vectores y matrices.

Producto punto

In [72]:
# producto escalar de dos vectores
a = np.array([1, 2, 3, 4])
b = np.array([1, 0, 1, 2])

a.dot(b)

12

In [73]:
a @ b

12

In [74]:
A = np.array([[1,2,3], [4,5,6], [7,8,9]])
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [75]:
A*A # elemento por elemento

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

In [76]:
A.dot(A) #producto matricial

array([[ 30,  36,  42],
       [ 66,  81,  96],
       [102, 126, 150]])

In [77]:
# Interpret A as a np.matrix, for which `*` is the matrix product.
# np.mat was removed in NumPy 2.0; np.asmatrix is the equivalent call.
A_matriz = np.asmatrix(A)

In [78]:
A_matriz*A_matriz  # producto matricial (filas x columnas), no elemento a elemento como con arrays

matrix([[ 30,  36,  42],
        [ 66,  81,  96],
        [102, 126, 150]])

In [93]:
A.T

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [79]:
A_matriz.T 

matrix([[1, 4, 7],
        [2, 5, 8],
        [3, 6, 9]])

In [80]:
A_matriz.diagonal()

matrix([[1, 5, 9]])

In [81]:
A_matriz.trace() # traza

matrix([[15]])

In [82]:
np.linalg.det(A_matriz)  # determinant; A is singular (its rows are linearly dependent), so the result is 0 up to floating-point rounding

-9.51619735392994e-16

In [83]:
a = np.array([[1, 2], [3, 4]])
np.linalg.inv(a) # A * Ainv = identidad

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [84]:
A_inv = np.linalg.inv(a)
a.dot(A_inv)

array([[1.00000000e+00, 1.11022302e-16],
       [0.00000000e+00, 1.00000000e+00]])

## Estadística

NumPy incluye rutinas de [estadística](https://numpy.org/doc/stable/reference/routines.statistics.html) con funciones básicas de estadística descriptiva

In [96]:
data = np.random.rand(100)

In [101]:
np.ptp(data) # rango: max-min

0.9976871141962937

In [106]:
np.percentile(data, 5) # percentiles

0.05996673511132871

In [117]:
np.median(data)
np.mean(data)
np.std(data)
np.var(data)

0.08083195230929693

In [124]:
col1 = np.random.rand(10)*3 + 10
col2 = np.random.randn(10)*3 + 10

In [148]:
data = np.concatenate((col1.reshape(10,1), col2.reshape(10,1)), axis=1)
data

array([[10.07511582, 11.78787108],
       [10.78454861,  9.32944891],
       [11.89251737, 10.67421254],
       [11.32301031, 11.74034795],
       [11.38971213,  8.54893919],
       [11.51206584, 13.9716865 ],
       [12.41415592,  7.61956271],
       [10.41777702, 12.68173869],
       [10.99831923, 14.14439915],
       [12.12244027,  3.02187729]])

In [149]:
np.min(data)

3.021877291995933

In [151]:
np.min(data, axis=0)

array([10.07511582,  3.02187729])

In [152]:
np.min(data, axis=1)

array([10.07511582,  9.32944891, 10.67421254, 11.32301031,  8.54893919,
       11.51206584,  7.61956271, 10.41777702, 10.99831923,  3.02187729])

In [154]:
np.std(data, axis=0)

array([0.70407693, 3.20154201])

In [164]:
# Integer weights 0..19 arranged as 10 rows x 2 columns.
pesos = np.arange(20).reshape(10,2)
pesos  # display the array (a bare assignment produces no cell output)

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15],
       [16, 17],
       [18, 19]])

In [170]:
# weighted average of each column: avg = sum(a * weights) / sum(weights)
np.average(data, axis=0, weights = pesos) 

array([11.47012629,  9.9310957 ])

In [None]:
np.mean(data, axis=0)

In [174]:
# Covariance matrix. NOTE: by default np.cov treats each ROW as a variable,
# so for this 10x2 array the result is 10x10 (row-vs-row covariances).
# Pass rowvar=False to get the 2x2 covariance between the two columns.
np.cov(data)

array([[  1.46676528,  -1.24611483,  -1.043329  ,   0.35739861,
         -2.43277439,   2.1063641 ,  -4.10598236,   1.93880612,
          2.69423246,  -7.79351853],
       [ -1.24611483,   1.05865756,   0.88637749,  -0.30363393,
          2.06680392,  -1.78949664,   3.48830556,  -1.64714497,
         -2.28892997,   6.62111322],
       [ -1.043329  ,   0.88637749,   0.74213333,  -0.25422223,
          1.73046369,  -1.49828386,   2.92063802,  -1.37909771,
         -1.91644217,   5.5436299 ],
       [  0.35739861,  -0.30363393,  -0.25422223,   0.08708535,
         -0.59278073,   0.51324613,  -1.00048209,   0.4724182 ,
          0.65648877,  -1.89900371],
       [ -2.43277439,   2.06680392,   1.73046369,  -0.59278073,
          4.03499543,  -3.4936119 ,   6.8101753 ,  -3.21570051,
         -4.46864934,  12.92631649],
       [  2.1063641 ,  -1.78949664,  -1.49828386,   0.51324613,
         -3.4936119 ,   3.02486689,  -5.89644025,   2.78424344,
          3.86908158, -11.19196635],
       [ -

In [None]:
np.average(data)

In [None]:
np.histogram(data)

## Lectura y escritura de documentos

Las rutinas de [entrada y salida](https://numpy.org/doc/stable/reference/routines.io.html) permiten guardar arrays en archivos de texto y volver a leerlos

In [85]:
data = np.random.rand(10,2)

In [86]:
data

array([[0.46867358, 0.65426618],
       [0.79308974, 0.66306179],
       [0.61302969, 0.990852  ],
       [0.1194848 , 0.14856478],
       [0.85228103, 0.50952588],
       [0.21726993, 0.99326629],
       [0.31529748, 0.25873317],
       [0.80917529, 0.35367538],
       [0.46784249, 0.27417326],
       [0.79820039, 0.81413841]])

In [87]:
# Write `data` as tab-separated text with 3 decimals per value.
# Column names in the header are separated by a tab only, matching the delimiter.
np.savetxt('datos.tsv', data, delimiter='\t', fmt='%.3f', header="tiempo\tdistancia")

In [88]:
# Read the file back (lines starting with '#', such as the saved header, are skipped by default)
data_read = np.loadtxt("datos.tsv", delimiter="\t")
tiempo = data_read[:, 0]  # first column
masa = data_read[:, 1]  # second column -- NOTE(review): the saved header labels it "distancia", not mass; confirm the intended name

In [89]:
tiempo

array([0.469, 0.793, 0.613, 0.119, 0.852, 0.217, 0.315, 0.809, 0.468,
       0.798])

In [90]:
t, d = np.loadtxt("datos.tsv", delimiter="\t", unpack=True) # unpack=True transposes the result so each column is returned as a separate array

In [91]:
t

array([0.469, 0.793, 0.613, 0.119, 0.852, 0.217, 0.315, 0.809, 0.468,
       0.798])