# NumPy III

In [2]:
import os

import numpy as np

## Operaciones matemáticas y estadísticas

- `sum` : Suma de elementos.
- `mean` : Media aritmética de los elementos.
- `median` : Mediana de los elementos.
- `std` : Desviación estándar de los elementos.
- `var` : Varianza de los elementos.
- `min` : Valor mínimo de los elementos.
- `max` : Valor máximo de los elementos.
- `argmin` : Índice del valor mínimo.
- `argmax` : Índice del valor máximo.
- `cumsum` : Suma acumulada de los elementos.
- `cumprod` : Producto acumulado de los elementos.

In [7]:
array = np.random.randn(1000)

In [8]:
array.mean()

-0.02254312902532635

In [6]:
array.std()

1.0295763862032468

In [7]:
# Standard error of the mean: std / sqrt(n). The sample count is read from the
# array itself so the result stays correct if the sample size is changed.
array.std() / np.sqrt(array.size)

0.03255806405527419

In [9]:
# ndarray has no `median` method (use np.median(array) instead). The failing
# call is kept as a demonstration, but the exception is caught and its message
# printed so that "Restart & Run All" does not stop at this cell.
try:
    array.median()
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'numpy.ndarray' object has no attribute 'median'

In [9]:
np.median(array)

-0.025749179302169325

In [11]:
array.max()

3.3396122915496815

In [13]:
array.argmax() # index (position) of the maximum value, not the value itself

473

In [14]:
array[array.argmax()]

3.3396122915496815

- Si el array es multidimensional
    - Estas funciones poseen un parámetro `axis` que indica el eje sobre el cual aplicar la operación deseada
    - Si no se especifica, se calcula sobre todo el array ignorando su estructura

In [10]:
# Give the samples a 3-D (10, 10, 10) shape. reshape verifies that the element
# count matches and works even when other objects (e.g. the notebook's output
# cache) still reference the array; the in-place ndarray.resize can refuse to
# run in that situation and silently pads or truncates on a size mismatch.
array = array.reshape(10, 10, 10)

In [11]:
array.mean()

-0.02254312902532635

In [14]:
array.mean(axis=2) # averages along the third axis (axis=2), which collapses it: the result is 2-D

array([[-0.00657361,  0.19277855, -0.01681809,  0.31832294,  0.00633623,
        -0.4113612 , -0.07840924, -0.33487116, -0.37710895,  0.08744428],
       [-0.13944512, -0.01229012,  0.28249593, -0.41791533,  0.144873  ,
         0.22725631, -0.30865711, -0.23967791,  0.03284368, -0.02649962],
       [-0.58407699, -0.10682657, -0.20079905, -0.06194998,  0.13988382,
        -0.18060712,  0.00068513,  0.44402074, -0.26829144, -0.02895111],
       [ 0.21936332, -0.04305798, -0.09230258, -0.25861282,  0.46488389,
        -0.29708159,  0.29434429,  0.12807242, -0.12661656, -0.18182161],
       [ 0.03669277,  0.01665683, -0.30960676,  0.08748533, -0.02249714,
         0.3373957 , -0.39752889, -0.06009451, -0.30673964,  0.30374146],
       [-0.22388834, -0.27090135,  0.45457229, -0.03309039, -0.06229586,
         0.09934835, -0.1313105 ,  0.15070096,  0.23199631,  0.16494305],
       [-0.16295951,  0.16048732,  0.12188265, -0.19739568, -0.11477275,
         0.03526437, -0.39407331, -0.38718756

In [15]:
# Shape of the per-axis mean, computed explicitly instead of through `_`,
# which only holds the right value if the previous cell was the last one run.
array.mean(axis=2).shape

(10, 10)

## Operaciones booleanas

- Se pueden hacer operaciones matemáticas sobre arrays booleanos
    - True -> 1
    - False -> 0

In [19]:
# Five boolean flags used by the cells below (three of them are True).
bool_array = np.asarray(
    [False, True, True, False, True],
    dtype=bool,
)

In [20]:
bool_array.sum()

3

In [21]:
bool_array.all()

False

In [22]:
bool_array.any()

True

In [23]:
~bool_array # element-wise logical NOT: flips every boolean

array([ True, False, False,  True, False])

In [20]:
(~bool_array).any() # is there at least one False in bool_array?

True

## Sorting

In [25]:
array = np.random.randn(29)
array

array([ 0.59625867,  0.05800792,  0.66539786,  0.25271024,  0.70473887,
       -0.19588039,  0.37300193,  0.12009103,  0.60672029,  1.20984322,
        0.77690397, -1.67198003,  0.76214232,  1.05116373, -0.99226061,
       -1.07902768,  1.64536247, -0.38665545,  1.11638734,  0.51898397,
        0.70565722,  0.86415069, -1.28009218,  0.96710677,  1.52889318,
        1.40113048,  0.33018385,  0.11668135,  1.54289189])

In [26]:
np.sort(array)

array([-1.67198003, -1.28009218, -1.07902768, -0.99226061, -0.38665545,
       -0.19588039,  0.05800792,  0.11668135,  0.12009103,  0.25271024,
        0.33018385,  0.37300193,  0.51898397,  0.59625867,  0.60672029,
        0.66539786,  0.70473887,  0.70565722,  0.76214232,  0.77690397,
        0.86415069,  0.96710677,  1.05116373,  1.11638734,  1.20984322,
        1.40113048,  1.52889318,  1.54289189,  1.64536247])

In [27]:
array

array([ 0.59625867,  0.05800792,  0.66539786,  0.25271024,  0.70473887,
       -0.19588039,  0.37300193,  0.12009103,  0.60672029,  1.20984322,
        0.77690397, -1.67198003,  0.76214232,  1.05116373, -0.99226061,
       -1.07902768,  1.64536247, -0.38665545,  1.11638734,  0.51898397,
        0.70565722,  0.86415069, -1.28009218,  0.96710677,  1.52889318,
        1.40113048,  0.33018385,  0.11668135,  1.54289189])

In [28]:
array.sort() # sorts `array` in place; np.sort(array) above returned a sorted copy and left it untouched

In [29]:
array

array([-1.67198003, -1.28009218, -1.07902768, -0.99226061, -0.38665545,
       -0.19588039,  0.05800792,  0.11668135,  0.12009103,  0.25271024,
        0.33018385,  0.37300193,  0.51898397,  0.59625867,  0.60672029,
        0.66539786,  0.70473887,  0.70565722,  0.76214232,  0.77690397,
        0.86415069,  0.96710677,  1.05116373,  1.11638734,  1.20984322,
        1.40113048,  1.52889318,  1.54289189,  1.64536247])

## Operaciones de conjuntos

- `unique` : Elementos únicos
- `intersect1d` : Intersección de dos arrays
- `union1d` : Unión de dos arrays
- `in1d` : Array booleano que indica si cada elemento del primer array está contenido en el segundo (en código nuevo se recomienda `isin`).
- `setdiff1d` : Diferencia entre ambos conjuntos.
- `setxor1d` : Diferencia simétrica entre ambos conjuntos.

In [16]:
# Two small collections of language names; 'python' and 'R' appear in both.
a, b = (
    np.array(['python', 'R', 'C#', 'C++']),
    np.array(['java', 'javascript', 'python', 'R']),
)

In [17]:
# ndarray has no `unique` method (use np.unique(a) instead). The failing call
# is kept as a demonstration, but the exception is caught and its message
# printed so that "Restart & Run All" does not stop at this cell.
try:
    a.unique()
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'numpy.ndarray' object has no attribute 'unique'

In [18]:
np.unique(a)

array(['C#', 'C++', 'R', 'python'], dtype='<U6')

In [35]:
np.union1d(a, b)

array(['C#', 'C++', 'R', 'java', 'javascript', 'python'], dtype='<U10')

In [36]:
np.intersect1d(a, b)

array(['R', 'python'], dtype='<U10')

In [37]:
# For each element of `a`, is it present in `b`? np.isin is the recommended
# replacement for the deprecated np.in1d and gives the same result for 1-D input.
np.isin(a, b)

array([ True,  True, False, False])

## Operaciones algebraicas

A través de NumPy y de su módulo `linalg` podemos acceder a multitud de funciones de álgebra lineal (cálculo matricial)

- `diag` : Recupera la diagonal principal de una matriz.
- `dot` : Realiza el producto matricial de dos matrices.
- `trace` : Calcula la traza de una matriz.
- `det` : Calcula el determinante de una matriz.
- `eig` : Calcula los autovalores y autovectores de una matriz.
- `inv` : Calcula la inversa de una matriz.
- `qr` : Calcula la descomposición QR de una matriz.
- `svd` : Calcula la descomposición de valores singulares (Singular Value Decomposition) de una matriz.
- `solve` : Calcula el resultado del sistema lineal Ax = B donde A y B son las matrices de entrada y x la salida.
- `lstsq` : Calcula la solución de mínimos cuadrados a y = Xb, donde X e y son los parámetros de entrada y b la salida.

In [38]:
# Float matrices drawn from a normal distribution centred at 5, shaped (2, 3) and (3, 2).
# NOTE(review): the next cell rebinds X and Y before these values are used — looks like a leftover; confirm.
X = np.random.normal(loc=5, size=6).reshape(2,3)
Y = np.random.normal(loc=5, size=6).reshape(3,2)

In [39]:
# Random integer matrices with entries in [0, 20): X is 3x4 and Y is 4x3,
# so both X @ Y and Y @ X are defined.
X = np.random.randint(0, 20, size=(3, 4))
Y = np.random.randint(0, 20, size=(4, 3))

In [40]:
X

array([[12, 10,  0, 17],
       [18, 14,  3, 13],
       [ 3, 11, 12,  5]])

In [41]:
Y

array([[ 5,  7,  4],
       [ 1,  8, 12],
       [11, 13,  8],
       [ 6, 15,  3]])

In [42]:
# `*` is the element-wise product and needs broadcast-compatible shapes;
# (3, 4) and (4, 3) are not, so it raises ValueError. The exception is caught
# and its message printed so that "Restart & Run All" does not stop here.
try:
    X * Y
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: operands could not be broadcast together with shapes (3,4) (4,3) 

In [43]:
X.shape

(3, 4)

In [44]:
Y.shape

(4, 3)

In [45]:
X.dot(Y)

array([[172, 419, 219],
       [215, 472, 303],
       [188, 340, 255]])

In [46]:
np.dot(X, Y)

array([[172, 419, 219],
       [215, 472, 303],
       [188, 340, 255]])

In [48]:
X @ Y # `@` is the matrix-product operator; same result as X.dot(Y) and np.dot(X, Y) above

array([[172, 419, 219],
       [215, 472, 303],
       [188, 340, 255]])

In [49]:
Y @ X

array([[198, 192,  69, 196],
       [192, 254, 168, 181],
       [390, 380, 135, 396],
       [351, 303,  81, 312]])

In [50]:
# Build a random 3x3 matrix and its Gram matrix S = MᵀM, which is symmetric
# by construction; the last line displays S.
M = np.random.normal(size=(3, 3))
S = np.matmul(M.T, M)
S

array([[ 1.09697775, -0.89969578, -0.29686498],
       [-0.89969578,  6.0512626 ,  5.09882796],
       [-0.29686498,  5.09882796,  6.29432626]])

In [52]:
# `np` is only a local alias, not an importable module name, so this import
# raises ModuleNotFoundError (the real path is numpy.linalg). The exception is
# caught and its message printed so that "Restart & Run All" does not stop here.
try:
    from np.linalg import inv
except ModuleNotFoundError as exc:
    print(f"{type(exc).__name__}: {exc}")

ModuleNotFoundError: No module named 'np'

In [53]:
from numpy.linalg import inv, det, eig

In [54]:
S

array([[ 1.09697775, -0.89969578, -0.29686498],
       [-0.89969578,  6.0512626 ,  5.09882796],
       [-0.29686498,  5.09882796,  6.29432626]])

In [55]:
det(S)

10.358515840133832

In [56]:
eig(S)

(array([11.34222369,  0.61475233,  1.48559059]),
 array([[-0.08195434,  0.72268437,  0.68630226],
        [ 0.6987682 ,  0.53267628, -0.47747145],
        [ 0.71063809, -0.44043533,  0.54864399]]))

In [57]:
eig(S)[0].sum()

13.442566612993948

In [58]:
S.trace()

13.442566612993945

In [59]:
eig(S)[0].prod()

10.35851584013386

In [60]:
det(S)

10.358515840133832

In [61]:
inv(S)

array([[ 1.16721109,  0.40057045, -0.2694388 ],
       [ 0.40057045,  0.65806793, -0.51418685],
       [-0.2694388 , -0.51418685,  0.56269142]])

In [62]:
np.dot(S, inv(S))

array([[ 1.00000000e+00, -1.86029733e-17, -3.47206132e-18],
       [ 4.61056893e-16,  1.00000000e+00, -1.66676588e-16],
       [-1.13333346e-16, -7.15588415e-16,  1.00000000e+00]])

In [63]:
# Round away floating-point noise to reveal the identity matrix. The product
# is recomputed here instead of read from `_`, which depends on execution order.
np.dot(S, inv(S)).round()

array([[ 1., -0., -0.],
       [ 0.,  1., -0.],
       [-0., -0.,  1.]])

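## Funciones financieras

> **Nota:** estas funciones quedaron obsoletas en NumPy 1.18 y se eliminaron en NumPy 1.20; ahora están disponibles en el paquete independiente `numpy-financial` (`import numpy_financial as npf`).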

|Función|Descripción|
|----|---|
|`fv(rate, nper, pmt, pv[, when])`|Calcula el valor futuro.|
|`pv(rate, nper, pmt[, fv, when])`|Calcula el valor presente.|
|`npv(rate, values)`|NPV (Net Present Value) de una serie de flujos de caja.|
|`pmt(rate, nper, pv[, fv, when])`|Calcula el pago total, principal e interés.|
|`ppmt(rate, per, nper, pv[, fv, when])`|Calcula el pago contra el principal.|
|`ipmt(rate, per, nper, pv[, fv, when])`|Calcula la proporción del interés del pago.|
|`irr(values)`|Internal Rate of Return (IRR).|
|`mirr(values, finance_rate, reinvest_rate)`| Internal Rate of Return (IRR) Modificada.|
|`nper(rate, pmt, pv[, fv, when])`|Calcula el número de pagos periódicos.|
|`rate(nper, pmt, pv, fv[, when, guess, tol, …])`|Calcula la tasa de interés por periodo.|


## Exportar arrays con NumPy

- Podemos usar funciones propias de NumPy para leer y escribir ficheros
    - `np.save()` -> En binario
    - `np.savez()` -> Varios arrays en binario
    - `np.savez_compressed()` -> Varios arrays comprimidos
    - `np.savetxt()` -> Texto plano
    - `np.load()` -> Cargar fichero    

In [68]:
array = np.random.randn(1000)

In [70]:
# np.save does not create missing directories, so make sure the target folder
# exists before writing the binary .npy file.
os.makedirs('tmp', exist_ok=True)
np.save('tmp/data.npy', array)

In [71]:
array_load = np.load('tmp/data.npy')

In [72]:
(array == array_load).all() # True only if every element matches after the save/load round trip

True

- Si queremos guardar varios arrays en el mismo fichero

In [73]:
# Two unrelated arrays of length 10 to store together in one file:
# integers in [0, 100) and standard-normal floats.
a = np.random.randint(0, 100, size=10)
b = np.random.standard_normal(10)

In [74]:
np.savez('tmp/data.npz', a1=a, a2=b)

In [75]:
ab = np.load('tmp/data.npz')

In [76]:
ab

<numpy.lib.npyio.NpzFile at 0x110a850b8>

In [77]:
from utils import midir
midir(ab)

['_abc_impl',
 '_files',
 'allow_pickle',
 'close',
 'f',
 'fid',
 'files',
 'get',
 'items',
 'iteritems',
 'iterkeys',
 'keys',
 'pickle_kwargs',
 'values',
 'zip']

In [78]:
ab.keys

<bound method Mapping.keys of <numpy.lib.npyio.NpzFile object at 0x110a850b8>>

In [79]:
ab.keys()

KeysView(<numpy.lib.npyio.NpzFile object at 0x110a850b8>)

In [80]:
list(ab.keys())

['a1', 'a2']

In [81]:
ab['a1']

array([81, 74, 84, 23, 83, 90,  4,  8, 22, 25])

In [82]:
a

array([81, 74, 84, 23, 83, 90,  4,  8, 22, 25])

- Compresión de archivos

In [83]:
np.savez_compressed('tmp/data_com.npz', a1=a, a2=b)

- Texto plano

In [84]:
np.savetxt('tmp/data.txt', array)

In [85]:
# Rewrites the same text file as the previous cell, this time formatting each value with two decimals.
np.savetxt('tmp/data.txt', array, fmt='%.2f')