# NumPy III

In [1]:
import numpy as np

## Operaciones matemáticas y estadísticas

- `sum` : Suma de elementos.
- `mean` : Media aritmética de los elementos.
- `median` : Mediana de los elementos.
- `std` : Desviación estándar de los elementos.
- `var` : Varianza de los elementos.
- `min` : Valor mínimo de los elementos.
- `max` : Valor máximo de los elementos.
- `argmin` : Índice del valor mínimo.
- `argmax` : Índice del valor máximo.
- `cumsum` : Suma acumulada de los elementos.
- `cumprod` : Producto acumulado de los elementos.

In [2]:
array = np.random.randn(1000)

In [3]:
array.mean()

-0.00909496462895357

In [4]:
array.std()

1.015528510127303

In [5]:
array.std()/np.sqrt(1000)

0.03211383120839648

In [6]:
array.median()

AttributeError: 'numpy.ndarray' object has no attribute 'median'

In [7]:
np.median(array)

0.02167440645585781

In [8]:
array.max()

3.8137743951899674

In [9]:
array.argmax()

178

In [10]:
array[array.argmax()]

3.8137743951899674

- Si el array es multidmensional
    - Estas funciones poseen un parámetro `axis` que indica el eje sobre el cual aplicar la operación deseada
    - Si no se especifica, se calcula sobre todo el array ignorando su estructura

In [11]:
array.resize(10,10,10)

In [12]:
array.mean()

0.051475004098705965

In [13]:
array.mean(axis=2)

array([[ 0.35240129, -0.26792983, -0.01642197,  0.12450151,  0.60356325,
        -0.08278733, -0.07588012, -0.43612616,  0.28328453, -0.11401635],
       [-0.04656197,  0.09767378,  0.29618297, -0.27994959, -0.08765102,
        -0.01552908,  0.12912329,  0.33951915, -0.70788904,  0.04961359],
       [ 0.36444579, -0.14894242, -0.50297883,  0.37795453,  0.14929065,
         0.06437596,  0.03488165, -0.04247575, -0.00753644, -0.37711643],
       [-0.01433138,  0.16057881,  0.02318902,  0.21780982,  0.25480602,
         0.59284458,  0.11218412, -0.36350941, -0.27905723, -0.00923065],
       [ 0.23080735,  0.03218667,  0.19451421, -0.31474524, -0.14381154,
         0.22376243, -0.25578619, -0.56858117, -0.12973853,  0.12194701],
       [ 0.36022653, -0.34931263,  0.19105914, -0.03915334, -0.51647727,
         0.10395233,  0.13401091, -0.03339755,  0.23296476,  0.32632258],
       [ 0.19007048, -0.15519966,  0.49647795, -0.21137065,  0.34137809,
         0.22051442,  0.01011167,  0.26772013

In [14]:
_.shape

(10, 10)

## Operaciones booleanas

- Se pueden hacer operaciones matemáticas sobre arrays booleanos
    - True -> 1
    - False -> 0

In [11]:
bool_array = np.array([False, True, True, False, True])

In [12]:
bool_array.sum()

3

In [13]:
bool_array.all()

False

In [15]:
bool_array.any()

True

In [16]:
~bool_array

array([ True, False, False,  True, False])

In [17]:
(~bool_array).any()

True

## Sorting

In [21]:
array = np.random.randn(29)
array

array([-1.42891584,  1.51224787,  1.46560915,  0.31481347,  1.68060023,
        0.23572496, -0.36078275,  1.49925347,  0.02837512, -0.36962333,
        0.46026757,  0.61678859, -0.44091231, -0.00761732,  1.29731   ,
        0.1683137 , -0.32470186,  0.57543419,  1.14367049,  0.27048252,
       -0.41668968, -0.2515468 , -0.80410448, -0.74382945,  1.533022  ,
        0.83490928, -1.1402245 ,  0.39887763, -1.60340221])

In [22]:
np.sort(array)

array([-1.60340221, -1.42891584, -1.1402245 , -0.80410448, -0.74382945,
       -0.44091231, -0.41668968, -0.36962333, -0.36078275, -0.32470186,
       -0.2515468 , -0.00761732,  0.02837512,  0.1683137 ,  0.23572496,
        0.27048252,  0.31481347,  0.39887763,  0.46026757,  0.57543419,
        0.61678859,  0.83490928,  1.14367049,  1.29731   ,  1.46560915,
        1.49925347,  1.51224787,  1.533022  ,  1.68060023])

In [23]:
array

array([-1.42891584,  1.51224787,  1.46560915,  0.31481347,  1.68060023,
        0.23572496, -0.36078275,  1.49925347,  0.02837512, -0.36962333,
        0.46026757,  0.61678859, -0.44091231, -0.00761732,  1.29731   ,
        0.1683137 , -0.32470186,  0.57543419,  1.14367049,  0.27048252,
       -0.41668968, -0.2515468 , -0.80410448, -0.74382945,  1.533022  ,
        0.83490928, -1.1402245 ,  0.39887763, -1.60340221])

In [24]:
array.sort()

In [25]:
array

array([-1.60340221, -1.42891584, -1.1402245 , -0.80410448, -0.74382945,
       -0.44091231, -0.41668968, -0.36962333, -0.36078275, -0.32470186,
       -0.2515468 , -0.00761732,  0.02837512,  0.1683137 ,  0.23572496,
        0.27048252,  0.31481347,  0.39887763,  0.46026757,  0.57543419,
        0.61678859,  0.83490928,  1.14367049,  1.29731   ,  1.46560915,
        1.49925347,  1.51224787,  1.533022  ,  1.68060023])

## Operaciones de conjuntos

- `unique` : Elementos únicos
- `intersect1d` : Intersección de dos arrays
- `union1d` : Unión de dos arrays
- `in1d` : Array booleano que indica si cada elemento del primer array está contenido en el segundo.
- `setdiff1d` : Diferencia entre ambos conjuntos.
- `setxor1d` : Diferencia simétrica entre ambos conjuntos.

In [18]:
a = np.array(['python', 'R', 'C#', 'C++'])
b = np.array(['java', 'javascript', 'python', 'R'])

In [19]:
a.unique()

AttributeError: 'numpy.ndarray' object has no attribute 'unique'

In [20]:
np.unique(a)

array(['C#', 'C++', 'R', 'python'], dtype='<U6')

In [21]:
np.union1d(a, b)

array(['C#', 'C++', 'R', 'java', 'javascript', 'python'], dtype='<U10')

In [30]:
np.intersect1d(a, b)

array(['R', 'python'], dtype='<U10')

In [22]:
np.in1d(a, b)

array([ True,  True, False, False])

## Operaciones algebraicas

A través del módulo `linalg` podemos acceder a multitud de funciones de álgebra lineal (cálculo matricial)

- `diag` : Recupera la diagonal principal de una matriz.
- `dot` : Realiza el producto matricial de dos matrices.
- `trace` : Calcula la traza de una matriz.
- `det` : Calcula el determinante de una matriz.
- `eig` : Calcula los autovalores y autovectores de una matriz.
- `inv` : Calcula la inversa de una matriz.
- `qr` : Calcula la descomposición QR de una matriz.
- `svd` : Calcula la descomposición de valores singulares (Singular Value Decomposition) de una matriz.
- `solve` : Calcula el resultado del sistema lineal Ax = B donde A y B son las matrices de entrada y x la salida.
- `lstsq` : Calcula la solución de mínimos cuadrados a y = Xb, donde y y b son los parámetros de entrada y X la salida.

In [23]:
X = np.random.normal(loc=5, size=6).reshape(2,3)
Y = np.random.normal(loc=5, size=6).reshape(3,2)

In [24]:
X = np.random.randint(20, size=12).reshape(3,4)
Y = np.random.randint(20, size=12).reshape(4,3)

In [25]:
X

array([[ 0, 15,  4, 10],
       [12, 11,  1,  9],
       [ 3,  4,  6,  4]])

In [35]:
Y

array([[ 1, 18,  6],
       [10,  7,  1],
       [18,  8,  4],
       [ 6,  4, 19]])

In [36]:
X * Y

ValueError: operands could not be broadcast together with shapes (3,4) (4,3) 

In [37]:
X.shape

(3, 4)

In [38]:
Y.shape

(4, 3)

In [39]:
X.dot(Y)

array([[452, 479, 294],
       [320, 311, 177],
       [407, 410, 210]])

In [40]:
np.dot(X, Y)

array([[452, 479, 294],
       [320, 311, 177],
       [407, 410, 210]])

In [41]:
X @ Y

array([[452, 479, 294],
       [320, 311, 177],
       [407, 410, 210]])

In [42]:
Y @ X

array([[224, 243, 299, 103],
       [207, 165, 261, 102],
       [360, 282, 450, 174],
       [325, 318, 412, 134]])

In [42]:
M = np.random.normal(size=9).reshape(3, 3)
print(M)
S = M.T @ M
S

[[ 0.19035221 -0.35818818  0.44803192]
 [ 1.75390103  0.54278237  0.84497026]
 [-0.82229798  0.74984916  0.97724598]]


array([[3.78857676, 0.26720519, 0.76369069],
       [0.26720519, 0.98518524, 1.03094231],
       [0.76369069, 1.03094231, 1.86971706]])

In [26]:
from np.linalg import inv

ModuleNotFoundError: No module named 'np'

In [27]:
from numpy.linalg import inv, det, eig

In [28]:
S

NameError: name 'S' is not defined

In [30]:
det(S)

2.452015724284422

In [31]:
eig(S)

(array([5.46330087, 1.64455934, 0.27290946]),
 array([[ 0.56753483, -0.79223443, -0.22420711],
        [-0.09337751,  0.20862135, -0.97352852],
        [-0.8180372 , -0.57344725, -0.044423  ]]))

In [32]:
eig(S)[0].sum()

7.380769668594719

In [33]:
S.trace()

7.38076966859472

In [34]:
eig(S)[0].prod()

2.452015724284417

In [35]:
det(S)

2.452015724284422

In [36]:
inv(S)

array([[0.62479575, 0.68959699, 0.22776372],
       [0.68959699, 3.50085273, 0.09970349],
       [0.22776372, 0.09970349, 0.32967562]])

In [37]:
np.dot(S, inv(S))

array([[ 1.00000000e+00, -1.11022302e-16,  0.00000000e+00],
       [ 4.85722573e-17,  1.00000000e+00, -2.77555756e-17],
       [ 1.11022302e-16,  1.11022302e-16,  1.00000000e+00]])

In [38]:
_.round()

array([[ 1., -0.,  0.],
       [ 0.,  1., -0.],
       [ 0.,  0.,  1.]])

## Funciones financieras

|Función|Descripcción|
|----|---|
|`fv(rate, nper, pmt, pv[, when])`|Calcula el valor futuro.|
|`pv(rate, nper, pmt[, fv, when])`|Calcula el valor presente.|
|`npv(rate, values)`|NPV (Net Present Value) de una serie de flujo de cajas.|
|`pmt(rate, nper, pv[, fv, when])`|Calcula el pago total, principal y intéres.|
|`ppmt(rate, per, nper, pv[, fv, when])`|Calcula el pago contra el principal.|
|`ipmt(rate, per, nper, pv[, fv, when])`|Calcula la proporción del interés del pago.|
|`irr(values)`|Internal Rate of Return (IRR).|
|`mirr(values, finance_rate, reinvest_rate)`| Internal Rate of Return (IRR) Modificada.|
|`nper(rate, pmt, pv[, fv, when])`|Calcula el número de pagos periodicos|
|`rate(nper, pmt, pv, fv[, when, guess, tol, …])`|Calcula la tasa de interes por periodo.|


## Exportar arrays con NumPy

- Podemos usar funciones propias de NumPy para leer y escribir ficheros
    - `np.save()` -> En binario
    - `np.savez()` -> Varios arrays en binario
    - `np.savez_compressed()` -> Varios arrays comprimidos
    - `np.savetxt()` -> Texto plano
    - `np.load()` -> Cargar fichero    

In [39]:
array = np.random.randn(1000)

In [40]:
np.save('tmp/data.npy', array)

FileNotFoundError: [Errno 2] No such file or directory: 'tmp/data.npy'

In [58]:
array_load = np.load('tmp/data.npy')

In [59]:
(array == array_load).all()

True

- Si queremos guardar varios arrays en el mismo fichero

In [60]:
a = np.random.randint(100, size=10)
b = np.random.randn(10)

In [61]:
np.savez('tmp/data.npz', a1=a, a2=b)

In [62]:
ab = np.load('tmp/data.npz')

In [63]:
ab

<numpy.lib.npyio.NpzFile at 0x2df3de6fb48>

In [64]:
from utils import midir
midir(ab)

['_abc_impl',
 '_files',
 'allow_pickle',
 'close',
 'f',
 'fid',
 'files',
 'get',
 'items',
 'iteritems',
 'iterkeys',
 'keys',
 'pickle_kwargs',
 'values',
 'zip']

In [65]:
ab.keys

<bound method Mapping.keys of <numpy.lib.npyio.NpzFile object at 0x000002DF3DE6FB48>>

In [66]:
ab.keys()

KeysView(<numpy.lib.npyio.NpzFile object at 0x000002DF3DE6FB48>)

In [67]:
list(ab.keys())

['a1', 'a2']

In [68]:
ab['a1']

array([92, 86, 81, 38, 32, 53, 45, 58, 64, 50])

In [69]:
a

array([92, 86, 81, 38, 32, 53, 45, 58, 64, 50])

- Compresión de archivos

In [70]:
np.savez_compressed('tmp/data_com.npz', a1=a, a2=b)

- Texto plano

In [71]:
np.savetxt('tmp/data.txt', array)

In [72]:
np.savetxt('tmp/data.txt', array, fmt='%.2f')