# Introducción a Numpy

Plan:
 * Crear vectores (arrays)
 * Multi-dimensional arrays
 * Element-wise operations: 
   * Comparison operations
   * Logical operations
 * Summarizing operations 

In [1]:
import numpy as np

In [59]:
np

<module 'numpy' from 'C:\\Users\\Usuario\\.conda\\envs\\ml-zoomcamp\\lib\\site-packages\\numpy\\__init__.py'>

## Vectores NumPy

Creando vectores

* `np.zeros`
* `np.ones`
* `np.full`
* `np.repeat`
* `np.array`
* `np.arange`

In [61]:
zeros = np.zeros(5)
zeros

array([0., 0., 0., 0., 0.])

In [3]:
ones = np.ones(10)
ones

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [62]:
vector = np.full(10, 1.0) # creates 10 components and fills them with ones (the fill value is 1.0)
vector

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [9]:
vector = np.repeat(0.0, 10) # repite el cero 10 veces
vector

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
vector = np.repeat([0.0, 1.0], 5) # repite el cero y uno cinco veces
vector

array([0., 0., 0., 0., 0., 1., 1., 1., 1., 1.])

In [17]:
vector = np.repeat([0.0, 1.0], [2, 3]) # repite el cero, 2 veces y el uno, 3 veces
vector

array([0., 0., 1., 1., 1.])

Accede al elemento de un array por indice

In [14]:
el = vector[1]
print(el)

0.0


Accede a elementos múltiples de un array mediante una lista de índices:

In [18]:
vector[[4, 2, 0]]

array([1., 1., 0.])

Asigna:

In [19]:
vector[1] = 1
print(vector)

[0. 1. 1. 1. 1.]


Creación de un array a partir de una lista con enteros:

In [20]:
elements = [1, 2, 3, 4]
array = np.array(elements)
array

array([1, 2, 3, 4])

Especificación del tipo de elementos:

In [25]:
zeros = np.zeros(10, dtype=np.uint8)
zeros

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

`np.arange` para crear rangos:

In [63]:
np.arange(3,10)

array([3, 4, 5, 6, 7, 8, 9])

In [22]:
for i in np.arange(5):
    print(i)

0
1
2
3
4


Linspace - para crear un array con elementos desde `start` hasta `end` de un tamaño determinado: 

In [23]:
thresholds = np.linspace(0, 1, 11)
thresholds

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

Be careful with overflowing: (Tenga cuidado con el desbordamiento:) En muchas situaciones, preferiríamos usar la pila de pydata (numpy / scipy / pandas) para el cálculo sobre python puro. Es importante tener en cuenta que pueden ocurrir desbordamientos, porque las estructuras de datos bajo el capó son de precisión fija.

In [53]:
zeros[0] = 256
zeros

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [54]:
zeros[0] = 256
print(zeros[0])

0


In [58]:
2*256 % 256

0

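Este es un entero sin signo de 8 bits (`uint8`), por lo que el número entero más grande que puede contener es 255; al asignar 256 se produce un desbordamiento y el valor vuelve a 0 (256 % 256 = 0). Nota: en versiones recientes de NumPy (2.0 en adelante) asignar un entero de Python fuera de rango lanza `OverflowError` en lugar de desbordarse silenciosamente.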

## Multi-dimensional NumPy arrays

Specify the shape with a tuple (Especifica la forma con una tupla):

In [65]:
zeros = np.zeros((5, 2), dtype=np.float32)
zeros

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)

In [66]:
print(zeros.shape)

(5, 2)


In [68]:
# Matriz de tamaño 3x3
np.array([[1,2,3],
          [4,5,6],
          [7,8,9]
         ])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [69]:
A = np.array([[1,2,3],
          [4,5,6],
          [7,8,9]
         ])

In [71]:
A[0,1]

2

In [72]:
B = np.array([[1,2,3],
          [4,5,6],
          [7,8,9]
         ])
B[0,1]=20 # cambiar el valor de la componente
B

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [77]:
B[0] # fila 1 de la matriz B
B[2]=[1,1,1] # cambiar todo la fila 3 por 1,1,1.
B[:,1] # accedes a la columna 2

array([20,  5,  1])

In [79]:
B[:] # la matriz B

array([[ 1, 20,  3],
       [ 4,  5,  6],
       [ 1,  1,  1]])

In [80]:
numbers = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]

numbers = np.array(numbers)

In [81]:
print(numbers[0, 1])

2


Assignment: use a tuple (row index, column index)

In [82]:
numbers[0, 1] = 10

In [83]:
numbers

array([[ 1, 10,  3],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [84]:
numbers[0]

array([ 1, 10,  3])

Slicing: getting a column:

In [85]:
numbers[:, 1]

array([10,  5,  8])

Assigning a row:

In [86]:
numbers[1] = [1, 1, 1]

In [87]:
numbers

array([[ 1, 10,  3],
       [ 1,  1,  1],
       [ 7,  8,  9]])

Assigning a column:

In [88]:
numbers[:, 2] = [9, 9, 9]

In [89]:
numbers

array([[ 1, 10,  9],
       [ 1,  1,  9],
       [ 7,  8,  9]])

## Randomly generated arrays


Uniform random numbers between 0 and 1 of shape (5, 2):

In [96]:
np.random.rand(5, 2)

array([[0.80025835, 0.60181712],
       [0.76495986, 0.16922545],
       [0.29302323, 0.52406688],
       [0.35662428, 0.04567897],
       [0.98315345, 0.44135492]])

Set seed for reproducibility:

In [112]:
np.random.seed(2)
arr = np.random.rand(5, 2) # distribución uniforme
arr

array([[0.4359949 , 0.02592623],
       [0.54966248, 0.43532239],
       [0.4203678 , 0.33033482],
       [0.20464863, 0.61927097],
       [0.29965467, 0.26682728]])

In [111]:
np.random.seed(2)
np.random.randn(5, 2) # valores aleatorios de la distribucion normal

array([[-0.41675785, -0.05626683],
       [-2.1361961 ,  1.64027081],
       [-1.79343559, -0.84174737],
       [ 0.50288142, -1.24528809],
       [-1.05795222, -0.90900761]])

Random integers between 0 and 99 (100 is not included)

In [113]:
np.random.seed(2)
np.random.randint(low=0, high=100, size=(5, 2))

array([[40, 15],
       [72, 22],
       [43, 82],
       [75,  7],
       [34, 49]])

## Element-wise operations

In [114]:
rng = np.arange(5)
rng

array([0, 1, 2, 3, 4])

In [116]:
rng + 1

array([1, 2, 3, 4, 5])

Every item in the array is multiplied by 2:

In [115]:
rng * 2

array([0, 2, 4, 6, 8])

In [117]:
rng / 100

array([0.  , 0.01, 0.02, 0.03, 0.04])

In [36]:
(rng - 1) * 3 / 2 + 1

array([-0.5,  1. ,  2.5,  4. ,  5.5])

In [119]:
a = np.array([0.  , 0.01, 0.02, 0.03, 0.04])
b = np.array([1,2,3,4,5])
a+b

array([1.  , 2.01, 3.02, 4.03, 5.04])

Adding one array with another

In [121]:
np.random.seed(2)
noise = 0.01 * np.random.rand(5)
noise

array([0.00435995, 0.00025926, 0.00549662, 0.00435322, 0.00420368])

In [122]:
numbers = np.arange(5)
numbers

array([0, 1, 2, 3, 4])

In [123]:
result = numbers + noise
result

array([0.00435995, 1.00025926, 2.00549662, 3.00435322, 4.00420368])

Rounding the numbers to 4th digit:

In [124]:
result.round(4)

array([0.0044, 1.0003, 2.0055, 3.0044, 4.0042])

Two ways to square each element:

* element-wise multiplication with itself
* the power operator (`**`)

In [125]:
np.random.seed(2)
pred = np.random.rand(3).round(2)
pred

array([0.44, 0.03, 0.55])

In [126]:
square = pred * pred
square

array([0.1936, 0.0009, 0.3025])

In [127]:
square = pred ** 2
square

array([0.1936, 0.0009, 0.3025])

Other element-wise operations:

- `exp`
- `log`
- `sqrt`

In [128]:
np.exp(pred)

array([1.55270722, 1.03045453, 1.73325302])

In [129]:
np.log(pred)

array([-0.82098055, -3.5065579 , -0.597837  ])

In [130]:
np.sqrt(pred)

array([0.66332496, 0.17320508, 0.74161985])

### Comparison operations

In [152]:
np.random.seed(2)
pred = np.random.randn(5).round(2)
pred

array([-0.42, -0.06, -2.14,  1.64, -1.79])

In [153]:
result = pred >= 0.5
result

array([False, False, False,  True, False])

In [182]:
np.random.seed(2)

In [183]:
pred1 = np.random.randn(5).round(2) # el vector aleatorio no cambia si primero se llama la semilla seed
pred1

array([-0.42, -0.06, -2.14,  1.64, -1.79])

In [184]:
pred2 = np.random.randn(5).round(2)
pred2

array([-0.84,  0.5 , -1.25, -1.06, -0.91])

In [166]:
pred1 >= pred2

array([ True, False, False,  True, False])

In [167]:
np.random.seed(2)

In [180]:
pred1 = np.random.randn(5) >= 0.3
pred1

array([False,  True, False, False, False])

In [169]:
pred2 = np.random.randn(5) >= 0.4
pred2

array([False,  True, False, False, False])

In [175]:
pred1 > pred2

array([False, False, False,  True, False])

In [176]:
pred1[pred1 > pred2] # escoge de pred1 lo que cumplen la condicion que pred1 > pred2

array([1.64])

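### Logical operations

Nota: los operadores elemento a elemento `&` (and) y `|` (or) requieren arrays booleanos (o enteros). Los `TypeError` que se muestran abajo aparecen porque, por el orden en que se ejecutaron las celdas, `pred1` y `pred2` eran arrays de tipo float en ese momento. Para combinar condiciones hay que aplicarlos sobre el resultado de las comparaciones, por ejemplo `(pred1 >= 0.3) & (pred2 >= 0.4)`.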

In [187]:
pred1 & pred2

TypeError: ufunc 'bitwise_and' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [179]:
pred1 | pred2

TypeError: ufunc 'bitwise_or' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

## Summarizing operations

Summarizing operations process an array and return a single number

In [188]:
np.random.seed(2)
pred = np.random.rand(3).round(2)
pred_sum = pred.sum()
pred

array([0.44, 0.03, 0.55])

In [205]:
minimo = pred.min()
maximo = pred.max()
promedio = pred.mean()
sdt= pred.std()
print("minimo = %s, maximo = %s, promedio=%s, desv estandar= %2f" % (minimo,maximo, promedio,sdt))

minimo = 0.03, maximo = 0.55, promedio=0.34, desv estandar= 0.223756


In [209]:
pred_sum

1.02

In [210]:
print('min = %.2f' % pred.min())
print('mean = %.2f' % pred.mean())
print('max = %.2f' % pred.max())
print('std = %.2f' % pred.std())

min = 0.03
mean = 0.34
max = 0.55
std = 0.22


For a two-dimensional array it works in the same way:

In [211]:
np.random.seed(2)
matrix = np.random.rand(4, 3).round(2)
matrix

array([[0.44, 0.03, 0.55],
       [0.44, 0.42, 0.33],
       [0.2 , 0.62, 0.3 ],
       [0.27, 0.62, 0.53]])

In [212]:
matrix.max()

0.62

But we can specify the axis along which we apply the summarizing operation

- `axis=1` - apply to each row (aplica a cada fila)
- `axis=0` - apply to each column (aplica a cada columna)

In [213]:
matrix.max(axis=1) # maximo por fila

array([0.55, 0.44, 0.62, 0.62])

In [215]:
matrix.max(axis=0) # maximo por columna

array([0.44, 0.62, 0.55])

In [216]:
matrix.sum(axis=1) # suma por filas

array([1.02, 1.19, 1.12, 1.42])

## Sorting

In [217]:
np.random.seed(2)
pred = np.random.rand(4).round(2)
pred

array([0.44, 0.03, 0.55, 0.44])

Creates a new array:

In [218]:
np.sort(pred) # ordena el array de menor a mayor 

array([0.03, 0.44, 0.44, 0.55])

In [219]:
pred

array([0.44, 0.03, 0.55, 0.44])

Sorts in place:

In [221]:
pred.sort() # otra forma de ordenarlo

In [222]:
pred

array([0.03, 0.44, 0.44, 0.55])

`Argsort` - instead of sorting, return the indexes of the array in sorted order

`Argsort` - en lugar de ordenar, devuelve los índices de la matriz en orden ordenado

In [225]:
np.random.seed(2)
pred = np.random.rand(4).round(2)
pred

array([0.44, 0.03, 0.55, 0.44])

In [226]:
idx = pred.argsort()

In [227]:
idx

array([1, 0, 3, 2], dtype=int64)

In [228]:
pred[idx]

array([0.03, 0.44, 0.44, 0.55])

## Reshaping (reformulado)

The shape of an array can be changed (La forma de un array puede ser modificada)

In [229]:
rng = np.arange(12)
rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [230]:
rng.shape

(12,)

In [231]:
rng.reshape(4, 3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [232]:
rng.reshape(4, 3, order='F')

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

But number of rows x columns should be equal to the total number of elements

In [233]:
rng.reshape(4, 4)

ValueError: cannot reshape array of size 12 into shape (4,4)

In [234]:
vec = np.arange(3)
vec

array([0, 1, 2])

In [236]:
mat = np.arange(6).reshape(3, 2)
mat

array([[0, 1],
       [2, 3],
       [4, 5]])

Putting multiple arrays together:

- `concatenate`
- `hstack`
- `vstack`
- `column_stack`

In [239]:
np.concatenate([vec, vec])

array([0, 1, 2, 0, 1, 2])

In [240]:
np.hstack([vec, vec])

array([0, 1, 2, 0, 1, 2])

In [241]:
np.hstack([mat, mat]) # coloca la matriz al lado derecho

array([[0, 1, 0, 1],
       [2, 3, 2, 3],
       [4, 5, 4, 5]])

In [242]:
np.concatenate([mat, mat]) # coloca la matriz de bajo de la otra

array([[0, 1],
       [2, 3],
       [4, 5],
       [0, 1],
       [2, 3],
       [4, 5]])

In [243]:
np.column_stack([vec, mat]) # el vector lo pone como columna y lo pega con la matriz

array([[0, 0, 1],
       [1, 2, 3],
       [2, 4, 5]])

In [244]:
np.column_stack([vec, vec]) # los vectores los pone como columna

array([[0, 0],
       [1, 1],
       [2, 2]])

In [246]:
np.column_stack([vec]) # el vector lo pone como columna

array([[0],
       [1],
       [2]])

In [89]:
np.vstack([vec, vec]) # lo pone como fila

array([[0, 1, 2],
       [0, 1, 2]])

In [247]:
np.vstack([mat, mat])  

array([[0, 1],
       [2, 3],
       [4, 5],
       [0, 1],
       [2, 3],
       [4, 5]])

Transpose

In [248]:
mat.T # transpuesta de la matriz

array([[0, 2, 4],
       [1, 3, 5]])

In [249]:
np.vstack([vec, mat.T])

array([[0, 1, 2],
       [0, 2, 4],
       [1, 3, 5]])

## Slicing (rebanar)

Taking a part of the array

In [250]:
mat = np.arange(15).reshape(5, 3)
mat

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [251]:
mat[:3] # toma solo 3 filas

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [252]:
mat[1:3, :2] # takes rows 2 and 3 (indices 1 and 2) and the first two columns (indices 0 and 1)

array([[3, 4],
       [6, 7]])

In [253]:
mat[:, :2]

array([[ 0,  1],
       [ 3,  4],
       [ 6,  7],
       [ 9, 10],
       [12, 13]])

In [254]:
mat[1:3, :2]

array([[3, 4],
       [6, 7]])

In [255]:
mat[[3, 0, 1]]

array([[ 9, 10, 11],
       [ 0,  1,  2],
       [ 3,  4,  5]])

In [256]:
mat[:, 0] % 2 == 1

array([False,  True, False,  True, False])

In [257]:
mat[mat[:, 0] % 2 == 1]

array([[ 3,  4,  5],
       [ 9, 10, 11]])

## Linear Algebra
### Multiplication

In [284]:
u= np.array([2,4,5,6])
v= np.array([1,0,0,2])
print(2 * u)
print(u + v)
print(u * v)
print(u.dot(v)) # producto punto

[ 4  8 10 12]
[3 4 5 8]
[ 2  0  0 12]
14


Vector-vector multiplication

In [280]:
u.shape[0] == v.shape[0]

True

In [281]:
def vector_vector_multiplication(u, v):
    """Compute the dot product of ``u`` and ``v`` with an explicit loop.

    Parameters
    ----------
    u, v : array-like exposing ``.shape``
        Must have the same length along their first axis; this is
        checked with an ``assert``.

    Returns
    -------
    The sum of ``u[i] * v[i]`` over the first axis, accumulated from
    ``0.0`` (so integer inputs still yield a floating-point result).
    """
    assert u.shape[0] == v.shape[0]

    total = 0.0
    # Walk both inputs in lockstep along their first axis.
    for u_i, v_i in zip(u, v):
        total = total + u_i * v_i

    return total

In [283]:
u= np.array([2,4,5,6])
v= np.array([1,0,0,2])
vector_vector_multiplication(u,v)

14.0

In [269]:
u = np.array([0, 1, 2])
v = np.array([1, 2, 3])

u.dot(v) # producto punto de u con v

8

Matrix-vector multiplication

In [273]:
X = np.array([
    [0, 1, 2],
    [1, 2, 3],
    [2, 3, 3]
])
print(X,u)

[[0 1 2]
 [1 2 3]
 [2 3 3]] [0 1 2]


In [274]:
X.dot(u) # multiplicacion de una matriz por un vector

array([5, 8, 9])

In [285]:
U = np.array([
    [2, 4, 5, 6],
    [1, 2, 1, 2],
    [3, 1, 2,1]
])

In [291]:
U.shape[1]== v.shape[0]  

True

In [297]:
def matrix_vector_multiplication(U, v):
    """Multiply matrix ``U`` by vector ``v`` one row at a time.

    Parameters
    ----------
    U : 2-D array
        Its number of columns must equal the length of ``v``; this is
        checked with an ``assert``.
    v : 1-D array

    Returns
    -------
    A new array created with ``np.zeros`` holding one entry per row of
    ``U``: the dot product of that row with ``v``, computed by
    ``vector_vector_multiplication``.
    """
    assert U.shape[1] == v.shape[0]

    result = np.zeros(U.shape[0])
    # Each output entry is the dot product of one row of U with v.
    for row_index, row in enumerate(U):
        result[row_index] = vector_vector_multiplication(row, v)

    return result

In [298]:
matrix_vector_multiplication(U,v)

array([14.,  5.,  5.])

Matrix-matrix multiplication

In [276]:
U = np.array([
    [4, 5, 6],
    [5, 6, 7],
    [6, 7, 8]
])
print(X)
print(U)

[[0 1 2]
 [1 2 3]
 [2 3 3]]
[[4 5 6]
 [5 6 7]
 [6 7 8]]


In [299]:
V= np.array([
    [1,1,2],
    [0, 0.5, 1],
    [0, 2, 1],
    [2,1,0]
])

In [301]:
def matrix_matrix_multiplication(U, V):
    """Multiply matrix ``U`` by matrix ``V`` one column of ``V`` at a time.

    Parameters
    ----------
    U, V : 2-D arrays
        The number of columns of ``U`` must equal the number of rows of
        ``V``; this is checked with an ``assert``.

    Returns
    -------
    A new array created with ``np.zeros`` of shape
    ``(U.shape[0], V.shape[1])`` whose column ``j`` is
    ``matrix_vector_multiplication(U, V[:, j])``.
    """
    assert U.shape[1] == V.shape[0]

    product = np.zeros((U.shape[0], V.shape[1]))
    # Fill the result column by column: U times the j-th column of V.
    for j in range(V.shape[1]):
        product[:, j] = matrix_vector_multiplication(U, V[:, j])

    return product

In [302]:
matrix_matrix_multiplication(U,V)

array([[14. , 20. , 13. ],
       [ 5. ,  6. ,  5. ],
       [ 5. ,  8.5,  9. ]])

In [105]:
X.dot(U) # multiplicacion de matrices

array([[17, 20, 23],
       [32, 38, 44],
       [41, 49, 57]])

### identity matrix

In [309]:
I = np.eye(3) # matriz identidad

In [310]:
V.dot(I)

array([[1. , 1. , 2. ],
       [0. , 0.5, 1. ],
       [0. , 2. , 1. ],
       [2. , 1. , 0. ]])

### Inverse

To invert a matrix, use the `inv` function from the `linalg` package

In [311]:
A = np.array([
    [0, 1, 2],
    [1, 2, 3],
    [2, 3, 3]
])

In [312]:
Ainv = np.linalg.inv(A) # inversa de la matriz
Ainv

array([[-3.,  3., -1.],
       [ 3., -4.,  2.],
       [-1.,  2., -1.]])

In [313]:
A.dot(Ainv)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

B is not invertible ("singular"):

In [314]:
B = np.array([
    [0, 1, 1],
    [1, 2, 3],
    [2, 3, 5]
])

np.linalg.inv(B)

LinAlgError: Singular matrix

Note: When you need to solve _Ax = b_, you don't really need to compute the inverse. You can use `solve`:

In [315]:
b = np.array([1, 2, 3])

In [316]:
x = np.linalg.solve(A, b)
x

array([0., 1., 0.])

In [317]:
A.dot(x)

array([1., 2., 3.])