# Numpy & Pandas



# ¿Por qué NumPy?

Es una librería enfocada al cálculo numérico y manejo de Arrays.

- Es muy veloz: hasta 50 veces más rápido que usar una lista de Python, porque sus operaciones están implementadas en C.
- Optimiza el almacenamiento en memoria.
- Maneja distintos tipos de datos.
- Es una librería muy poderosa, se pueden crear redes neuronales desde cero.

## Numpy Array

In [1]:
import numpy as np

In [2]:
l = [1, 2, 3, 4, 5, 6, 7, 8, 9]
l

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [3]:
arr = np.array(l)


In [4]:
type(arr)

numpy.ndarray

In [5]:
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
matriz = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
matriz = np.array(matriz)
matriz

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

Indexado

In [7]:
arr[0]

1

In [8]:
arr[0] + arr[5]

7

Imprimir una fila de la matriz

In [9]:
matriz[0]

array([1, 2, 3])

Acceder a un elemento de la matriz

In [10]:
matriz[0, 2]

3

Slicing

In [11]:
# slicing
arr[0:3]

array([1, 2, 3])

In [12]:
arr[:3]

array([1, 2, 3])

In [13]:
arr[0:]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
arr[::3]

array([1, 4, 7])

In [15]:
arr[-1]

9

In [16]:
matriz[1:,  0:2]

array([[4, 5],
       [7, 8]])

## Tipos de datos Numpy

In [17]:
arr = np.array([1, 2, 3, 4])
arr.dtype

dtype('int64')

In [18]:
arra = np.array([1, 2, 3, 4], dtype='float64')
arra.dtype

dtype('float64')

In [19]:
arr = arr.astype(np.float64)

In [20]:
arr.dtype

dtype('float64')

In [21]:
arr = arr.astype(np.bool_)
arr.dtype

dtype('bool')

In [22]:
arr = arr.astype(np.string_)
arr.dtype

dtype('S5')

In [23]:
arr

array([b'True', b'True', b'True', b'True'], dtype='|S5')

In [24]:
arr = np.array(['1', '2', '3', '4'])
arr = arr.astype(np.int8)
arr

array([1, 2, 3, 4], dtype=int8)

## Dimensiones

In [25]:
scalar = np.array(42)
print(scalar)

42


In [26]:
scalar.ndim

0

In [27]:
vector = np.array([1, 2, 3])
print(vector)
vector.ndim

[1 2 3]


1

In [28]:
matriz = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [34, 12, 3]])
print(matriz)
matriz.ndim

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [34 12  3]]


2

In [29]:
tensor = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9], [34, 12, 3]], [[1, 2, 3], [4, 5, 6], [7, 8, 9], [34, 12, 3]]])
print(tensor)
tensor.ndim

[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]
  [34 12  3]]

 [[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]
  [34 12  3]]]


3

Agregar o eliminar dimensiones

In [30]:
vector = np.array([1, 2, 3], ndmin=10)
print(vector)
vector.ndim

[[[[[[[[[[1 2 3]]]]]]]]]]


10

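Se pueden expandir las dimensiones con np.expand_dims: en un vector, axis = 0 inserta el nuevo eje al inicio (el vector queda como una fila) y axis = 1 lo inserta al final (queda como una columna).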

In [31]:
expand = np.expand_dims(np.array([1, 2, 3]), axis = 0)
print(expand)
expand.ndim

[[1 2 3]]


2

Eliminar o comprimir las dimensiones: np.squeeze elimina todos los ejes de longitud 1 y deja solo las dimensiones que realmente contienen datos.

In [32]:
print(vector, vector.ndim)
vector_2 = np.squeeze(vector)
print(vector_2, vector_2.ndim)

[[[[[[[[[[1 2 3]]]]]]]]]] 10
[1 2 3] 1


## Reto

1. Definir un tensor de 5 dimensiones (5D)
2. Sumarle una dimensión en cualquier eje
3. Borrar las dimensiones que no se usen

Cuéntanos, ¿cómo te fue y cómo lo solucionaste?

In [33]:
reto = np.array([[[[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]]],
                [[[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]]],
                [[[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]]],
                [[[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]]],
                [[[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]],
                [[[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12],[13, 14, 15]],
                [[16, 17, 18],[19, 20, 21],[22, 23, 24],[25, 26, 27],[28, 29, 30]],
                [[31, 32, 33],[34, 35, 36],[37, 38, 39],[41, 41, 42],[43, 44, 45]],
                [[46, 47, 48],[49, 50, 51],[52, 53, 54],[55, 56, 57],[58, 59, 60]]]]])

In [34]:
reto.ndim

5

In [35]:
reto = np.expand_dims(reto, axis = 3)
print(reto, reto.ndim)


[[[[[[ 1  2  3]
     [ 4  5  6]
     [ 7  8  9]
     [10 11 12]
     [13 14 15]]]


   [[[16 17 18]
     [19 20 21]
     [22 23 24]
     [25 26 27]
     [28 29 30]]]


   [[[31 32 33]
     [34 35 36]
     [37 38 39]
     [41 41 42]
     [43 44 45]]]


   [[[46 47 48]
     [49 50 51]
     [52 53 54]
     [55 56 57]
     [58 59 60]]]]



  [[[[ 1  2  3]
     [ 4  5  6]
     [ 7  8  9]
     [10 11 12]
     [13 14 15]]]


   [[[16 17 18]
     [19 20 21]
     [22 23 24]
     [25 26 27]
     [28 29 30]]]


   [[[31 32 33]
     [34 35 36]
     [37 38 39]
     [41 41 42]
     [43 44 45]]]


   [[[46 47 48]
     [49 50 51]
     [52 53 54]
     [55 56 57]
     [58 59 60]]]]



  [[[[ 1  2  3]
     [ 4  5  6]
     [ 7  8  9]
     [10 11 12]
     [13 14 15]]]


   [[[16 17 18]
     [19 20 21]
     [22 23 24]
     [25 26 27]
     [28 29 30]]]


   [[[31 32 33]
     [34 35 36]
     [37 38 39]
     [41 41 42]
     [43 44 45]]]


   [[[46 47 48]
     [49 50 51]
     [52 53 54]
     [55 56 57]
     [58

In [36]:
reto = np.squeeze(reto)
print(reto, reto.ndim)

[[[[[ 1  2  3]
    [ 4  5  6]
    [ 7  8  9]
    [10 11 12]
    [13 14 15]]

   [[16 17 18]
    [19 20 21]
    [22 23 24]
    [25 26 27]
    [28 29 30]]

   [[31 32 33]
    [34 35 36]
    [37 38 39]
    [41 41 42]
    [43 44 45]]

   [[46 47 48]
    [49 50 51]
    [52 53 54]
    [55 56 57]
    [58 59 60]]]


  [[[ 1  2  3]
    [ 4  5  6]
    [ 7  8  9]
    [10 11 12]
    [13 14 15]]

   [[16 17 18]
    [19 20 21]
    [22 23 24]
    [25 26 27]
    [28 29 30]]

   [[31 32 33]
    [34 35 36]
    [37 38 39]
    [41 41 42]
    [43 44 45]]

   [[46 47 48]
    [49 50 51]
    [52 53 54]
    [55 56 57]
    [58 59 60]]]


  [[[ 1  2  3]
    [ 4  5  6]
    [ 7  8  9]
    [10 11 12]
    [13 14 15]]

   [[16 17 18]
    [19 20 21]
    [22 23 24]
    [25 26 27]
    [28 29 30]]

   [[31 32 33]
    [34 35 36]
    [37 38 39]
    [41 41 42]
    [43 44 45]]

   [[46 47 48]
    [49 50 51]
    [52 53 54]
    [55 56 57]
    [58 59 60]]]]



 [[[[ 1  2  3]
    [ 4  5  6]
    [ 7  8  9]
    [10 11 12]
    [13 

# Crear arrays

In [38]:
list(range(0, 10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [39]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [41]:
np.zeros(3)

array([0., 0., 0.])

In [42]:
np.zeros((10, 10))

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [46]:
np.ones((10, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [47]:
np.linspace(0, 10, 10)

array([ 0.        ,  1.11111111,  2.22222222,  3.33333333,  4.44444444,
        5.55555556,  6.66666667,  7.77777778,  8.88888889, 10.        ])

In [48]:
np.linspace(0, 10, 100)

array([ 0.        ,  0.1010101 ,  0.2020202 ,  0.3030303 ,  0.4040404 ,
        0.50505051,  0.60606061,  0.70707071,  0.80808081,  0.90909091,
        1.01010101,  1.11111111,  1.21212121,  1.31313131,  1.41414141,
        1.51515152,  1.61616162,  1.71717172,  1.81818182,  1.91919192,
        2.02020202,  2.12121212,  2.22222222,  2.32323232,  2.42424242,
        2.52525253,  2.62626263,  2.72727273,  2.82828283,  2.92929293,
        3.03030303,  3.13131313,  3.23232323,  3.33333333,  3.43434343,
        3.53535354,  3.63636364,  3.73737374,  3.83838384,  3.93939394,
        4.04040404,  4.14141414,  4.24242424,  4.34343434,  4.44444444,
        4.54545455,  4.64646465,  4.74747475,  4.84848485,  4.94949495,
        5.05050505,  5.15151515,  5.25252525,  5.35353535,  5.45454545,
        5.55555556,  5.65656566,  5.75757576,  5.85858586,  5.95959596,
        6.06060606,  6.16161616,  6.26262626,  6.36363636,  6.46464646,
        6.56565657,  6.66666667,  6.76767677,  6.86868687,  6.96

Crea una matriz identidad (unos en la diagonal principal y ceros en el resto)

In [49]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [50]:
np.random.rand()

0.3000519201184084

In [51]:
np.random.rand(4)

array([0.54408888, 0.74704082, 0.67380733, 0.74485367])

In [52]:
np.random.rand(4, 4)

array([[0.97748474, 0.59826176, 0.05254179, 0.27457506],
       [0.7103514 , 0.48477108, 0.33195274, 0.91682179],
       [0.03633157, 0.61383135, 0.72065162, 0.89771767],
       [0.61295867, 0.9173826 , 0.78427354, 0.13372851]])

In [53]:
np.random.randint(1, 15)

3

In [54]:
np.random.randint(1, 100, (10,10))

array([[81, 10,  6, 41, 77, 34, 74,  8, 36, 70],
       [50,  3, 73, 31, 43, 94, 20, 92, 30, 12],
       [92, 36, 83, 53, 10, 69, 16, 76, 65, 31],
       [37, 74, 19, 39, 18, 97, 51, 21,  3, 25],
       [53, 22, 19, 51, 24, 27, 31, 82, 56, 39],
       [90,  4, 22, 85, 14, 28, 13, 74, 67, 90],
       [14, 75, 29, 49, 90, 78, 94, 78, 10, 90],
       [ 4, 32, 55, 65, 85, 61, 47, 58, 36, 53],
       [23, 97, 58, 82, 31, 21, 34, 15, 82, 14],
       [ 8, 66, 50, 88, 52, 19, 27, 23, 78, 59]])

# Shape y Reshape

In [55]:
arr = np.random.randint(1, 10, (3,2))
arr.shape

(3, 2)

In [56]:
arr

array([[9, 6],
       [2, 8],
       [8, 9]])

# Reshape

Transforma los arreglos

In [57]:
arr.reshape(1,6)

array([[9, 6, 2, 8, 8, 9]])

In [58]:
np.reshape(arr,(1, 6))

array([[9, 6, 2, 8, 8, 9]])

In [60]:
arr

array([[9, 6],
       [2, 8],
       [8, 9]])

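Mediante el parámetro `order` indicamos en qué orden se leen y se colocan los elementos: 'C' = orden tipo C (por filas), 'F' = orden tipo Fortran (por columnas) y 'A' = orden Fortran si el arreglo es contiguo en memoria al estilo Fortran, y orden C en caso contrario (no es aleatorio).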

In [59]:
np.reshape(arr, (2,3), 'C')

array([[9, 6, 2],
       [8, 8, 9]])

In [61]:
np.reshape(arr, (2,3), 'F')

array([[9, 8, 8],
       [2, 6, 9]])

In [62]:
np.reshape(arr, (2,3), 'A')

array([[9, 6, 2],
       [8, 8, 9]])

# Funciones en numpy

In [63]:
arr = np.random.randint(1, 20, 10)
arr

array([ 5, 11, 18,  2,  8,  3,  9,  4, 18, 11])

In [64]:
matriz = arr.reshape(2, 5)
matriz

array([[ 5, 11, 18,  2,  8],
       [ 3,  9,  4, 18, 11]])

In [65]:
arr.max()

18

Busca el máximo en toda la estructura

In [66]:
matriz.max()

18

Buscar el máximo de cada fila

In [67]:
matriz.max(1)

array([18, 18])

Buscar el máximo de cada columna

In [70]:
matriz.max(0)

array([ 5, 11, 18, 18, 11])

In [71]:
arr.argmax()

2

In [72]:
matriz.argmax()

2

In [74]:
arr.min()


2

In [75]:
arr.min(0)


2

In [77]:
matriz.min()


2

In [78]:
matriz.min(0)


array([3, 9, 4, 2, 8])

In [79]:
matriz.min(1)

array([2, 3])

In [80]:
arr.argmin()

3

In [81]:
matriz.argmin()

3

In [82]:
# peak to peak: la distancia entre el valor más bajo y el más alto (a partir de NumPy 2.0 el método arr.ptp() ya no existe; usar np.ptp(arr))
arr.ptp()

16

In [83]:
matriz.ptp(0)

array([ 2,  2, 14, 16,  3])

In [85]:
arr.sort()
arr

array([ 2,  3,  4,  5,  8,  9, 11, 11, 18, 18])

In [86]:
np.percentile(arr, 50)
#np.percentile(arr, 0)
#np.percentile(arr, 100)


8.5

In [87]:
np.median(arr)

8.5

In [88]:
np.median(matriz, 1)

array([ 4., 11.])

In [89]:
np.std(arr)

5.448853090330111

In [90]:
np.var(arr)

29.689999999999998

In [91]:
np.std(arr)**2

29.69

In [92]:
np.sort(arr)

array([ 2,  3,  4,  5,  8,  9, 11, 11, 18, 18])

In [93]:
np.mean(arr)

8.9

In [98]:
a = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])


In [99]:
np.concatenate((a, b), axis=0)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [100]:
a.ndim

2

In [101]:
b.ndim

1

In [102]:
a

array([[1, 2],
       [3, 4]])

In [103]:
b

array([5, 6])

In [104]:
b = np.expand_dims(b, axis=0)

In [105]:
b.ndim

2

In [106]:
b

array([[5, 6]])

In [107]:
np.concatenate((a, b), axis=0)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [108]:
np.concatenate((a, b), axis=1)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 2 and the array at index 1 has size 1

In [109]:
b

array([[5, 6]])

In [111]:
np.concatenate((a, b.T), axis=1)

array([[1, 2, 5],
       [3, 4, 6]])

# Copy

In [112]:
arr = np.arange(0, 11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [113]:
pice_arr = arr[0:6]

In [114]:
pice_arr[:] = 0
pice_arr 

array([0, 0, 0, 0, 0, 0])

In [115]:
arr

array([ 0,  0,  0,  0,  0,  0,  6,  7,  8,  9, 10])

In [116]:
arr_copy = arr.copy()

In [117]:
arr_copy[:] = 100
arr_copy

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [118]:
arr

array([ 0,  0,  0,  0,  0,  0,  6,  7,  8,  9, 10])

# Condiciones


In [119]:
arr = np.linspace(1, 10, 10, dtype='int8')
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int8)

In [120]:
arr > 5

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [121]:
indices_condicion = arr > 5
indices_condicion

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [122]:
arr[indices_condicion]

array([ 6,  7,  8,  9, 10], dtype=int8)

In [123]:
arr[arr >5]

array([ 6,  7,  8,  9, 10], dtype=int8)

In [124]:
arr[(arr > 5) & (arr < 9)]

array([6, 7, 8], dtype=int8)

In [125]:
arr[arr > 5] = 99

In [126]:
arr

array([ 1,  2,  3,  4,  5, 99, 99, 99, 99, 99], dtype=int8)

# Operaciones

In [127]:
l = [1, 2]
l

[1, 2]

In [128]:
l * 2

[1, 2, 1, 2]

In [129]:
arr = np.arange(0, 10)
arr2 = arr.copy()

In [130]:
arr * 2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [131]:
arr + 2

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [132]:
11/arr

  11/arr


array([        inf, 11.        ,  5.5       ,  3.66666667,  2.75      ,
        2.2       ,  1.83333333,  1.57142857,  1.375     ,  1.22222222])

In [133]:
arr ** 2

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [134]:
arr + arr2

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [135]:
arr * arr2

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [136]:
matriz = arr.reshape(2, 5)
matriz2 = matriz.copy()

In [137]:
matriz + matriz2

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18]])

In [138]:
matriz - matriz2

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [139]:
np.matmul(matriz, matriz2.T)

array([[ 30,  80],
       [ 80, 255]])

In [140]:
matriz @ matriz2.T

array([[ 30,  80],
       [ 80, 255]])

In [141]:
matriz.dot(matriz2.T)

array([[ 30,  80],
       [ 80, 255]])

# Pandas

## Series y DataFrames 

In [3]:
import pandas as pd

In [151]:
students = pd.Series(['Robert', 'Charly', 'George', 'Leo'], index=[1, 7, 10, 30])
students

1     Robert
7     Charly
10    George
30       Leo
dtype: object

In [148]:
students = pd.Series(['Robert', 'Charly', 'George', 'Leo'])
students

0    Robert
1    Charly
2    George
3       Leo
dtype: object

In [149]:
dict = {1:'Robert', 7:'Charly', 10:'George', 30:'Leo'}
pd.Series(dict)

1     Robert
7     Charly
10    George
30       Leo
dtype: object

Se puede crear una serie de pandas desde una lista, un diccionario o incluso un formato json.

In [152]:
students[7]

'Charly'

In [153]:
students[0:3]

1     Robert
7     Charly
10    George
dtype: object

In [155]:
dict = {'Jugador' : ['Navas', 'Mbappe', 'Neymar', 'Messi'] , 
'Altura' : [183.0, 170.0, 170.0, 165.0], 
'Goles': [2, 200, 200, 200]}

In [158]:
df_players = pd.DataFrame(dict, index=[1, 7, 10, 30])

In [159]:
df_players.columns

Index(['Jugador', 'Altura', 'Goles'], dtype='object')

In [160]:
df_players.index

Int64Index([1, 7, 10, 30], dtype='int64')

# Archivos CSV y JSON

In [4]:
df_books = pd.read_csv('bestsellers-with-categories.csv', sep=',', header=0)

In [165]:
df_books.columns

Index(['Name', 'Author', 'User Rating', 'Reviews', 'Price', 'Year', 'Genre'], dtype='object')

In [167]:
pd.read_json('hpcharactersdataraw.json', typ='Series')

0       {'Name': 'Mrs. Abbott', 'Link': 'https://www.h...
1       {'Name': 'Hannah Abbott', 'Link': 'https://www...
2       {'Name': 'Abel Treetops', 'Link': 'https://www...
3       {'Name': 'Euan Abercrombie', 'Link': 'https://...
4       {'Name': 'Aberforth Dumbledore', 'Link': 'http...
                              ...                        
1935    {'Name': 'Georgi Zdravko', 'Link': 'https://ww...
1936    {'Name': 'Zograf', 'Link': 'https://www.hp-lex...
1937    {'Name': 'Zonko', 'Link': 'https://www.hp-lexi...
1938    {'Name': 'Valentina Vázquez', 'Link': 'https:/...
1939    {'Name': 'Zygmunt Budge', 'Link': 'https://www...
Length: 1940, dtype: object

# loc & iloc

In [168]:
df_books[0:4]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction


In [170]:
df_books['Name']

0                          10-Day Green Smoothie Cleanse
1                                      11/22/63: A Novel
2                12 Rules for Life: An Antidote to Chaos
3                                 1984 (Signet Classics)
4      5,000 Awesome Facts (About Everything!) (Natio...
                             ...                        
545         Wrecking Ball (Diary of a Wimpy Kid Book 14)
546    You Are a Badass: How to Stop Doubting Your Gr...
547    You Are a Badass: How to Stop Doubting Your Gr...
548    You Are a Badass: How to Stop Doubting Your Gr...
549    You Are a Badass: How to Stop Doubting Your Gr...
Name: Name, Length: 550, dtype: object

In [172]:
df_books[['Name', 'Author', 'Year']]

Unnamed: 0,Name,Author,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,2016
1,11/22/63: A Novel,Stephen King,2011
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,2018
3,1984 (Signet Classics),George Orwell,2017
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,2019
...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,2019
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,2016
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,2017
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,2018


In [173]:
df_books.loc[:]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


In [175]:
df_books.loc[0:4]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [179]:
df_books.loc[0:4, ['Name', 'Author']]

Unnamed: 0,Name,Author
0,10-Day Green Smoothie Cleanse,JJ Smith
1,11/22/63: A Novel,Stephen King
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson
3,1984 (Signet Classics),George Orwell
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids


In [181]:
df_books.loc[:, ['Reviews']] * -1

Unnamed: 0,Reviews
0,-17350
1,-2052
2,-18979
3,-21424
4,-7665
...,...
545,-9413
546,-14331
547,-14331
548,-14331


In [182]:
df_books.loc[:, ['Author']] == 'JJ Smith'

Unnamed: 0,Author
0,True
1,False
2,False
3,False
4,False
...,...
545,False
546,False
547,False
548,False


In [183]:
df_books.iloc[:]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


In [185]:
df_books.iloc[:, 0:3]

Unnamed: 0,Name,Author,User Rating
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7
1,11/22/63: A Novel,Stephen King,4.6
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7
3,1984 (Signet Classics),George Orwell,4.7
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8
...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7


In [186]:
df_books.iloc[1, 3] * -1

-2052

In [187]:
df_books.iloc[:2, 2:]

Unnamed: 0,User Rating,Reviews,Price,Year,Genre
0,4.7,17350,8,2016,Non Fiction
1,4.6,2052,22,2011,Fiction


# Agregar o eliminar datos con Pandas

In [188]:
df_books.head(2)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction


In [189]:
# drop columns
df_books.drop('Genre', axis =1).head(2)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011


In [190]:
df_books.drop('Genre', axis= 1, inplace=True)
# inplace borra la columna no solo en la salida, sino también en el df

In [191]:
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019


In [192]:
df_books = df_books.drop('Year', axis=1)
# Otra forma de asegurarnos de que drop afecte al df: reasignar el resultado a la misma variable

In [193]:
df_books.head(2)

Unnamed: 0,Name,Author,User Rating,Reviews,Price
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8
1,11/22/63: A Novel,Stephen King,4.6,2052,22


In [194]:
del df_books['Price']
# del es una sentencia de Python y no un método de pandas

In [195]:
# Para borrar filas
df_books.drop(0, axis=0).head(2)

Unnamed: 0,Name,Author,User Rating,Reviews
1,11/22/63: A Novel,Stephen King,4.6,2052
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979


In [196]:
df_books.drop([0,1,2], axis=0).head(2)

Unnamed: 0,Name,Author,User Rating,Reviews
3,1984 (Signet Classics),George Orwell,4.7,21424
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665


In [197]:
df_books.drop(range(0,10), axis=0).head(2)

Unnamed: 0,Name,Author,User Rating,Reviews
10,A Man Called Ove: A Novel,Fredrik Backman,4.6,23848
11,A Patriot's History of the United States: From...,Larry Schweikart,4.6,460


In [198]:
# Agregar columnas
df_books.head(2)

Unnamed: 0,Name,Author,User Rating,Reviews
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350
1,11/22/63: A Novel,Stephen King,4.6,2052


In [199]:
df_books['New_column'] = np.nan

In [200]:
df_books

Unnamed: 0,Name,Author,User Rating,Reviews,New_column
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,
1,11/22/63: A Novel,Stephen King,4.6,2052,
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,
3,1984 (Signet Classics),George Orwell,4.7,21424,
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,
...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,


In [201]:
data = np.arange(0, df_books.shape[0])
df_books['Range'] = data

In [202]:
df_books

Unnamed: 0,Name,Author,User Rating,Reviews,New_column,Range
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,,4
...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,,545
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,,546
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,,548


In [203]:
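# Agregar filas (DataFrame.append fue eliminado en pandas 2.0; en versiones recientes usar pd.concat([df_books, df_books]))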
df_books.append(df_books)

Unnamed: 0,Name,Author,User Rating,Reviews,New_column,Range
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,,0
1,11/22/63: A Novel,Stephen King,4.6,2052,,1
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,,2
3,1984 (Signet Classics),George Orwell,4.7,21424,,3
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,,4
...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,,545
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,,546
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,,547
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,,548


# Manejo de datos nulos

In [204]:
dict ={'col1': [1, 2, 3, np.nan],
 'col2': [4, np.nan, 6, 7],
 'col3': ['a', 'b', 'c', None]}

df = pd.DataFrame(dict)
df

Unnamed: 0,col1,col2,col3
0,1.0,4.0,a
1,2.0,,b
2,3.0,6.0,c
3,,7.0,


In [205]:
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,True,False
2,False,False,False
3,True,False,True


In [206]:
df.isnull()*1

Unnamed: 0,col1,col2,col3
0,0,0,0
1,0,1,0
2,0,0,0
3,1,0,1


In [207]:
df.fillna('Missing')

Unnamed: 0,col1,col2,col3
0,1,4,a
1,2,Missing,b
2,3,6,c
3,Missing,7,Missing


In [208]:
df.fillna(df.mean())

Unnamed: 0,col1,col2,col3
0,1.0,4.0,a
1,2.0,5.666667,b
2,3.0,6.0,c
3,2.0,7.0,


In [209]:
df.interpolate()

Unnamed: 0,col1,col2,col3
0,1.0,4.0,a
1,2.0,5.0,b
2,3.0,6.0,c
3,3.0,7.0,


In [210]:
df.dropna()

Unnamed: 0,col1,col2,col3
0,1.0,4.0,a
2,3.0,6.0,c


# Filtrado por condiciones

In [212]:
df_books = pd.read_csv('bestsellers-with-categories.csv', sep=',', header=0)
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [213]:
df_books['Year'] > 2016

0      False
1      False
2       True
3       True
4       True
       ...  
545     True
546    False
547     True
548     True
549     True
Name: Year, Length: 550, dtype: bool

In [214]:
# Store the boolean mask (True where Year > 2016) and use it to keep only those rows.
gt_2016 = df_books['Year'] > 2016
df_books[gt_2016]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
7,A Gentleman in Moscow: A Novel,Amor Towles,4.7,19699,15,2017,Fiction
8,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,4.7,5983,3,2018,Non Fiction
...,...,...,...,...,...,...,...
544,Wonder,R. J. Palacio,4.8,21625,9,2017,Fiction
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


In [215]:
df_books[df_books['Year'] > 2016]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
7,A Gentleman in Moscow: A Novel,Amor Towles,4.7,19699,15,2017,Fiction
8,"A Higher Loyalty: Truth, Lies, and Leadership",James Comey,4.7,5983,3,2018,Non Fiction
...,...,...,...,...,...,...,...
544,Wonder,R. J. Palacio,4.8,21625,9,2017,Fiction
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


In [216]:
genre_fiction = df_books['Genre'] == 'Fiction'

In [217]:
# Combine both masks with & (element-wise AND): Fiction books with Year > 2016.
df_books[genre_fiction & gt_2016]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
7,A Gentleman in Moscow: A Novel,Amor Towles,4.7,19699,15,2017,Fiction
10,A Man Called Ove: A Novel,Fredrik Backman,4.6,23848,8,2017,Fiction
13,A Wrinkle in Time (Time Quintet),Madeleine L'Engle,4.5,5153,5,2018,Fiction
40,"Brown Bear, Brown Bear, What Do You See?",Bill Martin Jr.,4.9,14344,5,2017,Fiction
...,...,...,...,...,...,...,...
509,To Kill a Mockingbird,Harper Lee,4.8,26234,7,2019,Fiction
529,What Should Danny Do? (The Power to Choose Ser...,Adir Levy,4.8,8170,13,2019,Fiction
534,Where the Crawdads Sing,Delia Owens,4.8,87841,15,2019,Fiction
544,Wonder,R. J. Palacio,4.8,21625,9,2017,Fiction


In [218]:
# ~ negates the mask: rows whose Year is NOT greater than 2016.
df_books[~gt_2016]

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
5,A Dance with Dragons (A Song of Ice and Fire),George R. R. Martin,4.4,12643,11,2011,Fiction
6,A Game of Thrones / A Clash of Kings / A Storm...,George R. R. Martin,4.7,19735,30,2014,Fiction
9,A Man Called Ove: A Novel,Fredrik Backman,4.6,23848,8,2016,Fiction
...,...,...,...,...,...,...,...
540,Wonder,R. J. Palacio,4.8,21625,9,2013,Fiction
541,Wonder,R. J. Palacio,4.8,21625,9,2014,Fiction
542,Wonder,R. J. Palacio,4.8,21625,9,2015,Fiction
543,Wonder,R. J. Palacio,4.8,21625,9,2016,Fiction


# Funciones principales de Pandas

In [219]:
# NOTE(review): no parentheses here, so this only displays the bound method
# object instead of calling info() -- confirm whether that is intentional.
df_books.info

<bound method DataFrame.info of                                                   Name  \
0                        10-Day Green Smoothie Cleanse   
1                                    11/22/63: A Novel   
2              12 Rules for Life: An Antidote to Chaos   
3                               1984 (Signet Classics)   
4    5,000 Awesome Facts (About Everything!) (Natio...   
..                                                 ...   
545       Wrecking Ball (Diary of a Wimpy Kid Book 14)   
546  You Are a Badass: How to Stop Doubting Your Gr...   
547  You Are a Badass: How to Stop Doubting Your Gr...   
548  You Are a Badass: How to Stop Doubting Your Gr...   
549  You Are a Badass: How to Stop Doubting Your Gr...   

                       Author  User Rating  Reviews  Price  Year        Genre  
0                    JJ Smith          4.7    17350      8  2016  Non Fiction  
1                Stephen King          4.6     2052     22  2011      Fiction  
2          Jordan B. Peterson  

In [220]:
df_books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         550 non-null    object 
 1   Author       550 non-null    object 
 2   User Rating  550 non-null    float64
 3   Reviews      550 non-null    int64  
 4   Price        550 non-null    int64  
 5   Year         550 non-null    int64  
 6   Genre        550 non-null    object 
dtypes: float64(1), int64(3), object(3)
memory usage: 30.2+ KB


In [221]:
# Summary statistics, only for the numeric attributes
df_books.describe()

Unnamed: 0,User Rating,Reviews,Price,Year
count,550.0,550.0,550.0,550.0
mean,4.618364,11953.281818,13.1,2014.0
std,0.22698,11731.132017,10.842262,3.165156
min,3.3,37.0,0.0,2009.0
25%,4.5,4058.0,7.0,2011.0
50%,4.7,8580.0,11.0,2014.0
75%,4.8,17253.25,16.0,2017.0
max,4.9,87841.0,105.0,2019.0


In [222]:
df_books.tail()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction
549,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2019,Non Fiction


In [223]:
# Check how much memory the DataFrame uses (reported per column); when it is
# large it is convenient to iterate over it or to parallelize the work.
df_books.memory_usage(deep=True)

Index            128
Name           59737
Author         39078
User Rating     4400
Reviews         4400
Price           4400
Year            4400
Genre          36440
dtype: int64

In [224]:
df_books['Author'].value_counts()

Jeff Kinney                           12
Suzanne Collins                       11
Gary Chapman                          11
Rick Riordan                          11
American Psychological Association    10
                                      ..
Paulo Coelho                           1
Maurice Sendak                         1
David Goggins                          1
Chip Gaines                            1
Bob Woodward                           1
Name: Author, Length: 248, dtype: int64

In [225]:
df_books.iloc[0]

Name           10-Day Green Smoothie Cleanse
Author                              JJ Smith
User Rating                              4.7
Reviews                                17350
Price                                      8
Year                                    2016
Genre                            Non Fiction
Name: 0, dtype: object

In [226]:
# Append a copy of the first row so the frame contains a duplicate.
# DataFrame.append was removed in pandas 2.0; pd.concat with a one-row frame
# (iloc[[0]] keeps it a DataFrame) is the supported replacement and preserves
# the row's index label and column dtypes.
df_books = pd.concat([df_books, df_books.iloc[[0]]])

In [227]:
df_books

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction
549,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2019,Non Fiction


In [228]:
# Remove duplicated rows, keeping the last occurrence of each one
# (the result no longer starts with row 0).
df_books.drop_duplicates(keep='last')

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
5,A Dance with Dragons (A Song of Ice and Fire),George R. R. Martin,4.4,12643,11,2011,Fiction
...,...,...,...,...,...,...,...
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction
549,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2019,Non Fiction


In [229]:
df_books.sort_values('Year', ascending=True)

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
513,"Twilight (The Twilight Saga, Book 1)",Stephenie Meyer,4.7,11676,9,2009,Fiction
418,The Last Olympian (Percy Jackson and the Olymp...,Rick Riordan,4.8,4628,7,2009,Fiction
177,"I, Alex Cross",James Patterson,4.6,1320,7,2009,Fiction
417,The Last Lecture,Randy Pausch,4.7,4028,9,2009,Non Fiction
331,The 7 Habits of Highly Effective People: Power...,Stephen R. Covey,4.6,9325,24,2009,Non Fiction
...,...,...,...,...,...,...,...
239,"National Geographic Kids Why?: Over 1,111 Answ...",Crispin Boyer,4.8,5347,16,2019,Non Fiction
74,Difficult Riddles For Smart Kids: 300 Difficul...,M Prefontaine,4.6,7955,5,2019,Non Fiction
263,P is for Potty! (Sesame Street) (Lift-the-Flap),Naomi Kleinberg,4.7,10820,5,2019,Non Fiction
466,The Subtle Art of Not Giving a F*ck: A Counter...,Mark Manson,4.6,26490,15,2019,Non Fiction


# Groupby

In [231]:
df_books.groupby('Author').count()

Unnamed: 0_level_0,Name,User Rating,Reviews,Price,Year,Genre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abraham Verghese,2,2,2,2,2,2
Adam Gasiewski,1,1,1,1,1,1
Adam Mansbach,1,1,1,1,1,1
Adir Levy,1,1,1,1,1,1
Admiral William H. McRaven,1,1,1,1,1,1
...,...,...,...,...,...,...
Walter Isaacson,3,3,3,3,3,3
William Davis,2,2,2,2,2,2
William P. Young,2,2,2,2,2,2
Wizards RPG Team,3,3,3,3,3,3


In [233]:
# Per-author aggregations. Only the last expression (sum) is rendered as the
# cell output.
# numeric_only=True keeps mean/sum on the numeric columns: pandas >= 2.0 no
# longer drops text columns silently (mean would raise on them).
df_books.groupby('Author').min()
df_books.groupby('Author').max()
df_books.groupby('Author').mean(numeric_only=True)
df_books.groupby('Author').sum(numeric_only=True)

Unnamed: 0_level_0,User Rating,Reviews,Price,Year
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abraham Verghese,9.2,9732,22,4021
Adam Gasiewski,4.4,3113,6,2017
Adam Mansbach,4.8,9568,9,2011
Adir Levy,4.8,8170,13,2019
Admiral William H. McRaven,4.7,10199,11,2017
...,...,...,...,...
Walter Isaacson,13.7,18668,61,6040
William Davis,8.8,14994,12,4025
William P. Young,9.2,39440,16,4026
Wizards RPG Team,14.4,50970,81,6054


In [234]:
# Sum the numeric columns per author and pick one author's row by index label.
# numeric_only=True is required on pandas >= 2.0 so text columns are left out.
df_books.groupby('Author').sum(numeric_only=True).loc['William Davis']

User Rating        8.8
Reviews        14994.0
Price             12.0
Year            4025.0
Name: William Davis, dtype: float64

In [235]:
# reset_index() turns the Author group labels back into a regular column.
# numeric_only=True is required on pandas >= 2.0 so text columns are left out.
df_books.groupby('Author').sum(numeric_only=True).reset_index()

Unnamed: 0,Author,User Rating,Reviews,Price,Year
0,Abraham Verghese,9.2,9732,22,4021
1,Adam Gasiewski,4.4,3113,6,2017
2,Adam Mansbach,4.8,9568,9,2011
3,Adir Levy,4.8,8170,13,2019
4,Admiral William H. McRaven,4.7,10199,11,2017
...,...,...,...,...,...
243,Walter Isaacson,13.7,18668,61,6040
244,William Davis,8.8,14994,12,4025
245,William P. Young,9.2,39440,16,4026
246,Wizards RPG Team,14.4,50970,81,6054


In [236]:
df_books.groupby('Author').agg(['min', 'max'])

Unnamed: 0_level_0,Name,Name,User Rating,User Rating,Reviews,Reviews,Price,Price,Year,Year,Genre,Genre
Unnamed: 0_level_1,min,max,min,max,min,max,min,max,min,max,min,max
Author,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Abraham Verghese,Cutting for Stone,Cutting for Stone,4.6,4.6,4866,4866,11,11,2010,2011,Fiction,Fiction
Adam Gasiewski,Milk and Vine: Inspirational Quotes From Class...,Milk and Vine: Inspirational Quotes From Class...,4.4,4.4,3113,3113,6,6,2017,2017,Non Fiction,Non Fiction
Adam Mansbach,Go the F**k to Sleep,Go the F**k to Sleep,4.8,4.8,9568,9568,9,9,2011,2011,Fiction,Fiction
Adir Levy,What Should Danny Do? (The Power to Choose Ser...,What Should Danny Do? (The Power to Choose Ser...,4.8,4.8,8170,8170,13,13,2019,2019,Fiction,Fiction
Admiral William H. McRaven,Make Your Bed: Little Things That Can Change Y...,Make Your Bed: Little Things That Can Change Y...,4.7,4.7,10199,10199,11,11,2017,2017,Non Fiction,Non Fiction
...,...,...,...,...,...,...,...,...,...,...,...,...
Walter Isaacson,Leonardo da Vinci,Steve Jobs,4.5,4.6,3014,7827,20,21,2011,2017,Non Fiction,Non Fiction
William Davis,"Wheat Belly: Lose the Wheat, Lose the Weight, ...","Wheat Belly: Lose the Wheat, Lose the Weight, ...",4.4,4.4,7497,7497,6,6,2012,2013,Non Fiction,Non Fiction
William P. Young,The Shack: Where Tragedy Confronts Eternity,The Shack: Where Tragedy Confronts Eternity,4.6,4.6,19720,19720,8,8,2009,2017,Fiction,Fiction
Wizards RPG Team,Player's Handbook (Dungeons & Dragons),Player's Handbook (Dungeons & Dragons),4.8,4.8,16990,16990,27,27,2017,2019,Fiction,Fiction


In [237]:
# Different aggregations per column: min and max of Reviews, sum of User Rating.
df_books.groupby('Author').agg({'Reviews' : ['min', 'max'], 'User Rating' : 'sum'})

Unnamed: 0_level_0,Reviews,Reviews,User Rating
Unnamed: 0_level_1,min,max,sum
Author,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Abraham Verghese,4866,4866,9.2
Adam Gasiewski,3113,3113,4.4
Adam Mansbach,9568,9568,4.8
Adir Levy,8170,8170,4.8
Admiral William H. McRaven,10199,10199,4.7
...,...,...,...
Walter Isaacson,3014,7827,13.7
William Davis,7497,7497,8.8
William P. Young,19720,19720,9.2
Wizards RPG Team,16990,16990,14.4


In [238]:
# Group by two keys: the counts are reported per (Author, Year) pair.
df_books.groupby(['Author', 'Year']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,User Rating,Reviews,Price,Genre
Author,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abraham Verghese,2010,1,1,1,1,1
Abraham Verghese,2011,1,1,1,1,1
Adam Gasiewski,2017,1,1,1,1,1
Adam Mansbach,2011,1,1,1,1,1
Adir Levy,2019,1,1,1,1,1
...,...,...,...,...,...,...
Wizards RPG Team,2017,1,1,1,1,1
Wizards RPG Team,2018,1,1,1,1,1
Wizards RPG Team,2019,1,1,1,1,1
Zhi Gang Sha,2009,1,1,1,1,1


# Combinación de DataFrames

# Merge y Concat

In [4]:
# First sample frame: columns A-D, each cell is the column letter followed by
# its row number (0-3).
df1 = pd.DataFrame(
    {letter: [f'{letter}{row}' for row in range(4)] for letter in 'ABCD'}
)

In [5]:
# Second sample frame: same columns A-D, continuing the numbering with 4-7.
df2 = pd.DataFrame(
    {letter: [f'{letter}{row}' for row in range(4, 8)] for letter in 'ABCD'}
)

In [6]:
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [7]:
# ignore_index=True renumbers the stacked rows 0-7 instead of repeating each
# frame's own 0-3 index.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [8]:
# axis=1 places the frames side by side (as extra columns) instead of stacking rows.
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


# Merge

In [12]:
# Left (izq) and right (der) frames that share the join column 'key' (k0-k3).
izq = pd.DataFrame({
    'key': [f'k{n}' for n in range(4)],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})

der = pd.DataFrame({
    'key': [f'k{n}' for n in range(4)],
    'C': [f'C{n}' for n in range(4)],
    'D': [f'D{n}' for n in range(4)],
})

In [14]:
izq.merge(der, on='key')

Unnamed: 0,key,A,B,C,D
0,k0,A0,B0,C0,D0
1,k1,A1,B1,C1,D1
2,k2,A2,B2,C2,D2
3,k3,A3,B3,C3,D3


In [15]:
# Same frames as before, but the right-hand key column is now called 'key2',
# so the two frames no longer share a column name to merge on.
izq = pd.DataFrame(
    [[f'k{n}', f'A{n}', f'B{n}'] for n in range(4)],
    columns=['key', 'A', 'B'],
)

der = pd.DataFrame(
    [[f'k{n}', f'C{n}', f'D{n}'] for n in range(4)],
    columns=['key2', 'C', 'D'],
)

In [16]:
# Raises KeyError: 'key' is not a column of der (its key column is named 'key2').
izq.merge(der, on='key')

KeyError: 'key'

In [17]:
# The key columns have different names, so name the column to use on each side.
izq.merge(der, left_on='key', right_on='key2')

Unnamed: 0,key,A,B,key2,C,D
0,k0,A0,B0,k0,C0,D0
1,k1,A1,B1,k1,C1,D1
2,k2,A2,B2,k2,C2,D2
3,k3,A3,B3,k3,C3,D3


In [18]:
# Left frame is unchanged; in the right frame the last key no longer matches
# any key on the left, which makes the how= variants below differ.
# NOTE(review): 'np.nan' is the literal string, not a missing value -- it simply
# never equals a key in izq. Confirm whether np.nan was intended.
izq = pd.DataFrame({
    'key': [f'k{n}' for n in range(4)],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})

der = pd.DataFrame({
    'key2': [f'k{n}' for n in range(3)] + ['np.nan'],
    'C': [f'C{n}' for n in range(4)],
    'D': [f'D{n}' for n in range(4)],
})

In [19]:
izq.merge(der, left_on='key', right_on='key2')

Unnamed: 0,key,A,B,key2,C,D
0,k0,A0,B0,k0,C0,D0
1,k1,A1,B1,k1,C1,D1
2,k2,A2,B2,k2,C2,D2


In [20]:
# how='left' keeps every row of izq; the right-hand columns stay empty where
# there is no match (row k3).
izq.merge(der, left_on='key', right_on='key2', how='left')

Unnamed: 0,key,A,B,key2,C,D
0,k0,A0,B0,k0,C0,D0
1,k1,A1,B1,k1,C1,D1
2,k2,A2,B2,k2,C2,D2
3,k3,A3,B3,,,


In [21]:
# how='right' keeps every row of der; the left-hand columns stay empty where
# there is no match (the row whose key2 is the string 'np.nan').
izq.merge(der, left_on='key', right_on='key2', how='right')

Unnamed: 0,key,A,B,key2,C,D
0,k0,A0,B0,k0,C0,D0
1,k1,A1,B1,k1,C1,D1
2,k2,A2,B2,k2,C2,D2
3,,,,np.nan,C3,D3


In [23]:
izq.merge(der, left_on='key', right_on='key2', how='inner')

Unnamed: 0,key,A,B,key2,C,D
0,k0,A0,B0,k0,C0,D0
1,k1,A1,B1,k1,C1,D1
2,k2,A2,B2,k2,C2,D2


# Join

In [25]:
# Frames for join(): the keys live in the index rather than in a column.
# Both indexes contain k0 and k2; k1 appears only on the left, k3 only on the right.
izq = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(3)] for col in ('A', 'B')},
    index=['k0', 'k1', 'k2'],
)

der = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(3)] for col in ('C', 'D')},
    index=['k0', 'k2', 'k3'],
)


In [26]:
# join() matches rows on the index; with the default settings every row of izq
# is kept and k1, which has no match in der, gets empty C/D values.
izq.join(der)

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k1,A1,B1,,
k2,A2,B2,C1,D1


In [27]:
izq.join(der, how='inner')

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k2,A2,B2,C1,D1


In [28]:
izq.join(der, how='outer')

Unnamed: 0,A,B,C,D
k0,A0,B0,C0,D0
k1,A1,B1,,
k2,A2,B2,C1,D1
k3,,,C2,D2


# Pivot y Melt

...


# Apply

In [30]:
def two_times(value):
    """Return *value* multiplied by two."""
    doubled = value * 2
    return doubled

In [31]:
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction


In [32]:
df_books['User Rating'].apply(two_times)

0      9.4
1      9.2
2      9.4
3      9.4
4      9.6
      ... 
545    9.8
546    9.4
547    9.4
548    9.4
549    9.4
Name: User Rating, Length: 550, dtype: float64

In [33]:
# Much more efficient than using a for loop
df_books['Rating_2'] = df_books['User Rating'].apply(two_times)

In [34]:
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Rating_2
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,9.4
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,9.2
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,9.4
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,9.4
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,9.6


In [5]:
# Same idea with an inline lambda; this overwrites Rating_2 with three times the rating.
df_books['Rating_2'] = df_books['User Rating'].apply(lambda x : x * 3)
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Rating_2
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,14.1
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,13.8
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,14.1
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,14.1
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,14.4


In [7]:
# axis=1 passes each row to the lambda: the rating is doubled for Fiction books
# and kept unchanged for every other genre.
df_books['Rating_2'] = df_books.apply(lambda x : x['User Rating'] * 2 if x['Genre'] == 'Fiction' else x['User Rating'], axis=1)
df_books.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Rating_2
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,4.7
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,9.2
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,4.7
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,9.4
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,4.8
