# Operaciones

Los `DataFrames` ofrecen varias operaciones básicas; el listado completo se puede encontrar en la [documentación](https://pandas.pydata.org/docs/user_guide/basics.html).

## Rango de fechas

In [1]:
import pandas as pd
import numpy as np

# Daily DatetimeIndex of 20 consecutive days, used later as a DataFrame row index.
# The start date is written in ISO 8601 (YYYY-MM-DD) so it cannot be misread as
# day-first; the frequency is spelled out instead of relying on the default.
index = pd.date_range(start="2022-07-15", periods=20, freq="D")

index

DatetimeIndex(['2022-07-15', '2022-07-16', '2022-07-17', '2022-07-18',
               '2022-07-19', '2022-07-20', '2022-07-21', '2022-07-22',
               '2022-07-23', '2022-07-24', '2022-07-25', '2022-07-26',
               '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30',
               '2022-07-31', '2022-08-01', '2022-08-02', '2022-08-03'],
              dtype='datetime64[ns]', freq='D')

## Consultas rápidas

In [2]:
# Fill a 20x4 DataFrame with standard-normal samples, using `index` as the row
# index. A seeded Generator makes the values identical on every
# Restart & Run All; the previous np.random.randn call used unseeded global state.
SEED = 42  # arbitrary fixed value, only there for reproducibility
rng = np.random.default_rng(SEED)
df = pd.DataFrame(
    rng.standard_normal((20, 4)), index=index, columns=["A", "B", "C", "D"]
)

df

Unnamed: 0,A,B,C,D
2022-07-15,0.228747,1.351542,-1.691133,-1.144926
2022-07-16,-0.677065,-0.828561,-0.241494,-1.429295
2022-07-17,0.258946,0.816395,-0.135533,-1.052293
2022-07-18,-0.791374,-0.390109,0.105166,-0.580546
2022-07-19,-1.696169,0.193027,0.418999,-0.406631
2022-07-20,0.017424,-1.401649,1.510763,-1.408976
2022-07-21,-0.432499,-0.132494,1.795029,1.83008
2022-07-22,-0.961798,0.970816,1.030749,0.169591
2022-07-23,1.148965,0.205264,0.932904,-0.773096
2022-07-24,0.099338,0.46124,-1.810362,0.841698


In [3]:
# First rows of the frame ("head"); with no argument, five rows are shown
df.head()

Unnamed: 0,A,B,C,D
2022-07-15,0.228747,1.351542,-1.691133,-1.144926
2022-07-16,-0.677065,-0.828561,-0.241494,-1.429295
2022-07-17,0.258946,0.816395,-0.135533,-1.052293
2022-07-18,-0.791374,-0.390109,0.105166,-0.580546
2022-07-19,-1.696169,0.193027,0.418999,-0.406631


In [4]:
# First three rows only: the argument sets how many rows are returned
df.head(3)

Unnamed: 0,A,B,C,D
2022-07-15,0.228747,1.351542,-1.691133,-1.144926
2022-07-16,-0.677065,-0.828561,-0.241494,-1.429295
2022-07-17,0.258946,0.816395,-0.135533,-1.052293


In [5]:
# Last rows of the frame ("tail"); with no argument, five rows are shown
df.tail()

Unnamed: 0,A,B,C,D
2022-07-30,0.808742,-0.563063,1.976981,0.180013
2022-07-31,-1.80337,-1.344189,0.836296,-0.958101
2022-08-01,2.046087,-0.537131,0.417141,-0.700318
2022-08-02,0.306117,-1.925978,1.108212,-0.465298
2022-08-03,0.973325,0.401423,-0.220785,1.080629


In [6]:
# Last three rows only: the argument sets how many rows are returned
df.tail(3)

Unnamed: 0,A,B,C,D
2022-08-01,2.046087,-0.537131,0.417141,-0.700318
2022-08-02,0.306117,-1.925978,1.108212,-0.465298
2022-08-03,0.973325,0.401423,-0.220785,1.080629


## Valores únicos

In [17]:
# Small DataFrame whose columns hold different kinds of data:
# integers, floats and strings.
# NOTE: this rebinds `df`, replacing the frame built in earlier cells.
df = pd.DataFrame(
    data={
        'enteros': [100, 200, 300, 400],
        'decimales': [3.14, 2.72, 1.618, 3.14],
        'cadenas': ['hola', 'adiós', 'hola', 'adiós'],
    }
)

df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [18]:
# Array of the distinct values found in a column
df['cadenas'].unique()

array(['hola', 'adiós'], dtype=object)

In [19]:
# Number of distinct values in a column
df['cadenas'].nunique()

2

In [20]:
# Series (not a DataFrame) pairing each distinct value of a column
# with the number of times it occurs
df['cadenas'].value_counts()

cadenas
hola     2
adiós    2
Name: count, dtype: int64

## Aplicación de funciones

In [21]:
# Built-in method of the column Series: adds up all of its values
df['decimales'].sum()

10.618

In [22]:
# Apply an already existing function (the built-in len) to every element
# of the column; here it yields the length of each string
df['cadenas'].apply(len)

0    4
1    5
2    4
3    5
Name: cadenas, dtype: int64

In [24]:
# Apply a user-defined function
def doblar(n):
    """Return ``n`` multiplied by two."""
    doble = n * 2
    return doble

# Call doblar on each value of the column; the result is a new Series
df['enteros'].apply(doblar)

0    200
1    400
2    600
3    800
Name: enteros, dtype: int64

In [25]:
# Apply an anonymous function (lambda) to each value; true division
# turns the integer column into a float64 result
df['enteros'].apply(lambda n: n/3)

0     33.333333
1     66.666667
2    100.000000
3    133.333333
Name: enteros, dtype: float64

In [26]:
# Permanently remove a column: `del` mutates df in place, so every later
# cell sees the frame without 'decimales'
del df['decimales']

In [27]:
# Display the frame to confirm which columns remain
df

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós


## Recuperar índices

In [28]:
# Index object holding the column labels
df.columns

Index(['enteros', 'cadenas'], dtype='object')

In [29]:
# Index object holding the row labels
df.index

RangeIndex(start=0, stop=4, step=1)

## Aplicar ordenaciones

In [30]:
# Sort the rows by a column in ascending order; df itself is left unchanged
# because inplace=False is the default
df.sort_values(by='enteros')

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós


In [31]:
# Sort the rows by a column in descending order; df itself is left unchanged
# because inplace=False is the default
df.sort_values(by='enteros', ascending=False)

Unnamed: 0,enteros,cadenas
3,400,adiós
2,300,hola
1,200,adiós
0,100,hola
