# Operaciones

Los objetos `DataFrame` ofrecen varias operaciones básicas; el listado completo se puede encontrar en la [documentación](https://pandas.pydata.org/docs/user_guide/basics.html).

## Rango de fechas

In [1]:
import pandas as pd
import numpy as np

# Twenty consecutive calendar days starting on 15 July 2022,
# to be used as the row index of a DataFrame.
index = pd.date_range(start="7/15/2022", periods=20, freq="D")

index

DatetimeIndex(['2022-07-15', '2022-07-16', '2022-07-17', '2022-07-18',
               '2022-07-19', '2022-07-20', '2022-07-21', '2022-07-22',
               '2022-07-23', '2022-07-24', '2022-07-25', '2022-07-26',
               '2022-07-27', '2022-07-28', '2022-07-29', '2022-07-30',
               '2022-07-31', '2022-08-01', '2022-08-02', '2022-08-03'],
              dtype='datetime64[ns]', freq='D')

## Consultas rápidas

In [3]:
# Fill a DataFrame with standard-normal random values: one row per date in
# `index` and four columns. A seeded generator is used so that the values are
# the same on every Restart & Run All.
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(
    rng.standard_normal((len(index), 4)),  # row count follows the index length
    index=index,
    columns=["A", "B", "C", "D"],
)

df

Unnamed: 0,A,B,C,D
2022-07-15,-0.6443,-0.223524,-0.418594,-0.790326
2022-07-16,-0.556459,-0.779092,-1.208824,-0.837636
2022-07-17,-0.446062,-1.158868,-1.872284,-0.502233
2022-07-18,1.319355,-0.362054,0.124695,-1.094839
2022-07-19,-0.623896,0.654423,-2.543371,0.029128
2022-07-20,-1.046075,-0.396897,0.273237,0.059169
2022-07-21,1.161246,1.408695,0.378178,0.596139
2022-07-22,-0.487008,0.280378,0.679403,0.002192
2022-07-23,-0.57682,-0.505063,0.472159,-1.450503
2022-07-24,-2.523451,0.495984,-0.81927,1.471802


In [4]:
# First rows of the frame (head); five rows is the default
df.head(n=5)

Unnamed: 0,A,B,C,D
2022-07-15,-0.6443,-0.223524,-0.418594,-0.790326
2022-07-16,-0.556459,-0.779092,-1.208824,-0.837636
2022-07-17,-0.446062,-1.158868,-1.872284,-0.502233
2022-07-18,1.319355,-0.362054,0.124695,-1.094839
2022-07-19,-0.623896,0.654423,-2.543371,0.029128


In [5]:
# First three rows only
df.head(n=3)

Unnamed: 0,A,B,C,D
2022-07-15,-0.6443,-0.223524,-0.418594,-0.790326
2022-07-16,-0.556459,-0.779092,-1.208824,-0.837636
2022-07-17,-0.446062,-1.158868,-1.872284,-0.502233


In [6]:
# Last rows of the frame (tail); five rows is the default
df.tail(n=5)

Unnamed: 0,A,B,C,D
2022-07-30,-0.666578,-1.022763,1.215372,0.090209
2022-07-31,0.514172,-1.897299,0.99522,0.189199
2022-08-01,0.458931,1.842278,0.828621,0.778399
2022-08-02,0.856764,-0.617094,-0.306749,0.521316
2022-08-03,1.35275,-2.314669,2.051563,-0.741298


In [7]:
# Last three rows only
df.tail(n=3)

Unnamed: 0,A,B,C,D
2022-08-01,0.458931,1.842278,0.828621,0.778399
2022-08-02,0.856764,-0.617094,-0.306749,0.521316
2022-08-03,1.35275,-2.314669,2.051563,-0.741298


## Valores únicos

In [8]:
# Build a small DataFrame whose columns hold different kinds of data:
# integers, floats and strings.
# NOTE: this rebinds `df`, replacing the date-indexed frame used above.
columnas = {
    "enteros": [100, 200, 300, 400],
    "decimales": [3.14, 2.72, 1.618, 3.14],
    "cadenas": ["hola", "adiós", "hola", "adiós"],
}
df = pd.DataFrame(columnas)

df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [9]:
# Array holding each distinct value of the 'cadenas' column once
df["cadenas"].unique()

array(['hola', 'adiós'], dtype=object)

In [10]:
# How many distinct values the 'cadenas' column contains
df["cadenas"].nunique()

2

In [11]:
# Series that pairs each distinct value of the 'cadenas' column
# with the number of times it occurs
df["cadenas"].value_counts()

hola     2
adiós    2
Name: cadenas, dtype: int64

## Aplicación de funciones

In [12]:
# Built-in aggregation method of the column Series: total of its values
df["decimales"].sum()

10.618

In [13]:
# Apply an existing function (the built-in len) to every element of the column
df["cadenas"].apply(len)

0    4
1    5
2    4
3    5
Name: cadenas, dtype: int64

In [14]:
# Apply a user-defined function
def doblar(n):
    """Return ``n`` multiplied by two."""
    doble = n * 2
    return doble

# Run doblar() on every element of the 'enteros' column
df["enteros"].apply(doblar)

0    200
1    400
2    600
3    800
Name: enteros, dtype: int64

In [15]:
# Apply an anonymous (lambda) function: divide every element by three
df["enteros"].apply(lambda valor: valor / 3)

0     33.333333
1     66.666667
2    100.000000
3    133.333333
Name: enteros, dtype: float64

In [16]:
# Remove the 'decimales' column for the rest of the notebook.
# `df` is rebound to the result of drop() instead of using `del df[...]`, so the
# cell can be re-run safely: errors="ignore" turns a second run into a no-op
# rather than a KeyError.
df = df.drop(columns=["decimales"], errors="ignore")

In [17]:
# Display the frame to check which columns remain
df

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós


## Recuperar índices

In [18]:
# Column labels of the DataFrame
df.columns

Index(['enteros', 'cadenas'], dtype='object')

In [19]:
# Row labels (index) of the DataFrame
df.index

RangeIndex(start=0, stop=4, step=1)

## Aplicar ordenaciones

In [20]:
# Sort the rows by the 'enteros' column in ascending order.
# inplace defaults to False, so a sorted copy is returned and `df` is unchanged.
df.sort_values(by="enteros", ascending=True)

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós


In [21]:
# Sort the rows by the 'enteros' column in descending order.
# inplace defaults to False, so a sorted copy is returned and `df` is unchanged.
df.sort_values(by="enteros", ascending=False)

Unnamed: 0,enteros,cadenas
3,400,adiós
2,300,hola
1,200,adiós
0,100,hola
