# Operaciones

Los objetos `DataFrame` ofrecen varias operaciones básicas; el listado completo se puede encontrar en la [documentación](https://pandas.pydata.org/docs/user_guide/basics.html).

## Rango de fechas

In [3]:
import pandas as pd
import numpy as np

# Date range to use as the index of a DataFrame: 20 consecutive days.
# The start date is written in ISO 8601 (YYYY-MM-DD) so it cannot be misread as
# day-first, and freq="D" spells out the daily step instead of relying on the default.
fechas = pd.date_range(start="2023-08-04", periods=20, freq="D")

fechas

DatetimeIndex(['2023-08-04', '2023-08-05', '2023-08-06', '2023-08-07',
               '2023-08-08', '2023-08-09', '2023-08-10', '2023-08-11',
               '2023-08-12', '2023-08-13', '2023-08-14', '2023-08-15',
               '2023-08-16', '2023-08-17', '2023-08-18', '2023-08-19',
               '2023-08-20', '2023-08-21', '2023-08-22', '2023-08-23'],
              dtype='datetime64[ns]', freq='D')

## Consultas rápidas

In [4]:
# Fill a DataFrame with values drawn from a standard normal distribution:
# 20 rows x 4 columns, indexed by the previously created `fechas`, with custom column names.
# A seeded Generator makes the table identical on every "Restart & Run All";
# the unseeded np.random.randn call produced different values on each run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((20, 4)), index=fechas, columns=["A", "B", "C", "D"])

df

Unnamed: 0,A,B,C,D
2023-08-04,0.426949,0.840222,-0.232874,-1.698797
2023-08-05,0.32643,-0.162327,1.557541,0.506056
2023-08-06,-1.12921,-0.393665,-0.405837,-1.235995
2023-08-07,-1.562617,0.769905,-0.00402,-0.575096
2023-08-08,0.604248,0.043948,1.488435,0.351583
2023-08-09,0.334357,0.471756,0.86236,-1.27877
2023-08-10,1.260706,0.434635,0.5559,-0.945319
2023-08-11,0.543238,0.031334,-1.330968,0.714981
2023-08-12,-1.291275,-2.376736,0.926573,1.349529
2023-08-13,-0.403799,-1.425671,-1.267554,-0.505495


In [5]:
# First 5 rows (head); 5 is the number of rows shown when no argument is given
df.head()

Unnamed: 0,A,B,C,D
2023-08-04,0.426949,0.840222,-0.232874,-1.698797
2023-08-05,0.32643,-0.162327,1.557541,0.506056
2023-08-06,-1.12921,-0.393665,-0.405837,-1.235995
2023-08-07,-1.562617,0.769905,-0.00402,-0.575096
2023-08-08,0.604248,0.043948,1.488435,0.351583


In [6]:
# First three rows
df.head(3)

Unnamed: 0,A,B,C,D
2023-08-04,0.426949,0.840222,-0.232874,-1.698797
2023-08-05,0.32643,-0.162327,1.557541,0.506056
2023-08-06,-1.12921,-0.393665,-0.405837,-1.235995


In [7]:
# Last 5 rows (tail); 5 is the number of rows shown when no argument is given
df.tail()

Unnamed: 0,A,B,C,D
2023-08-19,-1.207194,0.353953,-0.18165,-1.491527
2023-08-20,-0.882318,-0.513279,1.384555,-1.838723
2023-08-21,0.319353,-0.554913,-0.145633,-0.365036
2023-08-22,-1.043198,-0.616661,0.032352,-0.11519
2023-08-23,-0.118775,0.676636,0.725511,0.574854


In [8]:
# Last three rows
df.tail(3)

Unnamed: 0,A,B,C,D
2023-08-21,0.319353,-0.554913,-0.145633,-0.365036
2023-08-22,-1.043198,-0.616661,0.032352,-0.11519
2023-08-23,-0.118775,0.676636,0.725511,0.574854


## Valores únicos

In [9]:
# Build a small DataFrame whose columns hold different kinds of data:
# integers, floats and strings (some values are deliberately repeated).
datos = {
    'enteros': [100, 200, 300, 400],
    'decimales': [3.14, 2.72, 1.618, 3.14],
    'cadenas': ['hola', 'adiós', 'hola', 'adiós'],
}
df = pd.DataFrame(datos)

df

Unnamed: 0,enteros,decimales,cadenas
0,100,3.14,hola
1,200,2.72,adiós
2,300,1.618,hola
3,400,3.14,adiós


In [10]:
# Array with the distinct values of a column
df['cadenas'].unique()

array(['hola', 'adiós'], dtype=object)

In [11]:
# Number of distinct values in a column
df['cadenas'].nunique()

2

In [12]:
# Series listing each distinct value of a column together with how many times it appears
# (value_counts returns a Series, not a DataFrame)
df['cadenas'].value_counts()

cadenas
hola     2
adiós    2
Name: count, dtype: int64

In [13]:
# Same count for the float column: 3.14 appears twice, the other values once
df['decimales'].value_counts()

decimales
3.140    2
2.720    1
1.618    1
Name: count, dtype: int64

## Aplicación de funciones

In [14]:
# Built-in method of the column Series: adds up all of its values
df['decimales'].sum()

10.618

In [15]:
# Apply an existing function to every element of the column
df['cadenas'].apply(len) # returns the length (number of characters) of each entry

0    4
1    5
2    4
3    5
Name: cadenas, dtype: int64

In [16]:
# Apply a user-defined function to every element of the column
def doblar(n):
    """Return ``n`` multiplied by 2."""
    return n*2

df['enteros'].apply(doblar)

0    200
1    400
2    600
3    800
Name: enteros, dtype: int64

In [17]:
# Apply an anonymous (lambda) function; the division turns the integer column into floats
df['enteros'].apply(lambda n: n/3)

0     33.333333
1     66.666667
2    100.000000
3    133.333333
Name: enteros, dtype: float64

In [18]:
# Permanently delete a column. This mutates df in place, so running this cell
# a second time raises KeyError because the column no longer exists.
del df['decimales']

In [19]:
# Show the DataFrame again to confirm that the 'decimales' column is gone
df

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós


## Recuperar índices

In [20]:
# Column labels (the column index)
df.columns

Index(['enteros', 'cadenas'], dtype='object')

In [23]:
# Iterating over df.columns yields each column label in turn
for columna in df.columns:
    print(columna)

enteros
cadenas


In [21]:
# Row labels (the row index)
df.index

RangeIndex(start=0, stop=4, step=1)

In [22]:
# Iterating over df.index yields each row label in turn
for index in df.index:
    print(index)

0
1
2
3


## Aplicar ordenaciones

In [24]:
# Sort by a column; the sorted result is returned and df is not changed (inplace=False by default)
df.sort_values(by='enteros')  # ascending order: smallest to largest

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós


In [25]:
# Sort by a column in reverse order. inplace=False by default, so df is left
# untouched; with inplace=True the change would be stored in df itself.
df.sort_values(by='enteros',ascending=False)   # descending order: largest to smallest

Unnamed: 0,enteros,cadenas
3,400,adiós
2,300,hola
1,200,adiós
0,100,hola


In [26]:
# df still has its original row order: neither sort above modified it
df

Unnamed: 0,enteros,cadenas
0,100,hola
1,200,adiós
2,300,hola
3,400,adiós
