# 2.7. Introducción a Pandas III.

In [None]:
import pandas as pd
import numpy as np
import matplotlib as plt
#%matplotlib inline

### Aplicación de funciones y Mapping

- Algunas funciones comunes:
<center>
<img src="imgs/pd8.png"  alt="drawing" width="700"/>
</center>


In [None]:
# Demo frame: 4 rows (US states) x 3 columns ('b', 'd', 'e') filled with
# samples drawn from a standard normal distribution.
frame = pd.DataFrame(
    np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

In [None]:
# With the default axis (0) the sum runs down the rows: one total per column.
frame.sum()

In [None]:
# Same as the default: axis passed positionally as 0 (one total per column).
frame.sum(0)

In [None]:
# axis passed positionally as 1: the sum runs across the columns, one total per row.
frame.sum(1)

In [None]:
# IPython-only help syntax (not plain Python): shows the documentation of frame.sum.
?frame.sum

In [None]:
# Keyword form of the row-wise sum (one total per row).
frame.sum(axis=1)

In [None]:
# axis='columns' is the string alias of axis=1: one total per row.
frame.sum(axis='columns')

In [None]:
# Element-wise absolute value; returns a new DataFrame with the same labels.
frame.abs()

In [None]:
# NumPy's element-wise absolute value applied directly to the DataFrame.
np.abs(frame)

- Con frame.apply(f) podemos aplicar la función f por filas o columnas.
- Por defecto (axis=0) apply pasa cada columna a la función, devolviendo un resultado por columna.
- Usar el argumento axis para cambiar la dirección (axis=1 o axis='columns' pasa cada fila).

In [None]:
def f(x):
    """Return the range (maximum minus minimum) of the values in ``x``."""
    return x.max() - x.min()


# With the default axis each column is handed to f: one range per column.
frame.apply(f)

In [None]:
# axis='columns': f receives each row, so the result has one value per row.
frame.apply(f, axis='columns')

In [None]:
# axis=0 spelled out explicitly (the default): f receives each column.
frame.apply(f, axis=0)

In [None]:
# axis=1 is the integer form of axis='columns': f receives each row.
frame.apply(f, axis=1)

- Podemos pasar funciones más complicadas, que retornen series

In [None]:
def f(x):
    """Return the minimum and maximum of ``x`` as a Series labelled 'min' and 'max'."""
    return pd.Series({'min': x.min(), 'max': x.max()})
# f returns a Series per column, so the result is a DataFrame with rows 'min' and 'max'.
frame.apply(f)

### Sorting

In [None]:
# Values 0..3 under a deliberately unsorted label index, used to show index sorting.
obj = pd.Series(range(4), index=list('dabc'))
obj

- Ordenar el índice.

In [None]:
# Returns a new Series ordered by index label; obj itself is not modified.
obj.sort_index()

In [None]:
# 2x4 frame holding 0..7, with row and column labels both out of order
# so that sorting on either axis has a visible effect.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    columns=list('dabc'),
    index=['three', 'one'],
)
frame

In [None]:
# Default axis: reorders the rows by index label ('one' before 'three').
frame.sort_index()

In [None]:
# axis=1: reorders the columns by label (a, b, c, d).
frame.sort_index(axis=1)

In [None]:
# Same column sort in descending label order (d, c, b, a).
frame.sort_index(axis=1, ascending=False)

- Los valores también se pueden ordenar.

In [None]:
# Series with the default integer index (0..3), used to show sorting by value.
obj = pd.Series([4, 7, -3, 2])
obj

In [None]:
# Ascending sort by value; each value keeps its original index label.
obj.sort_values()

- Con DataFrame pasar el argumento by.

In [None]:
# Two-column frame ('b' then 'a') used to show sorting rows by column values.
frame = pd.DataFrame(dict(b=[4, 7, -3, 2], a=[0, 1, 0, 1]))
frame

In [None]:
# NOTE(review): DataFrame.sort_values requires the `by` argument, so this call
# raises TypeError. Presumably kept on purpose to show the error - confirm.
frame.sort_values()

In [None]:
# Reorders the rows by ascending values of column 'b'.
frame.sort_values(by='b')

### Índices con Duplicados

In [None]:
# Values 0..4 under an index in which the labels 'a' and 'b' are repeated.
obj = pd.Series(range(5), index=list('aabbc'))
obj

- Para saber si tenemos índices únicos

In [None]:
# False here, because the labels 'a' and 'b' each appear twice in the index.
obj.index.is_unique

In [None]:
# 'a' labels two entries, so the lookup returns a Series containing both.
obj['a']

In [None]:
# 'c' labels a single entry, so the lookup returns a scalar.
obj['c']

## Resumen y cálculo de estadísticas descriptivas.

In [None]:
# 4x2 float frame with missing values (row 'c' is entirely NaN), used to show
# how the descriptive statistics below treat NaN.
df = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=list('abcd'),
)
df

In [None]:
# Column totals; NaN entries are skipped by default (skipna=True).
df.sum()

In [None]:
# Row totals; NaN is skipped, so the all-NaN row 'c' sums to 0.0.
df.sum(axis='columns')

In [None]:
# Row means without skipping NaN: any row that contains a NaN yields NaN.
df.mean(axis='columns', skipna=False)

In [None]:
# Running total down each column; NaN is skipped in the accumulation but
# positions that were NaN remain NaN in the output.
df.cumsum()

In [None]:
# Summary statistics per numeric column (count, mean, std, min, quartiles, max),
# computed over the non-NaN values.
df.describe()

In [None]:
# 16 string values: the pattern a, a, b, c repeated four times.
obj = pd.Series(list('aabc') * 4)
obj

In [None]:
# For non-numeric data describe() reports count, unique, top (most frequent value) and freq.
obj.describe()

### Ejemplo financiero

- Descarga de las cotizaciones de Apple.

In [None]:
import io
import json
import ssl
import urllib.request

import certifi

In [None]:
# Download five years of daily AAPL quotes as raw JSON bytes.
# NOTE(review): this is the legacy IEX 1.0 endpoint - confirm it is still served.
url = 'https://api.iextrading.com/1.0/stock/aapl/chart/5y'
# Verify TLS against certifi's CA bundle through an explicit SSLContext:
# urlopen's `cafile` argument was deprecated in Python 3.6 and removed in 3.13.
tls_context = ssl.create_default_context(cafile=certifi.where())
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(url, context=tls_context) as response:
    contents = response.read()

In [None]:
# Parse the downloaded JSON bytes into a DataFrame. The payload is wrapped in a
# file-like buffer because passing literal JSON directly to read_json is
# deprecated in recent pandas releases.
datos_apple = pd.read_json(io.BytesIO(contents))

In [None]:
# Keep only the price and volume columns, in this order.
datos_apple = datos_apple[['open', 'high', 'low', 'close', 'volume']]

In [None]:
# Preview the first rows (head() shows five by default).
datos_apple.head()

In [None]:
# Line plot of the 'close' column (attribute-style column access) against the index.
datos_apple.close.plot()

In [None]:
# Fractional change of the closing price relative to the previous row;
# the first entry has no predecessor and is therefore NaN.
returns = datos_apple['close'].pct_change()
# Show the last five values.
returns.tail()

In [None]:
# Histogram of the computed returns.
returns.hist()