# Pandas

Se dará un repaso general del paquete __Pandas__ utilizando estos tutoriales:
* http://pandas.pydata.org/pandas-docs/stable/10min.html
* https://www.analyticsvidhya.com/blog/2016/01/12-pandas-techniques-python-data-manipulation/
* [Data Munging in Python (using Pandas) – Baby steps in Python](https://www.analyticsvidhya.com/blog/2014/09/data-munging-python-using-pandas-baby-steps-python/)

In [1]:
# Libraries used throughout the Pandas module
import pandas as pd
import numpy as np
# NOTE(review): this binds `plt` to the top-level `matplotlib` package, not to
# `matplotlib.pyplot` (the usual target of the `plt` alias) — confirm which one
# the later cells expect.
import matplotlib as plt

In [2]:
# Build a Series from a plain list of numbers; np.nan marks a missing value
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
# Build a DatetimeIndex of six dates starting on 2013-01-01; it is used as the
# row index when the DataFrame is created from a NumPy array.
# Tip: run `?pd.date_range` in a cell to view the help for this function.
dates = pd.date_range(start='2013 01 01', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# Create a DataFrame from a NumPy array of standard-normal samples (6 rows x 4
# columns), using `dates` as the row index and A-D as the column labels.
# The samples come from a dedicated generator with a fixed seed, so re-running
# the notebook reproduces the same values and NumPy's global random state is
# left untouched.
df = pd.DataFrame(
    np.random.RandomState(42).randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.211224,0.012319,0.641143,-0.14009
2013-01-02,-1.776084,0.667507,-0.136253,-0.358499
2013-01-03,-0.142,-1.043172,-0.210231,0.701546
2013-01-04,-1.236635,0.13533,-0.675865,0.438129
2013-01-05,0.285367,1.416518,0.350277,-1.11321
2013-01-06,-1.473272,-1.079103,1.557857,1.41299


In [9]:
# Create a DataFrame from a dict of objects that can each be converted into a
# column; scalar entries are repeated to match the four-row columns.
df2 = pd.DataFrame(
    {
        "A": 1.0,  # float scalar
        "B": pd.Timestamp("20130102"),  # single timestamp
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),  # Series of ones
        "D": np.array([3] * 4, dtype="int32"),  # NumPy int32 array
        "E": pd.Categorical(["test", "train", "test", "train"]),  # categorical labels
        "F": "foo",  # string scalar
    }
)

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [15]:
# Inspect the data type (dtype) of each column of df2
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [13]:
# Show the first five rows of df
df.head(5)

Unnamed: 0,A,B,C,D
2013-01-01,0.211224,0.012319,0.641143,-0.14009
2013-01-02,-1.776084,0.667507,-0.136253,-0.358499
2013-01-03,-0.142,-1.043172,-0.210231,0.701546
2013-01-04,-1.236635,0.13533,-0.675865,0.438129
2013-01-05,0.285367,1.416518,0.350277,-1.11321


In [16]:
# Show the last three rows of df
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,-1.236635,0.13533,-0.675865,0.438129
2013-01-05,0.285367,1.416518,0.350277,-1.11321
2013-01-06,-1.473272,-1.079103,1.557857,1.41299


In [17]:
# Show the index (this cell); the following cells show the columns and the
# underlying NumPy data
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [19]:
# Column labels of df
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [20]:
# Contents of df as a NumPy array (index and column labels are not included)
df.values

array([[ 0.21122364,  0.01231897,  0.64114319, -0.14009032],
       [-1.77608437,  0.66750693, -0.13625328, -0.35849895],
       [-0.1420003 , -1.04317225, -0.2102313 ,  0.7015459 ],
       [-1.23663496,  0.13532996, -0.67586474,  0.43812908],
       [ 0.28536675,  1.41651754,  0.35027676, -1.11321009],
       [-1.47327207, -1.07910314,  1.55785746,  1.41299017]])

In [22]:
# Summary statistics per column: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.688567,0.018233,0.254488,0.156811
std,0.911673,0.971723,0.786339,0.885495
min,-1.776084,-1.079103,-0.675865,-1.11321
25%,-1.414113,-0.779299,-0.191737,-0.303897
50%,-0.689318,0.073824,0.107012,0.149019
75%,0.122918,0.534463,0.568427,0.635692
max,0.285367,1.416518,1.557857,1.41299


In [23]:
# Transpose the data: rows become columns and columns become rows
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.211224,-1.776084,-0.142,-1.236635,0.285367,-1.473272
B,0.012319,0.667507,-1.043172,0.13533,1.416518,-1.079103
C,0.641143,-0.136253,-0.210231,-0.675865,0.350277,1.557857
D,-0.14009,-0.358499,0.701546,0.438129,-1.11321,1.41299


In [27]:
# Sort along the column axis (by column label) in descending order
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.14009,0.641143,0.012319,0.211224
2013-01-02,-0.358499,-0.136253,0.667507,-1.776084
2013-01-03,0.701546,-0.210231,-1.043172,-0.142
2013-01-04,0.438129,-0.675865,0.13533,-1.236635
2013-01-05,-1.11321,0.350277,1.416518,0.285367
2013-01-06,1.41299,1.557857,-1.079103,-1.473272


In [25]:
# Sort the rows by the values in column 'B' (ascending)
df.sort_values('B')

Unnamed: 0,A,B,C,D
2013-01-06,-1.473272,-1.079103,1.557857,1.41299
2013-01-03,-0.142,-1.043172,-0.210231,0.701546
2013-01-01,0.211224,0.012319,0.641143,-0.14009
2013-01-04,-1.236635,0.13533,-0.675865,0.438129
2013-01-02,-1.776084,0.667507,-0.136253,-0.358499
2013-01-05,0.285367,1.416518,0.350277,-1.11321
