## Pandas
### 基于NumPy的高级数据结构和精巧工具，快速简单地处理数据
- 自动或明确的数据对齐
- 时间序列功能
- 以相同的数据结构来处理时间序列和非时间序列
- 支持传递元数据（坐标轴标签）的算术运算和缩减
- 处理丢失数据（缺失值）

### 数据结构
- Series
- DataFrame

In [1]:
import pandas as pd
import numpy as np
# Build a Series from a plain Python list. The np.nan entry marks a missing
# value; as the output shows, the resulting dtype is float64.
a = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
a

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [2]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency);
# used as the row index of the DataFrame built in the next cell.
dates = pd.date_range(start='2013-01-01', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Seed NumPy's global generator so that Restart & Run All always rebuilds the
# same random frame; unseeded, every run produces different numbers.
# NOTE: re-execute the notebook after this change to refresh the saved outputs.
np.random.seed(42)

# 6x4 frame of standard-normal samples: one row per date, columns A-D.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.810479,1.285681,-1.029797,1.384755
2013-01-02,0.535963,1.640086,-0.48009,0.745207
2013-01-03,0.113613,-0.003081,-0.102008,1.395351
2013-01-04,0.431961,-1.191429,0.907321,1.340646
2013-01-05,-0.511627,-0.297069,-1.120703,-0.942699
2013-01-06,0.401615,0.298942,-0.348988,-1.016558


In [4]:
# print() emits the plain-text repr of the first five rows (head's default),
# while the bare last expression becomes the cell's rendered output.
print(df.head(n=5))
df.head(n=1)

                   A         B         C         D
2013-01-01 -0.810479  1.285681 -1.029797  1.384755
2013-01-02  0.535963  1.640086 -0.480090  0.745207
2013-01-03  0.113613 -0.003081 -0.102008  1.395351
2013-01-04  0.431961 -1.191429  0.907321  1.340646
2013-01-05 -0.511627 -0.297069 -1.120703 -0.942699


Unnamed: 0,A,B,C,D
2013-01-01,-0.810479,1.285681,-1.029797,1.384755


In [5]:
# Last five rows (tail's default), followed by only the last two.
print(df.tail(n=5))
print(df.tail(n=2))

                   A         B         C         D
2013-01-02  0.535963  1.640086 -0.480090  0.745207
2013-01-03  0.113613 -0.003081 -0.102008  1.395351
2013-01-04  0.431961 -1.191429  0.907321  1.340646
2013-01-05 -0.511627 -0.297069 -1.120703 -0.942699
2013-01-06  0.401615  0.298942 -0.348988 -1.016558
                   A         B         C         D
2013-01-05 -0.511627 -0.297069 -1.120703 -0.942699
2013-01-06  0.401615  0.298942 -0.348988 -1.016558


In [6]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.026841,0.288855,-0.362377,0.48445
std,0.558993,1.043011,0.736813,1.16027
min,-0.810479,-1.191429,-1.120703,-1.016558
25%,-0.355317,-0.223572,-0.89237,-0.520722
50%,0.257614,0.147931,-0.414539,1.042926
75%,0.424374,1.038997,-0.163753,1.373728
max,0.535963,1.640086,0.907321,1.395351


In [7]:
# Swap the axes: column labels A-D become the rows, the dates become the columns.
df.T # transpose

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.810479,0.535963,0.113613,0.431961,-0.511627,0.401615
B,1.285681,1.640086,-0.003081,-1.191429,-0.297069,0.298942
C,-1.029797,-0.48009,-0.102008,0.907321,-1.120703,-0.348988
D,1.384755,0.745207,1.395351,1.340646,-0.942699,-1.016558


In [8]:
# Display the frame again: it is unchanged by the head/tail/describe/T calls above.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.810479,1.285681,-1.029797,1.384755
2013-01-02,0.535963,1.640086,-0.48009,0.745207
2013-01-03,0.113613,-0.003081,-0.102008,1.395351
2013-01-04,0.431961,-1.191429,0.907321,1.340646
2013-01-05,-0.511627,-0.297069,-1.120703,-0.942699
2013-01-06,0.401615,0.298942,-0.348988,-1.016558


In [9]:
# Order the rows by the values in column B, smallest first (the default order).
df.sort_values(by='B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-04,0.431961,-1.191429,0.907321,1.340646
2013-01-05,-0.511627,-0.297069,-1.120703,-0.942699
2013-01-03,0.113613,-0.003081,-0.102008,1.395351
2013-01-06,0.401615,0.298942,-0.348988,-1.016558
2013-01-01,-0.810479,1.285681,-1.029797,1.384755
2013-01-02,0.535963,1.640086,-0.48009,0.745207
