# Pandas Learning

## 习惯上做以下导入（import）

In [30]:
# Conventional imports and aliases (pd / np / plt)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 创建对象（Object Creation）
### 通过传入值列表创建 Series，并让 pandas 自动生成默认的整数索引

In [31]:
# Build a Series from a plain list of values (np.nan marks a missing entry);
# no index is passed, so pandas supplies the default integer index.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [32]:
# IPython-only help query (not plain Python syntax): shows the documentation for pd.Series.
?pd.Series

### 通过传入 NumPy 数组创建 DataFrame，并指定日期索引和列标签

In [33]:
# Six consecutive calendar days starting at 2013-01-01 (daily frequency).
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates
 

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [34]:
# 6x4 frame of standard-normal samples: one row per entry in `dates`,
# columns labelled A-D. No seed is set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.standard_normal(size=(6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.175259,-0.97742,1.033126,2.32732
2013-01-02,0.77475,-0.409247,-0.401034,-1.18328
2013-01-03,-0.175293,-0.680197,-1.345738,-0.405269
2013-01-04,-0.260514,-1.135197,1.158282,0.701759
2013-01-05,-0.983832,1.39314,0.822164,1.414659
2013-01-06,2.212881,-0.456198,1.708807,-0.364518


### 通过传入字典创建 DataFrame（字典的各个值须能转换为类似 Series 的对象）

In [35]:
# Each dict key becomes a column. The scalar entries (A, B, F) are repeated on
# every row, while C, D and E each supply four values with their own dtype.
df2 = pd.DataFrame(dict(
    A=1.,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.full(4, 3, dtype='int32'),
    E=pd.Categorical(["test", "train", "test", "train"]),
    F='foo',
))

In [36]:
# Bare expression as the last line of the cell: renders df2 as the cell output.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [37]:
# Every column carries its own explicit dtype
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### 查看 DataFrame 顶部和底部的若干行

In [38]:
# First five rows of df (five is also the default when n is omitted).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-0.175259,-0.97742,1.033126,2.32732
2013-01-02,0.77475,-0.409247,-0.401034,-1.18328
2013-01-03,-0.175293,-0.680197,-1.345738,-0.405269
2013-01-04,-0.260514,-1.135197,1.158282,0.701759
2013-01-05,-0.983832,1.39314,0.822164,1.414659


In [39]:
# Last three rows of df.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.260514,-1.135197,1.158282,0.701759
2013-01-05,-0.983832,1.39314,0.822164,1.414659
2013-01-06,2.212881,-0.456198,1.708807,-0.364518


### 显示索引,列,和底层numpy数据

In [40]:
# Row labels of df: the DatetimeIndex that was passed as `index` when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

### 使用 describe() 显示数据的快速统计摘要

In [41]:
# Per-column summary statistics: count, mean, std, min, quartiles (25%/50%/75%) and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.232122,-0.37752,0.495935,0.415112
std,1.119913,0.913017,1.14027,1.309466
min,-0.983832,-1.135197,-1.345738,-1.18328
25%,-0.239209,-0.903114,-0.095234,-0.395081
50%,-0.175276,-0.568197,0.927645,0.168621
75%,0.537247,-0.420985,1.126993,1.236434
max,2.212881,1.39314,1.708807,2.32732


### 转置数据

In [42]:
# Swap rows and columns: the dates become column labels and A-D become the row index.
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.175259,0.77475,-0.175293,-0.260514,-0.983832,2.212881
B,-0.97742,-0.409247,-0.680197,-1.135197,1.39314,-0.456198
C,1.033126,-0.401034,-1.345738,1.158282,0.822164,1.708807
D,2.32732,-1.18328,-0.405269,0.701759,1.414659,-0.364518


### 按轴排序

In [43]:
# Order the column labels in descending order (D, C, B, A); row order is unchanged.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,2.32732,1.033126,-0.97742,-0.175259
2013-01-02,-1.18328,-0.401034,-0.409247,0.77475
2013-01-03,-0.405269,-1.345738,-0.680197,-0.175293
2013-01-04,0.701759,1.158282,-1.135197,-0.260514
2013-01-05,1.414659,0.822164,1.39314,-0.983832
2013-01-06,-0.364518,1.708807,-0.456198,2.212881


### 按值排序

In [45]:
# DataFrame.sort(columns=...) no longer exists in pandas; the recorded run of
# this cell failed with "AttributeError: 'DataFrame' object has no attribute 'sort'".
# sort_values(by=...) is the supported call: it returns a new frame with the rows
# ordered by the values in column 'B' (ascending by default) and leaves df unchanged.
df.sort_values(by='B')

## 选择器（Selection）