# Pandas Learning

## 习惯上做以下导入（import）

In [2]:
#习惯上做以下导入
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 创建对象（Object Creation）
### 通过传入一个值列表来创建 Series，pandas 会自动创建默认的整数索引

In [3]:
# Build a Series from a plain list of values; no index is passed, so pandas
# assigns the default integer index. np.nan marks a missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# IPython introspection syntax (not plain Python): shows the docstring of pd.Series.
?pd.Series

### 通过传入 NumPy 数组创建 DataFrame，并指定日期时间索引和列标签

In [5]:
# Six consecutive daily timestamps starting at 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
dates
 

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# 6x4 frame of standard-normal samples: rows are labelled by `dates`,
# columns are 'A'..'D'.
# NOTE: no random seed is set, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.755106,0.038395,-2.679681,-0.387482
2013-01-02,0.590029,-0.170177,-0.427307,1.860307
2013-01-03,-0.889593,-0.787098,-0.195277,-0.279759
2013-01-04,2.056697,1.746963,-1.267537,-1.577925
2013-01-05,1.898102,0.353501,-1.680767,0.514604
2013-01-06,0.03666,-0.101189,1.478656,-0.129529


### 通过传入一个字典来创建 DataFrame，字典中的值可以被转换为类似 Series 的对象

In [7]:
# Build a frame from a dict mapping column name -> column spec.
# Scalar entries ('A', 'B', 'F') are repeated across all four rows; the
# other entries already have length 4.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [8]:
# Display the frame built from the dict above.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [9]:
# Each column of df2 has its own specific dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### 查看 DataFrame 顶部和底部的行

In [10]:
# First five rows of df (5 is the default for head()).
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-0.755106,0.038395,-2.679681,-0.387482
2013-01-02,0.590029,-0.170177,-0.427307,1.860307
2013-01-03,-0.889593,-0.787098,-0.195277,-0.279759
2013-01-04,2.056697,1.746963,-1.267537,-1.577925
2013-01-05,1.898102,0.353501,-1.680767,0.514604


In [11]:
# Last three rows of df.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-04,2.056697,1.746963,-1.267537,-1.577925
2013-01-05,1.898102,0.353501,-1.680767,0.514604
2013-01-06,0.03666,-0.101189,1.478656,-0.129529


### 显示索引、列和底层 NumPy 数据

In [12]:
# Row labels of df: the DatetimeIndex that was passed in as `dates`.
# NOTE(review): the section heading also mentions columns and the underlying
# NumPy data, but only the index is shown in this cell.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

### 使用 describe() 显示数据的快速统计摘要

In [13]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.489465,0.180066,-0.795319,3.6e-05
std,1.273293,0.853522,1.429984,1.136714
min,-0.889593,-0.787098,-2.679681,-1.577925
25%,-0.557164,-0.15293,-1.57746,-0.360551
50%,0.313345,-0.031397,-0.847422,-0.204644
75%,1.571084,0.274725,-0.253284,0.353571
max,2.056697,1.746963,1.478656,1.860307


### 转置数据

In [14]:
# Swap rows and columns: the dates become column labels and A..D become rows.
df.transpose()

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.755106,0.590029,-0.889593,2.056697,1.898102,0.03666
B,0.038395,-0.170177,-0.787098,1.746963,0.353501,-0.101189
C,-2.679681,-0.427307,-0.195277,-1.267537,-1.680767,1.478656
D,-0.387482,1.860307,-0.279759,-1.577925,0.514604,-0.129529


### 按轴排序

In [15]:
# Sort along the column axis by label, in descending order (D, C, B, A).
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.387482,-2.679681,0.038395,-0.755106
2013-01-02,1.860307,-0.427307,-0.170177,0.590029
2013-01-03,-0.279759,-0.195277,-0.787098,-0.889593
2013-01-04,-1.577925,-1.267537,1.746963,2.056697
2013-01-05,0.514604,-1.680767,0.353501,1.898102
2013-01-06,-0.129529,1.478656,-0.101189,0.03666


### 按值排序

In [16]:
# Sort the rows of df by the values in column 'B' (ascending by default).
# DataFrame.sort() no longer exists in pandas and raised AttributeError here;
# sort_values(by=...) is its replacement. It returns a new frame and leaves
# df itself unchanged.
df.sort_values(by='B')

## 选择器（Selection）