In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fix the seed so that Restart & Run All regenerates the same sample frame;
# with unseeded draws every value shown below changes on each run.
# NOTE: outputs saved from an earlier unseeded run will differ until the
# notebook is re-executed.
SEED = 0
rng = np.random.default_rng(SEED)

# Six consecutive daily timestamps starting 2013-01-01, used as the row index.
dates = pd.date_range('20130101', periods=6)
# 6x4 frame of standard-normal draws, one column per letter A-D.
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.875507,-0.640603,0.803094,0.021134
2013-01-02,-2.344134,-0.849953,1.175674,0.818201
2013-01-03,0.93021,-0.147256,-0.129504,2.470906
2013-01-04,0.223134,0.570788,0.313121,1.832275
2013-01-05,-0.185226,-0.580592,0.013121,-0.377053
2013-01-06,-1.09899,-2.034249,-1.360057,-0.734068


## 浏览数据

In [3]:
# Preview both ends of the frame: the leading rows, then the last three rows.
first_rows = df.head()
last_rows = df.tail(3)
print(first_rows)
print(last_rows)

# Show the row index, the column labels, and the underlying NumPy array.
for part in (df.index, df.columns, df.values):
    print(part)

                   A         B         C         D
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275
2013-01-05 -0.185226 -0.580592  0.013121 -0.377053
                   A         B         C         D
2013-01-04  0.223134  0.570788  0.313121  1.832275
2013-01-05 -0.185226 -0.580592  0.013121 -0.377053
2013-01-06 -1.098990 -2.034249 -1.360057 -0.734068
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')
[[ 0.87550655 -0.64060339  0.80309435  0.02113396]
 [-2.3441345  -0.84995274  1.17567374  0.81820063]
 [ 0.93020986 -0.14725551 -0.12950428  2.47090573]
 [ 0.22313404  0.57078811  0.31312073  1.83227514]
 [-0.18522585 -0.5805917   0.01312053 -0.37705251]
 [-1.09898966 -2.03424918 -1

In [4]:
# Show summary statistics for each column: count, mean, std, min,
# the 25%/50%/75% quantiles, and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.266583,-0.613644,0.135908,0.671899
std,1.264298,0.859788,0.881032,1.273172
min,-2.344134,-2.034249,-1.360057,-0.734068
25%,-0.870549,-0.797615,-0.093848,-0.277506
50%,0.018954,-0.610598,0.163121,0.419667
75%,0.712413,-0.25559,0.680601,1.578757
max,0.93021,0.570788,1.175674,2.470906


In [6]:
# Transpose: the dates become the columns and A-D become the rows.
transposed = df.T
print(transposed)

# Sort along an axis by its labels; axis=1 orders the column labels, and
# ascending=False gives descending order (True would give ascending).
by_label_desc = df.sort_index(ascending=False, axis=1)
print(by_label_desc)

# Sort the rows by the values in column B.
by_b_values = df.sort_values(by='B')
print(by_b_values)

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.875507   -2.344134    0.930210    0.223134   -0.185226   -1.098990
B   -0.640603   -0.849953   -0.147256    0.570788   -0.580592   -2.034249
C    0.803094    1.175674   -0.129504    0.313121    0.013121   -1.360057
D    0.021134    0.818201    2.470906    1.832275   -0.377053   -0.734068
                   D         C         B         A
2013-01-01  0.021134  0.803094 -0.640603  0.875507
2013-01-02  0.818201  1.175674 -0.849953 -2.344134
2013-01-03  2.470906 -0.129504 -0.147256  0.930210
2013-01-04  1.832275  0.313121  0.570788  0.223134
2013-01-05 -0.377053  0.013121 -0.580592 -0.185226
2013-01-06 -0.734068 -1.360057 -2.034249 -1.098990
                   A         B         C         D
2013-01-06 -1.098990 -2.034249 -1.360057 -0.734068
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-05 -0.185226 -0.580592  0.013121 -0.377053
2013-01-03  0.9302

## 选择数据

In [7]:
print(df)

# Selecting a single column with [] yields a Series; equivalent to df.A.
column_a = df['A']
print(column_a)

# Slicing inside [] selects rows instead: first by integer position, then by
# date label (the label slice keeps both endpoints, as the output shows).
first_three = df[:3]
date_window = df['20130102':'20130104']
print(first_three)
print(date_window)

                   A         B         C         D
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275
2013-01-05 -0.185226 -0.580592  0.013121 -0.377053
2013-01-06 -1.098990 -2.034249 -1.360057 -0.734068
2013-01-01    0.875507
2013-01-02   -2.344134
2013-01-03    0.930210
2013-01-04    0.223134
2013-01-05   -0.185226
2013-01-06   -1.098990
Freq: D, Name: A, dtype: float64
                   A         B         C         D
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
                   A         B         C         D
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275


### 通过标签选择

In [10]:
print(df)

# Label of the first row, and the column labels reused by the selections below.
first_date = dates[0]
ab_columns = ['A', 'B']

# Cross-section: a single row picked by its label.
print(df.loc[first_date])
# Every row, restricted to the chosen column labels.
print(df.loc[:, ab_columns])
# Both combined: a label slice of rows together with the chosen columns.
print(df.loc['20130102':'20130104', ab_columns])

                   A         B         C         D
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275
2013-01-05 -0.185226 -0.580592  0.013121 -0.377053
2013-01-06 -1.098990 -2.034249 -1.360057 -0.734068
A    0.875507
B   -0.640603
C    0.803094
D    0.021134
Name: 2013-01-01 00:00:00, dtype: float64
                   A         B
2013-01-01  0.875507 -0.640603
2013-01-02 -2.344134 -0.849953
2013-01-03  0.930210 -0.147256
2013-01-04  0.223134  0.570788
2013-01-05 -0.185226 -0.580592
2013-01-06 -1.098990 -2.034249
                   A         B
2013-01-02 -2.344134 -0.849953
2013-01-03  0.930210 -0.147256
2013-01-04  0.223134  0.570788


In [15]:
print(df)

# Boolean indexing.
a_positive = df.A > 0
b_negative = df.B < 0
# Keep only the rows where column A is positive.
print(df[a_positive])
# An element-wise mask keeps the frame's shape; entries failing the test
# are shown as NaN.
print(df[df > 0])
# Combine two row conditions with &.
print(df[a_positive & b_negative])

# Filtering with isin(): build a new frame with an extra label column E,
# leaving df itself untouched.
df2 = df.assign(E=['one', 'one', 'two', 'three', 'four', 'three'])
print(df2)
wanted_rows = df2['E'].isin(['two', 'four'])
print(df2[wanted_rows])

                   A         B         C         D
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-02 -2.344134 -0.849953  1.175674  0.818201
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275
2013-01-05 -0.185226 -0.580592  0.013121 -0.377053
2013-01-06 -1.098990 -2.034249 -1.360057 -0.734068
                   A         B         C         D
2013-01-01  0.875507 -0.640603  0.803094  0.021134
2013-01-03  0.930210 -0.147256 -0.129504  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275
                   A         B         C         D
2013-01-01  0.875507       NaN  0.803094  0.021134
2013-01-02       NaN       NaN  1.175674  0.818201
2013-01-03  0.930210       NaN       NaN  2.470906
2013-01-04  0.223134  0.570788  0.313121  1.832275
2013-01-05       NaN       NaN  0.013121       NaN
2013-01-06       NaN       NaN       NaN       NaN
                   A         B         C         D
2013-01-01  0.875507 -0.640603 