In [1]:
import pandas as pd
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [2]:
# 将Series看做字典
data['b']

0.5

In [3]:
'a' in data

True

In [4]:
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [5]:
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [6]:
data['e'] = 2.33
data

a    0.25
b    0.50
c    0.75
d    1.00
e    2.33
dtype: float64

In [7]:
# 将Series看做一维数组
# 显式索引包含最后元素
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [8]:
data[0:2]

a    0.25
b    0.50
dtype: float64

In [9]:
data[(data > 0.9) & (data < 2.5)]

d    1.00
e    2.33
dtype: float64

In [10]:
data[['a', 'e']]

a    0.25
e    2.33
dtype: float64

In [11]:
# 为减少Series索引为整数带来的冲突
data = pd.Series(['a', 'b', 'c', 'd'], [1, 2, 3, 4])
data

1    a
2    b
3    c
4    d
dtype: object

In [12]:
# loc()显式
data.loc[1]

'a'

In [13]:
data.loc[2:3]

2    b
3    c
dtype: object

In [14]:
data.loc[[4, 2]]

4    d
2    b
dtype: object

In [15]:
# iloc()隐式
data.iloc[1:3]

2    b
3    c
dtype: object

In [16]:
data.iloc[[0, 2]]

1    a
3    c
dtype: object

In [17]:
# 将dataframe看做字典
area = pd.Series({'Beijing': 10, 'Nanjin': 20, 'Hubei': 30, 'Jiangsu': 40})
pop = pd.Series({'Beijing': 50, 'Nanjin': 60, 'Jiangsu': 70, 'Hubei': 80})
data = pd.DataFrame({'Area': area, 'Population': pop})
data

Unnamed: 0,Area,Population
Beijing,10,50
Hubei,30,80
Jiangsu,40,70
Nanjin,20,60


In [18]:
data['Area']

Beijing    10
Hubei      30
Jiangsu    40
Nanjin     20
Name: Area, dtype: int64

In [19]:
data.Population

Beijing    50
Hubei      80
Jiangsu    70
Nanjin     60
Name: Population, dtype: int64

In [20]:
data.Population is data['Population']

True

In [21]:
data['Density'] = data['Population'] / data['Area']
data

Unnamed: 0,Area,Population,Density
Beijing,10,50,5.0
Hubei,30,80,2.666667
Jiangsu,40,70,1.75
Nanjin,20,60,3.0


In [22]:
# 将dataframe看做二维数组
data.values

array([[10.        , 50.        ,  5.        ],
       [30.        , 80.        ,  2.66666667],
       [40.        , 70.        ,  1.75      ],
       [20.        , 60.        ,  3.        ]])

In [23]:
data.T

Unnamed: 0,Beijing,Hubei,Jiangsu,Nanjin
Area,10.0,30.0,40.0,20.0
Population,50.0,80.0,70.0,60.0
Density,5.0,2.666667,1.75,3.0


In [24]:
data.values[0]

array([10., 50.,  5.])

In [25]:
data['Area']

Beijing    10
Hubei      30
Jiangsu    40
Nanjin     20
Name: Area, dtype: int64

In [26]:
data.iloc[:3, :2]

Unnamed: 0,Area,Population
Beijing,10,50
Hubei,30,80
Jiangsu,40,70


In [27]:
data.loc[:'Nanjin', :'Area']

Unnamed: 0,Area
Beijing,10
Hubei,30
Jiangsu,40
Nanjin,20


In [28]:
# ix()混合效果
data.ix[: 'Hubei', : 2]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


Unnamed: 0,Area,Population
Beijing,10,50
Hubei,30,80


In [29]:
data.loc[data.Density > 2, ['Area', 'Population']]

Unnamed: 0,Area,Population
Beijing,10,50
Hubei,30,80
Nanjin,20,60


In [30]:
data[data.Density > 3]

Unnamed: 0,Area,Population,Density
Beijing,10,50,5.0
