In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from pandas import Series, DataFrame

In [3]:
%matplotlib inline

#### 5.1 Series

In [6]:
obj = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
obj, obj['a']

(d    4
 b    7
 a   -5
 c    3
 dtype: int64, -5)

In [7]:
obj[['c', 'a', 'd']]

c    3
a   -5
d    4
dtype: int64

In [8]:
obj[obj>0]

d    4
b    7
c    3
dtype: int64

In [9]:
obj*2

d     8
b    14
a   -10
c     6
dtype: int64

In [10]:
np.exp(obj)

d      54.598150
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [13]:
'a' in obj

True

In [16]:
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000
}
obj3 = Series(sdata)
obj3

Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [18]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [19]:
pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [20]:
obj3, obj4

(Ohio      35000
 Oregon    16000
 Texas     71000
 Utah       5000
 dtype: int64, California        NaN
 Ohio          35000.0
 Oregon        16000.0
 Texas         71000.0
 dtype: float64)

In [21]:
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [22]:
obj4.name = 'population'

In [23]:
obj4.index.name = 'state'
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [24]:
obj

d    4
b    7
a   -5
c    3
dtype: int64

In [25]:
obj.index = [1, 2, 3, 4]
obj

1    4
2    7
3   -5
4    3
dtype: int64

#### 5.2 DataFrame

In [27]:
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9]
}
frame = DataFrame(data)
frame

Unnamed: 0,pop,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002


In [28]:
frame.T

Unnamed: 0,0,1,2,3,4
pop,1.5,1.7,3.6,2.4,2.9
state,Ohio,Ohio,Ohio,Nevada,Nevada
year,2000,2001,2002,2001,2002


In [30]:
DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [54]:
frame2 = DataFrame(data, columns=['year', 'state', 'pop', 'debt'], 
                   index=['one', 'two', 'three', 'four', 'five'])
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


In [55]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [56]:
# 按列取出数据。Series

s1 = frame2['state']
s1

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
Name: state, dtype: object

In [57]:
s1.index, s1.values

(Index(['one', 'two', 'three', 'four', 'five'], dtype='object'),
 array(['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], dtype=object))

In [58]:
# 按照行取出Series
s2 = frame2.ix['three']
s2.index, s2.values

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


(Index(['year', 'state', 'pop', 'debt'], dtype='object'),
 array([2002, 'Ohio', 3.6000000000000001, nan], dtype=object))

In [59]:
s2

year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object

In [60]:
frame2['debt'] = np.arange(5    )

In [61]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0
two,2001,Ohio,1.7,1
three,2002,Ohio,3.6,2
four,2001,Nevada,2.4,3
five,2002,Nevada,2.9,4


In [43]:
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
val

two    -1.2
four   -1.5
five   -1.7
dtype: float64

In [52]:
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt,eastern
one,2000,Ohio,1.5,,True
two,2001,Ohio,1.7,-1.2,True
three,2002,Ohio,3.6,,True
four,2001,Nevada,2.4,-1.5,False
five,2002,Nevada,2.9,-1.7,False


In [62]:
frame2['eastern'] = frame2.state == 'Ohio'
frame2

Unnamed: 0,year,state,pop,debt,eastern
one,2000,Ohio,1.5,0,True
two,2001,Ohio,1.7,1,True
three,2002,Ohio,3.6,2,True
four,2001,Nevada,2.4,3,False
five,2002,Nevada,2.9,4,False


In [63]:
del frame2['eastern']

In [64]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0
two,2001,Ohio,1.7,1
three,2002,Ohio,3.6,2
four,2001,Nevada,2.4,3
five,2002,Nevada,2.9,4
