In [1]:
# 二维数据结构操作

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Two float Series whose label indexes overlap but differ in length:
# 'two' carries an extra label 'd' that 'one' does not have.
d = {
    'one': pd.Series(data=[1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series(data=[1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}

In [4]:
# Show the raw dict: each value is a Series carrying its own label index.
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [5]:
# Build the frame from the dict of Series; each dict key becomes a column
# and the row labels are taken from the Series indexes.
df = pd.DataFrame(data=d)

In [6]:
# Display the frame; 'one' has no label 'd', so that cell shows as missing.
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [7]:
# Pick and order the rows explicitly: only labels 'd', 'b', 'a' are kept,
# in exactly that order.
pd.DataFrame(
    data=d,
    index=['d', 'b', 'a'],
)

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [8]:
# Request columns 'two' and 'three'; 'three' is not a key of d, so that
# column comes back entirely NaN.
pd.DataFrame(
    data=d,
    index=['d', 'b', 'a'],
    columns=['two', 'three'],
)

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [9]:
# 如果指定了 `columns` 值，会去字典中寻找对应的键，找不到的列全部填充为 `NaN`（如上例中的 `three` 列）。

In [10]:
# Row labels of df.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [11]:
# Column labels of df.
df.columns

Index(['one', 'two'], dtype='object')

In [12]:
# Rebind d to a dict of plain lists (no labels attached), both of length 4.
d = {
    'one': [1., 2., 3., 4.],
    'two': [4., 3., 2., 1.],
}

In [13]:
# Show the dict of lists that now backs d.
d

{'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}

In [14]:
# No index is given, so the rows get the default integer labels 0..3.
pd.DataFrame(data=d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [15]:
# Attach explicit row labels to the list values, position by position.
pd.DataFrame(
    data=d,
    index=['a', 'b', 'c', 'd'],
)

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [16]:
# Structured array with three named fields: a 32-bit int ('i4'), a 32-bit
# float ('f4') and a fixed-width 10-byte string ('S10').
# 'S10' replaces the legacy 'a10' alias, which NumPy 2.0 deprecates; the
# resulting dtype is the same one the array repr reports.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
# Fill both records at once; the str values end up stored as bytes in 'C'.
data[:] = [(1, 2., 'Hello'), (2, 3., "World")]

data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [17]:
# The field names of the structured array become the column labels.
pd.DataFrame(data=data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [18]:
# Same frame, but with explicit row labels instead of 0 and 1.
pd.DataFrame(
    data=data,
    index=['first', 'second'],
)

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [19]:
# Re-display the structured array that the surrounding cells build frames from.
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [20]:
# columns= selects the fields by name and fixes their order: C, A, B.
pd.DataFrame(
    data=data,
    columns=['C', 'A', 'B'],
)

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


In [21]:
# 从字典列表中构造

In [22]:
# One dict per row. The columns are the union of all keys; the first record
# has no 'c', so that cell is missing in the resulting frame.
data2 = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]

pd.DataFrame(data=data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [23]:
# Label the two records 'first' and 'second' instead of 0 and 1.
pd.DataFrame(
    data=data2,
    index=['first', 'second'],
)

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [24]:
# Keep only keys 'a' and 'b'; the 'c' entry of the second record is dropped.
pd.DataFrame(
    data=data2,
    columns=['a', 'b'],
)

Unnamed: 0,a,b
0,1,2
1,5,10


In [25]:
# 其他构造方法

`DataFrame.from_dict` 从现有的一个字典中构造，`DataFrame.from_records` 从现有的一个记录数组中构造：

In [26]:
# Build the frame from the record array and use field 'C' as the row index
# rather than as a regular column.
pd.DataFrame.from_records(
    data=data,
    index='C',
)

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


In [27]:
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0, so
# the (column, values) pairs are routed through from_dict instead.
# dict keeps insertion order, so the columns stay in the order A, B.
items = [('A', [1, 2, 3]), ('B', [4, 5, 6])]
pd.DataFrame.from_dict(dict(items))

  """Entry point for launching an IPython kernel.


Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [28]:
# 列操作

In [29]:
# Selecting a single column by label yields a Series named after the column.
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [30]:
# Derive two columns from existing ones. Both operations are element-wise;
# the missing value in 'one' gives NaN in 'three' and False in 'flag'.
df['three'] = df['one'].mul(df['two'])

df['flag'] = df['one'].gt(2)

df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [31]:
# Remove columns in place: `del` drops 'two', while `pop` drops 'three' and
# hands the removed column back, kept here in `three`.
# NOTE(review): both statements mutate df, so this cell is not meant to be
# re-run without rebuilding df first.
del df["two"]

three = df.pop("three")

df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [32]:
# Assigning a scalar to a new column fills every row with that value.
df['foo'] = 'bar'

df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [33]:
# Only the first two rows of 'one' are supplied. The assignment aligns on the
# row labels, so the remaining labels ('c', 'd') are left as NaN.
first_two = df['one'].iloc[:2]
df['one_trunc'] = first_two

df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


In [34]:
# Re-display 'one'; the earlier column insertions and deletions left it unchanged.
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [35]:
# Positional slice: the first row of column 'one'.
df['one'].iloc[:1]

a    1.0
Name: one, dtype: float64

In [36]:
# Positional slice: the first two rows of column 'one'.
df['one'].iloc[:2]

a    1.0
b    2.0
Name: one, dtype: float64

In [37]:
# Place the values of 'one' in a new column 'bar' at position 1, i.e. right
# after the first column, instead of appending it at the end.
df.insert(loc=1, column='bar', value=df['one'])

df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,


In [38]:
# Display a frame with an extra 'test' column holding one + bar; the result
# is only shown, not bound to a name.
df.assign(test=lambda frame: frame["one"] + frame["bar"])

Unnamed: 0,one,bar,flag,foo,one_trunc,test
a,1.0,1.0,False,bar,1.0,2.0
b,2.0,2.0,False,bar,2.0,4.0
c,3.0,3.0,True,bar,,6.0
d,,,False,bar,,
