In [53]:
import numpy as np
import pandas as pd

In [54]:
# Approach 1: a dict whose values are equal-length lists or NumPy arrays;
# each key becomes a column label.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 3,
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': np.array([1.5, 1.7, 3.6, 2.4, 2.9, 3.2]),
}
frame1 = pd.DataFrame(data=data)
frame1

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [55]:
frame1.dtypes  # each column of a DataFrame may have a different dtype

state     object
year       int64
pop      float64
dtype: object

In [56]:
# When `columns` is given explicitly, the DataFrame's columns are laid out
# in exactly that order.
pd.DataFrame(
    data=data,
    columns=['year', 'state', 'pop'],
)

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [57]:
# If `columns` names a key that the dict does not contain, that column shows
# up filled with missing values (the `index` must match the data in length).
frame2 = pd.DataFrame(
    data=data,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=['year', 'state', 'pop', 'debt'],
)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [58]:
# Approach 2: a dict of dicts -- the outer keys become the columns and the
# inner keys become the row index.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = pd.DataFrame(data=pop, dtype=np.float64)
# One print call with a newline separator: the frame first, then its dtypes.
print(frame3, frame3.dtypes, sep='\n')

      Nevada  Ohio
2001     2.4   1.7
2002     2.9   3.6
2000     NaN   1.5
Nevada    float64
Ohio      float64
dtype: object


In [59]:
# Approach 3: build the DataFrame from a NumPy ndarray (or a multi-dimensional list).
frame4 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    columns=['a', 'b', 'c'],  # column labels supplied at construction time
    dtype=np.int32,
)
# One print call with a newline separator: the frame first, then its dtypes.
print(frame4, frame4.dtypes, sep='\n')

   a  b  c
0  1  2  3
1  4  5  6
2  7  8  9
a    int32
b    int32
c    int32
dtype: object


In [60]:
# Approach 4: a dict of Series -- labels missing from one Series show up
# as missing values in that column.
d = {
    'one': pd.Series(data=[1., 2., 3.], index=['a', 'b', 'e']),
    'two': pd.Series(data=[1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
}
frame5 = pd.DataFrame(data=d)
frame5

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,,3.0
d,,4.0
e,3.0,


In [61]:
# Approach 5: build from an existing DataFrame.
# Use case: reorder the rows (index) and the columns of a DataFrame.
frame6 = pd.DataFrame(
    data=frame5,
    index=['b', 'a', 'c', 'd', 'e'],
    columns=['two', 'one'],
)
frame6

Unnamed: 0,two,one
b,2.0,2.0
a,1.0,1.0
c,3.0,
d,4.0,
e,,3.0


In [62]:
# Create an empty DataFrame (no rows, no columns) from an empty list.
empty = pd.DataFrame(data=[])
empty

In [63]:
# Assigning a list to a new column label adds that column to `empty` in place.
empty["one"] = [1, 2, 3, 4]
empty

Unnamed: 0,one
0,1
1,2
2,3
3,4


In [64]:
# Build a DataFrame in which every cell holds the same scalar value.
index = list(range(1, 4))
columns = ["A", "B", "C", "D"]
pd.DataFrame(
    data=-1,
    index=index,
    columns=columns,
)

Unnamed: 0,A,B,C,D
1,-1,-1,-1,-1
2,-1,-1,-1,-1
3,-1,-1,-1,-1


In [65]:
# A DataFrame filled entirely with NaN
pd.DataFrame(index=index, columns=columns)  # with no data supplied, every cell defaults to NaN

Unnamed: 0,A,B,C,D
1,,,,
2,,,,
3,,,,
