In [2]:
# 导入包
import numpy as np
import pandas as pd

## DataFrame构建

### 用字典构造DataFrame

In [3]:
# 1. Build a DataFrame from a dict whose values are a list, a tuple and an ndarray
# Each dict key ends up as a column label; every sequence holds five elements
data = {
    "a": [1, 2, 3, 4, 5],
    "b": (6, 7, 8, 9, 10),
    "c": np.arange(11, 16),
}
# No index is passed to the constructor in this cell
df = pd.DataFrame(data=data)
df

Unnamed: 0,a,b,c
0,1,6,11
1,2,7,12
2,3,8,13
3,4,9,14
4,5,10,15


In [4]:
# Use the index attribute to inspect the row index of df
df.index

RangeIndex(start=0, stop=5, step=1)

In [6]:
# Use the columns attribute to inspect the column index of df
df.columns

Index(['a', 'b', 'c'], dtype='object')

In [7]:
# Use the values attribute to get the data of df as a 2-D NumPy array
# (this shows the cell values, not the column index)
df.values

array([[ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14],
       [ 5, 10, 15]])

In [8]:
# Build the DataFrame from the same dict, this time with explicit row labels A-E
df2 = pd.DataFrame(data=data, index=list("ABCDE"))
df2

Unnamed: 0,a,b,c
A,1,6,11
B,2,7,12
C,3,8,13
D,4,9,14
E,5,10,15


In [10]:
# Build the DataFrame with explicit row labels AND explicit column labels
# "d" is not a key of the dict, and the output shows that column with no values
df3 = pd.DataFrame(
    data=data,
    index=list("ABCDE"),
    columns=list("abcd"),
)
df3

Unnamed: 0,a,b,c,d
A,1,6,11,
B,2,7,12,
C,3,8,13,
D,4,9,14,
E,5,10,15,


In [12]:
# 2. Build a DataFrame from a dict of Series
# Column "a" holds 5 values while "b" and "c" hold 4 each;
# the output shows the missing entries of the shorter columns as empty (NaN)
data2 = dict(
    a=pd.Series(np.arange(0, 5)),
    b=pd.Series(np.arange(4, 8)),
    c=pd.Series(np.arange(8, 12)),
)
# Hand the dict of Series to the constructor
df4 = pd.DataFrame(data=data2)
df4

Unnamed: 0,a,b,c
0,0,4.0,8.0
1,1,5.0,9.0
2,2,6.0,10.0
3,3,7.0,11.0
4,4,,


In [None]:
# Compact form: create the dict of Series inline in the constructor call
# Note: all three Series hold 4 values here (column "a" is np.arange(4)),
# and df4 is rebound to this new frame
df4 = pd.DataFrame(
    data=dict(
        a=pd.Series(np.arange(0, 4)),
        b=pd.Series(np.arange(4, 8)),
        c=pd.Series(np.arange(8, 12)),
    )
)

In [14]:
# 3. Build a DataFrame from a dict of dicts
# Outer keys become the column labels, inner keys become the row labels
data3 = dict(
    a={"手机": 4999, "平板": 2799, "笔记本": 4599},
    b={"手机": 5999, "平板": 3799, "笔记本": 5299},
    c={"手机": 6999, "平板": 4799, "笔记本": 6499},
)
# Hand the nested dict to the constructor
df5 = pd.DataFrame(data=data3)
df5

Unnamed: 0,a,b,c
手机,4999,5999,6999
平板,2799,3799,4799
笔记本,4599,5299,6499


### 用列表构造DataFrame

In [15]:
# 1. Build a DataFrame from a 2-D ndarray
# The integers 0..11 arranged as 4 rows by 3 columns
arr = np.arange(12).reshape((4, 3))
# No labels are passed, so the output shows integer row and column labels
df6 = pd.DataFrame(data=arr)
df6

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [17]:
# 2. Build a DataFrame from a list of dicts
# Every dict is one row; the dict keys become the column labels
list1 = [
    {"手机": 4999, "平板": 2799, "笔记本": 4599},
    {"手机": 5999, "平板": 3799, "笔记本": 5299},
    {"手机": 6999, "平板": 4799, "笔记本": 6499},
]
# Hand the list of records to the constructor
df7 = pd.DataFrame(data=list1)
df7

Unnamed: 0,手机,平板,笔记本
0,4999,2799,4599
1,5999,3799,5299
2,6999,4799,6499


In [20]:
# 3. Build a DataFrame from a list of Series
# A seeded generator keeps the random values identical on every run,
# so the displayed output can be reproduced after a kernel restart
rng = np.random.default_rng(42)
# The three Series hold 3, 2 and 4 random floats respectively
list2 = [
    pd.Series(rng.random(3)),
    pd.Series(rng.random(2)),
    pd.Series(rng.random(4)),
]
# Each Series becomes one row; positions a shorter Series lacks show up as NaN
df8 = pd.DataFrame(list2)
df8

Unnamed: 0,0,1,2,3
0,0.615171,0.729912,0.392151,
1,0.200781,0.66731,,
2,0.908103,0.364,0.719871,0.352662


## DataFrame基本用法

In [25]:
# Example frame used by the cells below: the integers 0..15 as a 4x4 grid,
# rows labelled a-d and columns labelled A-D
df9 = pd.DataFrame(
    data=np.arange(16).reshape((4, 4)),
    index=list("abcd"),
    columns=list("ABCD"),
)
df9

Unnamed: 0,A,B,C,D
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [26]:
# 1. Transpose with the T attribute (rows and columns swap places)
df9.T

Unnamed: 0,a,b,c,d
A,0,4,8,12
B,1,5,9,13
C,2,6,10,14
D,3,7,11,15


In [30]:
# 2. Select one column by its label; print the column and then its type
print(df9["C"], type(df9["C"]), sep="\n")

a     2
b     6
c    10
d    14
Name: C, dtype: int64
<class 'pandas.core.series.Series'>


In [31]:
# 3. Add a column: assigning a scalar to a new label creates column "E"
# with that value in every row (this modifies df9 in place)
df9["E"] = 99
df9

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,99
b,4,5,6,7,99
c,8,9,10,11,99
d,12,13,14,15,99


In [32]:
# 3 (continued). Assign a list to column "E", one value per row;
# "E" already exists from the previous cell, so its values are overwritten
df9["E"] = [2, 3, 4, 5]
df9

Unnamed: 0,A,B,C,D,E
a,0,1,2,3,2
b,4,5,6,7,3
c,8,9,10,11,4
d,12,13,14,15,5


In [33]:
# 4. Delete a column: del is a statement, so no call-style parentheses are needed
# This removes column "E" from df9 in place
del df9["E"]
df9

Unnamed: 0,A,B,C,D
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15
