# DataFrame操作

用 Python 做数据科学时，主要的操作对象是 DataFrame（数据框）。

**DataFrame 是二维的，有行（Index）和列（Column），类似于 Excel 表格。**

对于数据集的操作全部基于pandas

In [77]:
import pandas as pd

# Load the dataset from the Excel workbook into a DataFrame.
# A forward slash is used as the path separator so the relative path resolves
# on Windows, macOS and Linux alike (a backslash only works on Windows).
df = pd.read_excel("data/anova2.xlsx")

In [78]:
# Preview the first five rows of the dataset
df.head(n=5)

Unnamed: 0,Blend,Crop,Yield
0,Blend X,Wheat,123
1,Blend X,Wheat,156
2,Blend X,Wheat,112
3,Blend X,Wheat,100
4,Blend X,Corn,128


<br><br>

---

<br><br>

In [79]:
# Inspect the row labels (the DataFrame's index)
df.index

RangeIndex(start=0, stop=24, step=1)

In [80]:
# Inspect the column labels of the dataset
df.columns

Index(['Blend', 'Crop', 'Yield'], dtype='object')

In [81]:
# Replace the default 0-based row labels with labels starting at 100.
# The stop value is derived from the number of rows instead of being
# hard-coded, so the assignment keeps working if the dataset size changes.
df.index = range(100, 100 + len(df))
df.head()

Unnamed: 0,Blend,Crop,Yield
100,Blend X,Wheat,123
101,Blend X,Wheat,156
102,Blend X,Wheat,112
103,Blend X,Wheat,100
104,Blend X,Corn,128


In [82]:
# Overwrite all column labels at once by assigning a new list of names
new_column_names = ['Blend2', 'Crop2', 'Yield2']
df.columns = new_column_names
df.head(n=5)

Unnamed: 0,Blend2,Crop2,Yield2
100,Blend X,Wheat,123
101,Blend X,Wheat,156
102,Blend X,Wheat,112
103,Blend X,Wheat,100
104,Blend X,Corn,128


<br><br>

---

<br><br>

In [83]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric column(s)
df.describe()

Unnamed: 0,Yield2
count,24.0
mean,145.541667
std,25.411362
min,100.0
25%,126.75
50%,142.5
75%,168.0
max,187.0


In [84]:
# Print a summary of the dataset: index range, per-column non-null counts and dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 100 to 123
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Blend2  24 non-null     object
 1   Crop2   24 non-null     object
 2   Yield2  24 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 708.0+ bytes


<br><br>

---

<br><br>

In [85]:
# Drop the Blend2 and Crop2 columns; the result is a new DataFrame, df itself is untouched
df1 = df.drop(columns=['Blend2', 'Crop2'])
df1.head(n=5)

Unnamed: 0,Yield2
100,123
101,156
102,112
103,100
104,128


In [86]:
# Select only the Blend2 and Crop2 columns (all rows)
df2 = df[['Blend2', 'Crop2']]
df2.head(n=5)

Unnamed: 0,Blend2,Crop2
100,Blend X,Wheat
101,Blend X,Wheat
102,Blend X,Wheat
103,Blend X,Wheat
104,Blend X,Corn


In [87]:
# Reorder the columns by selecting them in the desired order
column_order = ['Crop2', 'Blend2']
df2 = df2.loc[:, column_order]
df2.head(n=5)

Unnamed: 0,Crop2,Blend2
100,Wheat,Blend X
101,Wheat,Blend X
102,Wheat,Blend X
103,Wheat,Blend X
104,Corn,Blend X


In [88]:
# Transpose: rows become columns and columns become rows
df_T = df2.transpose()
df_T.head(n=5)

Unnamed: 0,100,101,102,103,104,105,106,107,108,109,...,114,115,116,117,118,119,120,121,122,123
Crop2,Wheat,Wheat,Wheat,Wheat,Corn,Corn,Corn,Corn,Soy,Soy,...,Wheat,Wheat,Corn,Corn,Corn,Corn,Soy,Soy,Soy,Soy
Blend2,Blend X,Blend X,Blend X,Blend X,Blend X,Blend X,Blend X,Blend X,Blend X,Blend X,...,Blend Y,Blend Y,Blend Y,Blend Y,Blend Y,Blend Y,Blend Y,Blend Y,Blend Y,Blend Y


<br><br>

---

<br><br>

In [89]:
# Copy df2 to the system clipboard in a format that pastes cleanly into Excel
df2.to_clipboard(excel=True)

# Write df2 to an Excel workbook, omitting the index column.
# A forward slash is used as the path separator so the relative path resolves
# on Windows, macOS and Linux alike (a backslash only works on Windows).
df2.to_excel("data/df2.xlsx", index=False)