# 二维异构表 `pandas.DataFrame`

In [1]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Put CJK-capable font families ahead of matplotlib's defaults so Chinese
# titles and labels can render. Entries are in priority order (first listed
# is tried first); the families already configured are kept as fallbacks
# at the end of each list.
plt.rcParams['font.serif'] = [
    'STKaiti', 'KaiTi',
    'STFangsong', 'FangSong',  # was misspelled 'STFangson', which never matched
    'STSong', 'SimSun', 'SimSun-ExtB',
] + plt.rcParams['font.serif']
plt.rcParams['font.sans-serif'] = [
    'STXihei',
    'Microsoft YaHei',
    'SimHei',
] + plt.rcParams['font.sans-serif']

___
## 加载数据

### 从 `.csv` 文件加载数据

In [2]:
# Read the California housing training CSV straight from its URL.
housing_csv_url = "https://download.mlcc.google.cn/mledu-datasets/california_housing_train.csv"
df = pd.read_csv(housing_csv_url, sep=",")
df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.40,19.0,7650.0,1901.0,1129.0,463.0,1.8200,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.9250,65500.0
...,...,...,...,...,...,...,...,...,...
16995,-124.26,40.58,52.0,2217.0,394.0,907.0,369.0,2.3571,111400.0
16996,-124.27,40.69,36.0,2349.0,528.0,1194.0,465.0,2.5179,79000.0
16997,-124.30,41.84,17.0,2677.0,531.0,1244.0,456.0,3.0313,103600.0
16998,-124.30,41.80,19.0,2672.0,552.0,1298.0,478.0,1.9797,85800.0


### 从列 `pandas.Series` 加载数据

In [3]:
# Each Series becomes one column; the dict keys supply the column labels.
s1 = pd.Series(['北京', '上海', '广州'])
s2 = pd.Series([16410, 6430, 7434])
city_columns = {'城市': s1, '占地（平方千米）': s2}
df = pd.DataFrame(data=city_columns)
df

Unnamed: 0,城市,占地（平方千米）
0,北京,16410
1,上海,6430
2,广州,7434


___
## 展示数据

### 展示统计信息（`DataFrame.describe()`）

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max). Only the numeric
# column is summarised here, so the text column '城市' does not appear.
df.describe()

Unnamed: 0,占地（平方千米）
count,3.0
mean,10091.333333
std,5495.10376
min,6430.0
25%,6932.0
50%,7434.0
75%,11922.0
max,16410.0


### 展示前几条数据（`DataFrame.head()`）

In [5]:
# Preview the leading rows. This frame has only three rows, so all of them
# are shown.
df.head()

Unnamed: 0,城市,占地（平方千米）
0,北京,16410
1,上海,6430
2,广州,7434


### 绘制每列数据的直方图（`DataFrame.hist()`）

In [6]:
# Draw a histogram for the numeric column; the text column gets no subplot.
# The call returns a 2-D array of Axes, which is echoed below the figure.
df.hist()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

array([[<AxesSubplot:title={'center':'占地（平方千米）'}>]], dtype=object)

___
## 访问数据

### 访问一列

In [7]:
# Select a single column by its label; the result is a Series named '城市'.
df['城市']

0    北京
1    上海
2    广州
Name: 城市, dtype: object

### 访问一项

In [8]:
# Fetch one cell with a single label-based lookup (row label 1, column '城市')
# instead of chaining two [] lookups. Chained indexing is discouraged by
# pandas and does not work reliably for assignment.
df.loc[1, '城市']

'上海'

### 访问一行

In [9]:
# Positional slice of the first row; the result is still a DataFrame.
df.iloc[0:1]

Unnamed: 0,城市,占地（平方千米）
0,北京,16410


___
## 操控数据

### 添加一列

In [10]:
# Build the new column as a Series first, then attach it under its label.
# It has one value per existing row (index 0..2).
population = pd.Series([2153.6, 2428.14, 1530.59])
df['人口（万人）'] = population
df

Unnamed: 0,城市,占地（平方千米）,人口（万人）
0,北京,16410,2153.6
1,上海,6430,2428.14
2,广州,7434,1530.59


### 添加一行

In [11]:
# Assigning to a row label that does not exist yet appends a new row.
# len(df) is the next unused label because the index runs 0..len(df)-1.
# Note: re-running this cell appends the same row again.
new_row = {'城市': '深圳', '占地（平方千米）': 1997, '人口（万人）': 1343.88}
df.loc[len(df)] = new_row
df

Unnamed: 0,城市,占地（平方千米）,人口（万人）
0,北京,16410,2153.6
1,上海,6430,2428.14
2,广州,7434,1530.59
3,深圳,1997,1343.88


### 随机重建索引

In [12]:
# Shuffle the rows by reindexing with a permuted copy of the index labels.
# A seeded generator keeps the shuffled order the same on every run; change
# the seed to get a different order. df itself is not modified.
rng = np.random.default_rng(42)
df.reindex(rng.permutation(df.index))

Unnamed: 0,城市,占地（平方千米）,人口（万人）
1,上海,6430,2428.14
2,广州,7434,1530.59
0,北京,16410,2153.6
3,深圳,1997,1343.88
