# 结构化网格

探索多维度数据，最好的方式是将数据集中不同子集画在同一张图。

In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt

In [3]:
sns.set(style="ticks")
np.random.seed(sum(map(ord, "axis_grids")))

## 数据网格绘图
http://seaborn.pydata.org/tutorial/axis_grids.html#subsetting-data-with-facetgrid

### 使用 `FacetGrid` 呈现子数据集

In [6]:
tips = sns.load_dataset("iris")

In [5]:
# 根据 time 初始化图片与子图片
Initializing the grid like this sets up the matplotlib figure and axes, 
g = sns.FacetGrid(tips, col="time")

SyntaxError: invalid syntax (<ipython-input-5-f2f2657a2584>, line 2)

In [None]:
g = sns.FacetGrid(tips, col="time")

# 使用 FacetGrid.map() 在网格中呈现数据
g.map(plt.hist, "tip");

In [None]:
g = sns.FacetGrid(tips, col="sex", hue="smoker")
g.map(plt.scatter, "total_bill", "tip", alpha=.7)

# 增加图例
g.add_legend();

In [None]:
g = sns.FacetGrid(tips, row="smoker", col="time", margin_titles=True)
g.map(sns.regplot, "size", "total_bill", color=".3", fit_reg=False, x_jitter=.1);

In [None]:
# 控制子图的大小、比例
g = sns.FacetGrid(tips, col="day", size=4, aspect=.5)
g.map(sns.barplot, "sex", "total_bill");

In [None]:
titanic = sns.load_dataset("titanic")
titanic = titanic.assign(deck=titanic.deck.astype(object)).sort("deck")

# 使用 gridspec_kws 个别控制子图大小
g = sns.FacetGrid(titanic, col="class", sharex=False,
                  gridspec_kws={"width_ratios": [5, 3, 3]})
g.map(sns.boxplot, "deck", "age");

In [None]:
ordered_days = tips.day.value_counts().index

# 自定义 row_order
g = sns.FacetGrid(tips, row="day", row_order=ordered_days,
                  size=1.7, aspect=4,)
g.map(sns.distplot, "total_bill", hist=False, rug=True);

In [None]:
# 定义调色板
pal = dict(Lunch="seagreen", Dinner="gray")
g = sns.FacetGrid(tips, hue="time", palette=pal, size=5)
g.map(plt.scatter, "total_bill", "tip", s=50, alpha=.7, linewidth=.5, edgecolor="white")
g.add_legend();

In [None]:
# 自定标识
g = sns.FacetGrid(tips, hue="sex", palette="Set1", size=5, hue_kws={"marker": ["^", "v"]})
g.map(plt.scatter, "total_bill", "tip", s=100, linewidth=.5, edgecolor="white")
g.add_legend();

In [None]:
# 如果一个离散变数有很多 level，不能使用 row，但可使用 col_wrap 定义每列的子图数量
attend = sns.load_dataset("attention").query("subject <= 12")
g = sns.FacetGrid(attend, col="subject", col_wrap=4, size=2, ylim=(0, 10))
g.map(sns.pointplot, "solutions", "score", color=".3", ci=None);

In [None]:
with sns.axes_style("white"):
    g = sns.FacetGrid(tips, row="sex", col="smoker", margin_titles=True, size=2.5)
g.map(plt.scatter, "total_bill", "tip", color="#334488", edgecolor="white", lw=.5);

# 使用 FacetGrid.map() 之后，调用 FacetGrid.set()，FacetGrid.set_axis_labels() 修改图片
g.set_axis_labels("Total bill (US Dollars)", "Tip");
g.set(xticks=[10, 30, 50], yticks=[2, 6, 10]);
g.fig.subplots_adjust(wspace=.02, hspace=.02);

In [None]:
g = sns.FacetGrid(tips, col="smoker", margin_titles=True, size=4)
g.map(plt.scatter, "total_bill", "tip", color="#338844", edgecolor="white", s=50, lw=1)

# 使用 ax 各别设定子图
for ax in g.axes.flat:
    ax.plot((0, 50), (0, .2 * 50), c='0.2', ls="--")
g.set(xlim=(0, 60), ylim=(0, 14));

#### 映射定制函数到网格

函数必须遵守以下规则:
1. 必须画到现有活跃的 axes
2. 必须接受表示位置参数的数据。FacetGrid 传递一系列以位置命名的数据给 FacetGrid.map()
3. 必须能接受颜色与标签参数

In [None]:
# x: 数据，kwargs: 传递给子图的参数
def quantile_plot(x, **kwargs):
    qntls, xr = stats.probplot(x, fit=False)
    plt.scatter(xr, qntls, **kwargs)

g = sns.FacetGrid(tips, col="sex", size=4)
g.map(quantile_plot, "total_bill");

In [None]:
def qqplot(x, y, **kwargs):
    _, xr = stats.probplot(x, fit=False)
    _, yr = stats.probplot(y, fit=False)
    plt.scatter(xr, yr, **kwargs)

g = sns.FacetGrid(tips, col="smoker", size=4)

# "total_bill", "tip" 当做 qqplot 的 x, y 参数
g.map(qqplot, "total_bill", "tip");

In [None]:
g = sns.FacetGrid(tips, hue="time", col="sex", size=4)
g.map(qqplot, "total_bill", "tip")

# 增加图例
g.add_legend();

In [None]:
# hue_kws 定制标识形状
g = sns.FacetGrid(tips, hue="time", col="sex", size=4,
                  hue_kws={"marker": ["s", "D"]})
g.map(qqplot, "total_bill", "tip", s=40, edgecolor="w")
g.add_legend();

In [None]:
def hexbin(x, y, color, **kwargs):
    cmap = sns.light_palette(color, as_cmap=True)
    plt.hexbin(x, y, gridsize=15, cmap=cmap, **kwargs)

# 暂时设定风格
with sns.axes_style("dark"):
    g = sns.FacetGrid(tips, hue="time", col="time", size=4)
    
# hexbin的参数，x="total_bill", y="tip", **kwargs=extent=[0, 50, 0, 10])
g.map(hexbin, "total_bill", "tip", extent=[0, 50, 0, 10]);

### 使用 `PairGrid` 与 `pairplot()` 绘画配对关系
http://seaborn.pydata.org/tutorial/axis_grids.html#plotting-pairwise-relationships-with-pairgrid-and-pairplot

In [None]:
# 载入数据集
iris = sns.load_dataset("iris")
iris.ix[::50]

In [None]:
g = sns.PairGrid(iris)
g.map(plt.scatter);

In [None]:
g = sns.PairGrid(iris)

# 指定对角线套用函数
g.map_diag(plt.hist)

# 指定非对角套用函数
g.map_offdiag(plt.scatter);

In [None]:
# 指定类别颜色
g = sns.PairGrid(iris, hue="species")

g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)

# 类别图例
g.add_legend();

In [None]:
# 指定 column
g = sns.PairGrid(iris, vars=["sepal_length", "sepal_width"], hue="species")

g.map(plt.scatter);

In [None]:
g = sns.PairGrid(iris)

# 上三角套用函数
g.map_upper(plt.scatter)

# 小三角套用函数
g.map_lower(sns.kdeplot, cmap="Blues_d")

# 对角套用函数
g.map_diag(sns.kdeplot, lw=3, legend=False);

In [None]:
# 自定义网格 x, y 变数
g = sns.PairGrid(tips, y_vars=["tip"], x_vars=["total_bill", "size"], size=4)
g.map(sns.regplot, color=".3")
g.set(ylim=(-1, 11), yticks=[0, 5, 10]);

In [None]:
# 设置更美观的调色盘
g = sns.PairGrid(tips, hue="size", palette="GnBu_d")
g.map(plt.scatter, s=50, edgecolor="white")
g.add_legend();

In [None]:
# PairGrid 有弹性，但如果只想看一眼数据，用 pairplot() 更快速
sns.pairplot(iris, hue="species");

In [None]:
# 定制美学细节
sns.pairplot(iris, hue="species", palette="Set2", diag_kind="kde", size=2.5)