# Seaborn

Source: https://github.com/jdhp-docs/notebooks/blob/master/python_seaborn_en.ipynb

<a href="https://colab.research.google.com/github/jdhp-docs/notebooks/blob/master/python_seaborn_en.ipynb"><img align="left" src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab" title="Open and Execute in Google Colaboratory"></a>

<a href="https://mybinder.org/v2/gh/jdhp-docs/notebooks/master?filepath=python_seaborn_en.ipynb"><img align="left" src="https://mybinder.org/badge.svg" alt="Open in Binder" title="Open and Execute in Binder"></a>

Official documentation: https://seaborn.pydata.org/index.html

In [None]:
import math

import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Display the installed seaborn version so readers know which release
# produced the figures in this notebook.
sns.__version__

## Aspect

In [None]:
# Scale fonts and line widths for presentation slides.
# Alternative preset: "poster" (pass it instead of "talk").
sns.set_context("talk")

### Figsize

In [None]:
# Fetch seaborn's example "fmri" dataset and preview its first rows.
df = sns.load_dataset(name="fmri")
df.head(n=5)

In [None]:
# Line plot of signal against timepoint at seaborn's default figure size.
sns.relplot(
    data=df,
    x="timepoint",
    y="signal",
    kind="line",
);

In [None]:
# Same line plot, resized through the figure-level sizing arguments:
# `height` is the facet height in inches and `aspect` the width/height ratio.
sns.relplot(
    data=df,
    x="timepoint",
    y="signal",
    kind="line",
    height=6,
    aspect=2,
);

## Relplot

### Scatter plot

In [None]:
# Fetch seaborn's example "tips" dataset and preview its first rows.
tips = sns.load_dataset(name="tips")
tips.head(n=5)

In [None]:
# Figure-level scatter plot: `relplot` draws a scatter plot when `kind`
# is not specified.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
);

In [None]:
# Axes-level counterpart of the plot above: draws on the current
# matplotlib axes instead of creating a FacetGrid.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
);

In [None]:
# Encode five extra variables at once: colour, marker size and marker
# style inside each panel, plus a grid of panels split by row and column.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="size",
    size="day",
    style="time",
    row="sex",
    col="smoker",
);

### Line plot

Official documentation: https://seaborn.pydata.org/tutorial/relational.html#aggregation-and-representing-uncertainty

"The default behavior in seaborn is to aggregate the multiple measurements at each x value by plotting the mean and the 95% confidence interval around the mean."

#### First example

In [None]:
# Synthetic data for the aggregation demo: 100 independent runs, each
# sampling pure standard-normal noise around a true mean of 0 on a grid of
# 100 x values in [-10, 10]. Every x value therefore has 100 measurements.
TRUE_MEAN = 0

# Seeded generator so the data (and the figures built from it) are
# reproducible under Restart & Run All.
rng = np.random.default_rng(0)

rows = [
    [x, TRUE_MEAN + rng.normal(), run]
    for run in range(100)
    for x in np.linspace(-10, 10, 100)
]

# Build the frame once from the collected records (long/tidy format).
df = pd.DataFrame(rows, columns=["x", "y", "run"])
df.head()

In [None]:
# Default line-plot behaviour: the measurements sharing an x value are
# aggregated into their mean, drawn with a 95% confidence interval.
g = sns.relplot(x="x", y="y", kind="line", data=df,
                height=6, aspect=2)

# Annotate through the axes of the returned FacetGrid rather than the
# pyplot state machine, so the cell does not depend on a `plt` import.
g.ax.axhline(0, color="r", linestyle=":", label="Actual mean")

g.ax.legend();

In [None]:
# Disable aggregation (`estimator=None`) and draw one faint line per run
# (`units="run"`) to show the raw spread behind the mean.
g = sns.relplot(x="x", y="y", kind="line", data=df,
                height=6, aspect=2,
                units="run", estimator=None, alpha=0.1)

# Annotate through the axes of the returned FacetGrid rather than the
# pyplot state machine, so the cell does not depend on a `plt` import.
g.ax.axhline(0, color="r", linestyle=":", label="Actual mean")

g.ax.legend();

In [None]:
# Swap the aggregation function: summarise the measurements at each x with
# the median instead of the default mean.
g = sns.relplot(x="x", y="y", kind="line", data=df,
                height=6, aspect=2,
                estimator=np.median)

# Annotate through the axes of the returned FacetGrid rather than the
# pyplot state machine, so the cell does not depend on a `plt` import.
g.ax.axhline(0, color="r", linestyle=":", label="Actual median")

g.ax.legend();

#### Second example

In [None]:
# Synthetic data with two underlying signals: for each of 100 runs, sample
# sin(x) and cos(x) plus standard-normal noise on a grid of 100 x values in
# [-10, 10]. The "func" column records which signal generated each row.
SIGNALS = {"sin": math.sin, "cos": math.cos}

# Seeded generator so the data (and the figures built from it) are
# reproducible under Restart & Run All.
rng = np.random.default_rng(0)

rows = [
    [x, signal(x) + rng.normal(), func, run]
    for run in range(100)
    for func, signal in SIGNALS.items()
    for x in np.linspace(-10, 10, 100)
]

# Build the frame once from the collected records (long/tidy format).
df = pd.DataFrame(rows, columns=["x", "y", "func", "run"])
df.head()

In [None]:
# One aggregated line per value of "func", distinguished by colour.
sns.relplot(
    data=df,
    x="x",
    y="y",
    hue="func",
    kind="line",
    height=6,
    aspect=2,
);

#### Third example

In [None]:
# Fetch seaborn's example "fmri" dataset and preview its first rows.
fmri = sns.load_dataset(name="fmri")
fmri.head(n=5)

In [None]:
# Raw measurements as a scatter plot (the `relplot` default when `kind`
# is not given): several signal values share each timepoint.
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    height=6,
    aspect=2,
);

In [None]:
# Same data with timepoint treated as a categorical axis; the wide aspect
# ratio leaves room for every timepoint along x.
sns.catplot(
    data=fmri,
    x="timepoint",
    y="signal",
    aspect=3,
);

In [None]:
# Aggregated view: one line through the per-timepoint estimate of signal.
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    kind="line",
    height=6,
    aspect=2,
);

## Pairplot

In [None]:
# Tutorial reference:
# https://seaborn.pydata.org/tutorial/distributions.html#visualizing-pairwise-relationships-in-a-dataset

# Fetch seaborn's example "iris" dataset and preview its first rows.
iris = sns.load_dataset(name="iris")
iris.head(n=5)

In [None]:
# Grid of pairwise relationships between the iris columns, coloured by
# species.
sns.pairplot(
    iris,
    hue="species",
);

In [None]:
# Tutorial reference:
# https://seaborn.pydata.org/tutorial/distributions.html#visualizing-pairwise-relationships-in-a-dataset

# Fetch seaborn's example "titanic" dataset and preview its first rows.
titanic = sns.load_dataset(name="titanic")
titanic.head(n=5)

In [None]:
# Restrict the pair grid to three columns via `vars`, colouring the points
# by the "survived" column.
sns.pairplot(
    titanic,
    vars=["survived", "pclass", "fare"],
    hue="survived",
);