# Visualizing statistical relationships

Statistical analysis is a process of understanding how variables in a dataset relate to each other and how those relationships depend on other variables. Visualization can be a core component of this process because, when data are visualized properly, the human visual system can see trends and patterns that indicate a relationship.

### 1. Numerical Data Plotting
* relplot()
* scatterplot()
* lineplot()

### 2. Categorical Data Plotting
* catplot()
* boxplot()
* stripplot()
* swarmplot()

### 3. Visualizing Distribution of the Data
* distplot()
* kdeplot()
* jointplot()
* rugplot()

### 4. Linear Regression and Relationship
* regplot()
* lmplot()

### 5. Controlling Plotted Figure Aesthetics
* figure styling
* axes styling
* color palettes

In [1]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
sns.set(style = 'darkgrid')

In [3]:
tips = sns.load_dataset('tips')
tips.tail()

URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1108)>

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', data = tips)

In [None]:
dir(sns.FacetGrid)

In [None]:
tips['smoker'].value_counts()

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', data = tips, hue = 'smoker', style = 'time')

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', style = 'size', data = tips)

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', hue = 'size', data = tips, palette = 'ch:r=-0.8, l= 0.95')

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', data = tips, size = 'size')

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', data = tips, size = 'size', sizes = (15, 200))

In [None]:
from numpy.random import randn

In [None]:
df = pd.DataFrame(dict(time = np.arange(500), value = randn(500).cumsum()))

In [None]:
df.head()

In [None]:
sns.relplot(x = 'time', y = 'value', kind = 'line', data = df, sort = True)

In [None]:
df = pd.DataFrame(randn(500, 2).cumsum(axis = 0), columns = ['time', 'value'])

In [None]:
df.head()

In [None]:
sns.relplot(x = 'time', y = 'value', kind = 'line', data = df, sort = False)

In [None]:
fmri = sns.load_dataset('fmri')
fmri.head()

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', kind = 'line', data = fmri, ci = False)

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', kind = 'line', data = fmri, ci = 'sd')

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', estimator = None, kind = 'line', data = fmri)

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', hue = 'event', kind = 'line', data = fmri)

In [None]:
fmri.head()

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', hue = 'region', style = 'event', kind = 'line', data = fmri)

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', hue = 'region', style = 'event', kind = 'line', data = fmri, markers = True, dashes = False)

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', hue = 'event', style = 'event', kind = 'line', data = fmri)

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', hue = 'region', units = 'subject', estimator = None, kind = 'line', data = fmri.query("event == 'stim'"))

In [None]:
fmri.head()

In [None]:
dots = sns.load_dataset('dots').query("align == 'dots'")
dots.head()

In [None]:
sns.relplot(x = 'time', y = 'firing_rate', data = dots, kind = 'line', hue = 'coherence', style = 'choice')

In [None]:
palette = sns.cubehelix_palette(light = 0.5, n_colors=6)
sns.relplot(x = 'time', y = 'firing_rate', data = dots, kind = 'line', hue = 'coherence', style = 'choice', palette=palette)

In [None]:
sns.relplot(x = 'time', y = 'firing_rate', hue = 'coherence', size = 'choice', style = 'choice', kind = 'line', data = dots)

In [None]:
df = pd.DataFrame(dict(time = pd.date_range('2019-06-02', periods = 500), value = randn(500).cumsum()))

In [None]:
df.head()

In [None]:
g = sns.relplot(x = 'time', y = 'value', kind = 'line', data = df)
g.fig.autofmt_xdate()

In [None]:
tips.head()

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', hue = 'smoker', col = 'time', data = tips)

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', hue = 'smoker', col = 'size', data = tips)

In [None]:
sns.relplot(x = 'timepoint', y = 'signal', hue = 'subject', col = 'region', row = 'event', height=3, kind = 'line', estimator = None, data = fmri)

In [None]:
sns.relplot(x = 'total_bill', y = 'tip', hue = 'smoker', col = 'size', data = tips, col_wrap=3, height=3)

In [None]:
sns.scatterplot(x = 'total_bill', y = 'tip', data = tips)

In [None]:
fmri.head()

In [None]:
sns.lineplot(x = 'timepoint', y  = 'signal', style = 'event', hue = 'region', data = fmri, markers = True, ci = 68, err_style='bars')

In [None]:
sns.lineplot(x = 'timepoint', y = 'signal', hue = 'event', units = 'subject', estimator = None, lw = 1, data = fmri.query("region == 'frontal'"))

In [None]:
dots.head()

In [None]:
sns.lineplot(x = 'time', y = 'firing_rate', hue = 'coherence', style = 'choice', data = dots)

In [None]:
sns.scatterplot(x = 'total_bill', y = 'tip', data = tips, hue = 'smoker', size = 'size', style = 'time')

In [None]:
iris = sns.load_dataset('iris')

In [None]:
iris.head()

In [None]:
sns.scatterplot(x = 'sepal_length', y = 'petal_length', data = iris)

In [None]:
sns.scatterplot(x = iris['sepal_length'], y = iris['petal_length'])

### 2. Categorical Data Plotting
- catplot()
- boxplot()
- stripplot()
- swarmplot()
- etc...

In [None]:
tips.head()

In [None]:
titanic = sns.load_dataset('titanic')

In [None]:
titanic.head()

In [None]:
#catplot()

In [None]:
sns.catplot(x = 'day', y = 'total_bill', data = tips)

In [None]:
sns.catplot(y = 'day', x = 'total_bill', data = tips)

In [None]:
sns.catplot(x = 'day', y = 'total_bill', data = tips, jitter = False)

In [None]:
sns.catplot(x = 'day', y = 'total_bill', data = tips, kind = 'swarm', hue = 'size')

In [None]:
sns.catplot(x = 'smoker', y = 'tip', data = tips, order= ['No', 'Yes'])

In [None]:
tips.head()

In [None]:
sns.catplot(x = 'day', y = 'total_bill', kind = 'box', data = tips, hue = 'sex')

In [None]:
sns.catplot(x = 'day', y = 'total_bill', kind = 'box', data = tips, hue = 'sex', dodge = False)

In [None]:
diamonds = sns.load_dataset('diamonds')
diamonds.head()

In [None]:
sns.catplot(x = 'color', y = 'price', kind = 'boxen', data = diamonds.sort_values('color'))

In [None]:
sns.catplot(x = 'color', y = 'price', kind = 'boxen', data = diamonds.sort_values('color'))

In [None]:
sns.catplot(x = 'day', y = 'total_bill', kind = 'boxen', data = tips, dodge = False)

In [None]:
sns.catplot(x = 'total_bill', y = 'day', hue = 'sex', kind = 'violin', data = tips, split = True,)

In [None]:
g = sns.catplot(x = 'day', y = 'total_bill', kind = 'violin', inner = None, data = tips)
sns.swarmplot(x = 'day', y = 'total_bill', color = 'k', size = 3, data = tips, ax = g.ax)

In [None]:
titanic.head()

In [None]:
sns.catplot(x = 'sex', y = 'survived', hue = 'class', kind = 'bar', data = titanic)

In [None]:
sns.catplot(x = 'deck', kind = 'count', palette = 'ch:0.95', data = titanic, hue = 'class')

In [None]:
sns.catplot(x = 'sex', y = 'survived', hue = 'class', kind = 'point', data = titanic)

### 3. Visualizing Distribution of the Data
- distplot()
- kdeplot()
- jointplot()
- rugplot()

In [None]:
x = randn(100)

In [None]:
sns.distplot(x, kde = True, hist = False, rug= False, bins= 30)

In [None]:
sns.kdeplot(x, shade=True, cbar = True, bw = 1, cut = 0)

In [None]:
tips.head()

In [None]:
x = tips['total_bill']
y = tips['tip']

In [None]:
sns.jointplot(x = x, y=y)

In [None]:
sns.set()
sns.jointplot(x = x, y=y, kind = 'hex')

In [None]:
sns.jointplot(x = x, y = y, kind = 'kde')

In [None]:
f, ax = plt.subplots(figsize = (6,6))
cmap = sns.cubehelix_palette(as_cmap = True, dark = 0, light = 1, reverse= True)
sns.kdeplot(x, y, cmap = cmap, n_levels=60, shade=True)

In [None]:
# Joint KDE of total bill vs. tip in magenta. x and y are passed by keyword,
# like the other jointplot cells: seaborn releases that make them keyword-only
# raise a TypeError on positional use, and keywords work on older releases too.
g = sns.jointplot(x = x, y = y, kind = 'kde', color = 'm')
# Overlay the raw observations as white '+' markers on the joint axes.
g.plot_joint(plt.scatter, c = 'w', s = 30, linewidth = 1, marker = '+')
# Make the first collection on the joint axes fully transparent
# (presumably the lowest KDE contour level; confirm for the seaborn version in use).
g.ax_joint.collections[0].set_alpha(0)

In [None]:
sns.pairplot(iris)

In [None]:
g = sns.PairGrid(iris)
g.map_diag(sns.kdeplot)
g.map_offdiag(sns.kdeplot, n_levels = 10)

### 4. Linear Regression and Relationship
- regplot()
- lmplot()

In [None]:
tips.head()

In [None]:
sns.regplot(x = 'total_bill', y = 'tip', data = tips)

In [None]:
sns.lmplot(x = 'total_bill', y= 'tip', data = tips)

In [None]:
sns.lmplot(x = 'size', y = 'tip', data = tips, x_jitter = 0.05)

In [None]:
sns.lmplot(x = 'size', y = 'tip', data = tips, x_estimator = np.mean)

In [None]:
data = sns.load_dataset('anscombe')
data.head()

In [None]:
data['dataset'].value_counts()

In [None]:
sns.lmplot(x = 'x', y = 'y', data = data.query("dataset == 'I'"), ci = None, scatter_kws={'s': 80})

In [None]:
sns.lmplot(x = 'x', y = 'y', data = data.query("dataset == 'II'"), ci = None, scatter_kws={'s': 80}, order = 1)

In [None]:
sns.lmplot(x = 'x', y = 'y', data = data.query("dataset == 'III'"), ci = None, scatter_kws={'s': 80}, robust=True)

In [None]:
f, ax = plt.subplots(figsize = (8,4))
sns.regplot(x = 'total_bill', y = 'tip', data = tips, ax = ax)

In [None]:
sns.lmplot(x = 'total_bill', y = 'tip', data = tips, col = 'day', col_wrap=2, height = 4)

### 5. Controlling Plotted Figure Aesthetics
- figure styling
- axes styling
- color palettes
- etc..

In [None]:
def sinplot(flip=1):
    """Plot six phase-shifted sine curves with decreasing amplitude.

    Curve ``k`` (for k = 1..6) is ``sin(x + 0.5 * k) * (7 - k) * flip``,
    evaluated at 100 evenly spaced points on [0, 14] and drawn with
    ``plt.plot``.

    Parameters
    ----------
    flip : number, default 1
        Multiplier applied to every curve; a negative value mirrors the
        curves about the x axis.
    """
    xs = np.linspace(0, 14, 100)
    for k in range(1, 7):
        phase = k * 0.5
        plt.plot(xs, np.sin(xs + phase) * (7 - k) * flip)

In [None]:
sinplot(-1)

In [None]:
sns.set_style('ticks', {'axes.grid': True, 'xtick.direction': 'in'})
sinplot()
sns.despine(left = True, bottom= False)

In [None]:
sns.axes_style()

In [None]:
sns.set_style('darkgrid')

In [None]:
sns.set_context('talk', font_scale=1.5)
sinplot()

In [None]:
current_palettes = sns.color_palette()
sns.palplot(current_palettes)

In [None]:
sns.palplot(sns.color_palette('hls', 8))