# Seaborn

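Seaborn is typically imported with `import seaborn as sb`.
Seaborn is built on top of Matplotlib.
The snippets below also assume `import numpy as np`, `import pandas as pd`, `import matplotlib.pyplot as plt`, and a pandas DataFrame named `df`.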
 
## Sorting
### Nominal Data
``` python
# value_counts() lists categories from most to least frequent,
# so its index can be used directly as the bar order
cat_counts = df['cat_var'].value_counts()
cat_order = cat_counts.index
sb.countplot(data = df, x = 'cat_var', order = cat_order)
```

### Ordinal Data
``` python
level_order = ['Alpha', 'Beta', 'Gamma', 'Delta']

# pandas v0.21 or later: build an ordered categorical dtype and cast to it
ordered_cat = pd.api.types.CategoricalDtype(ordered = True, categories = level_order)
df['cat_var'] = df['cat_var'].astype(ordered_cat)

# pandas v0.20.3 or earlier: use this line INSTEAD of the two lines above
# (it is commented out because newer pandas rejects these astype keywords)
# df['cat_var'] = df['cat_var'].astype('category', ordered = True, categories = level_order)

# the bars follow the categorical order, so no `order` argument is needed
sb.countplot(data = df, x = 'cat_var')
```

## Color Palettes
 - `base_color = sb.color_palette()[0]` - Useful for setting a graph to a single color.

## Bar Charts
 - `sb.countplot(data, x = 'cat_var')` - Show the counts of observations in each categorical bin using bars.
 - `sb.countplot(data, y = 'cat_var')` - Horizontal bar chart.
 - `sb.barplot(x = indexes, y = values)` - Show point estimates and confidence intervals as rectangular bars (used for summarized data).

## Histogram
 - `sb.distplot(data)` - Creates a histogram with automatically chosen bin sizes and a kernel density estimate (KDE) line.

## Scatter Plot
 - `sb.regplot(data, x = 'num_var1', y = 'num_var2')` - Creates a scatter plot with a regression line.
 
## Violin Plot
 - `sb.violinplot(data, x = 'cat_var', y = 'num_var')` - Creates a violin plot with a box plot in the middle.
 - `sb.violinplot(data, x = 'num_var', y = 'cat_var')` - Creates a horizontal violin plot.
 
## Box Plot
 - `sb.boxplot(data, x = 'cat_var', y = 'num_var')` - Creates a box plot.
 - `sb.boxplot(data, x = 'num_var', y = 'cat_var')` - Creates a horizontal box plot.

## Line Plot
 - `sb.pointplot(data, x = 'cat_var', y = 'num_var')` - Creates a line plot.

## Features in Matplotlib
 - Rotate tick labels and set their positions in a graph -> `plt.xticks` and `plt.yticks`
 - Set the axis labels in a graph -> `plt.xlabel` and `plt.ylabel`

## Examples
### Relative Frequency Bar Chart
```python
# the share of rows held by the most common category is the upper
# bound for the tick marks
n_points = len(df)
max_count = df['cat_var'].value_counts().max()
max_prop = max_count / n_points

# tick positions as proportions (every 0.05) and their two-decimal labels
tick_props = np.arange(0, max_prop, 0.05)
tick_names = [format(prop, '0.2f') for prop in tick_props]

# draw the counts in a single color ...
base_color = sb.color_palette()[0]
sb.countplot(data = df, x = 'cat_var', color = base_color)

# ... then relabel the count axis: ticks sit at count positions
# (proportion * number of rows) but display the proportion
plt.yticks(tick_props * n_points, tick_names)
plt.ylabel('proportion')
```

### Add Annotation With Bar Value
```python
# draw the bars in a single color
base_color = sb.color_palette()[0]
sb.countplot(data = df, x = 'cat_var', color = base_color)

# values needed for the annotations
n_points = df.shape[0]
cat_counts = df['cat_var'].value_counts()

# current x tick positions and their label objects, one pair per bar
locs, labels = plt.xticks()

for loc, label in zip(locs, labels):
    # the tick label text is the category name, which indexes the counts
    category = label.get_text()
    count = cat_counts[category]

    # percentage of all rows that fall in this category
    pct_string = '{:0.1f}%'.format(100*count/n_points)

    # white, centered text placed 8 counts below the top of the bar
    plt.text(loc, count-8, pct_string, ha = 'center', color = 'w')
```

### NaN Values Bar Chart
```python
# number of missing values in each column
na_counts = df.isna().sum()

# one bar per column; x and y are named explicitly because newer seaborn
# releases do not accept them as positional arguments
sb.barplot(x = na_counts.index.values, y = na_counts)
```

### Clustered Bar Chart With Legend
```python
# bars grouped by cat_var1 and colored by cat_var2
ax = sb.countplot(data = df, x = 'cat_var1', hue = 'cat_var2')

# opaque three-column legend at the bottom center of the axes
# ('lower center' is the named form of location code 8)
ax.legend(loc = 'lower center', ncol = 3, framealpha = 1, title = 'cat_var2')
```

### Histogram
```python
# one-unit-wide bins starting at 0 and reaching the largest value
largest = df['num_var'].max()
bin_edges = np.arange(0, largest + 1, 1)

# plain histogram: no KDE curve, fully opaque bars
sb.distplot(df['num_var'], bins = bin_edges, kde = False,
            hist_kws = {'alpha' : 1})
```

### Scatter Plot With Log Transform
```python
def log_trans(x, inverse = False):
    """ Base-10 log transform of x, or its inverse (10 ** x) when inverse is true. """
    if inverse:
        # undo the transform
        return np.power(10, x)
    return np.log10(x)

# plot num_var2 on a log10 scale; x and y are named explicitly because
# newer seaborn releases do not accept them as positional arguments
sb.regplot(x = df['num_var1'], y = df['num_var2'].apply(log_trans))

# place ticks at the transformed positions but label them with the
# original, untransformed values
tick_locs = [10, 20, 50, 100, 200, 500]
plt.yticks(log_trans(tick_locs), tick_locs)
```

### Scatter Plot With Jitter and Transparency
```python
# scatter only (no regression line); jitter both axes by 0.2 and draw the
# points at one-third opacity
sb.regplot(data = df, x = 'disc_var1', y = 'disc_var2', fit_reg = False,
           x_jitter = 0.2, y_jitter = 0.2,
           scatter_kws = {'alpha' : 1/3})
```

### Violin Plot Without Box Plot
```python
# inner = None leaves out the box plot normally drawn in the middle of each violin
sb.violinplot(data = df, x = 'cat_var', y = 'num_var', inner = None)
```

### Violin Plot With Quantile Lines
```python
# inner = 'quartile' marks the quartiles with lines inside each violin
sb.violinplot(data = df, x = 'cat_var', y = 'num_var', inner = 'quartile')
```

### Line Plot With Multiple Lines
```python
def freq_poly(x, bins = 10, **kwargs):
    """ Custom frequency polygon / line plot code.

    x      -- pandas Series of numeric values to bin
    bins   -- number of equal-width bins (int) or a sequence of bin edges
    kwargs -- forwarded unchanged to plt.errorbar
    """
    # an int means "this many equal-width bins": turn it into explicit edges
    if isinstance(bins, (int, np.integer)):
        bins = np.linspace(x.min(), x.max(), bins+1)

    # derive the centers from the edges actually in use (not from a
    # module-level variable) so they always line up with the counts;
    # asarray lets a plain list of edges work too
    bins = np.asarray(bins)
    bin_centers = (bins[1:] + bins[:-1]) / 2

    # compute counts; right = False makes each bin closed on the left and
    # open on the right
    data_bins = pd.cut(x, bins, right = False,
                       include_lowest = True)
    counts = x.groupby(data_bins).count()

    # create plot
    plt.errorbar(x = bin_centers, y = counts, **kwargs)

# bin edges shared by every line: 1/3 wide, from -3 up to the largest value
bin_width = 1/3
bin_edges = np.arange(-3, df['num_var'].max() + bin_width, bin_width)

# one frequency polygon per category, all drawn on the same axes
# NOTE: seaborn 0.9 and later call the `size` argument `height`
g = sb.FacetGrid(data = df, hue = 'cat_var', size = 5)
g.map(freq_poly, "num_var", bins = bin_edges)
g.add_legend()
```

### Faceting
```python
# order the facets by the mean of num_var, largest first; selecting the
# column before .mean() keeps non-numeric columns out of the aggregation
group_means = df.groupby('many_cat_var')['num_var'].mean()
group_order = group_means.sort_values(ascending = False).index

# one small histogram per category, five per row, titled with the category name
g = sb.FacetGrid(data = df, col = 'many_cat_var', col_wrap = 5, size = 2,
                 col_order = group_order)
g.map(plt.hist, 'num_var', bins = np.arange(5, 15+1, 1))
g.set_titles('{col_name}')
```