In [None]:
import numpy as py
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline    
# %matplotlib is a magic function in IPython.

df = sns.load_dataset("diamonds")
diamonds = sns.load_dataset("diamonds")
tips = sns.load_dataset("tips")
penguins = sns.load_dataset("penguins")
flights = sns.load_dataset("flights")

df.shape

In [None]:
# sns.get_dataset_names()

# Matplotlib to Pandas to Seaborn

In [None]:
# NOTICE: exactly one y value is paired with each x value.
# The pairs are written once as a mapping and then split into the two
# parallel lists used by the plotting cells.
_value_by_cut = {
    'Ideal': 3457,
    'Premium': 4584,
    'Very Good': 3981,
    'Good': 3928,
    'Fair': 4358,
}

x = list(_value_by_cut.keys())
y = list(_value_by_cut.values())

In [None]:
# Show the type of x: it was built as a plain Python list literal.
type(x)

In [None]:
# Matplotlib: line plot of the two hand-written lists.
# The trailing semicolon suppresses the text repr of the returned line objects.
plt.plot(x, y);

In [None]:
# Plotting the raw columns works, but nothing is aggregated:
# every row of df contributes its own (cut, price) point.
plt.plot(df['cut'], df['price'])


In [None]:
# Create a smaller dataset: one row per cut holding the mean of each numeric column,
# so that every x value (cut) has exactly one y value (e.g. price).
# numeric_only=True skips the non-numeric columns, which cannot be averaged and
# make a bare .mean() raise a TypeError on pandas >= 2.0.
# observed=False keeps the existing "all categories" behaviour explicit when the
# grouping column is categorical, and avoids the FutureWarning newer pandas emits.
df2 = (
    df.groupby('cut', observed=False)
      .mean(numeric_only=True)
      .reset_index()
)
df2

In [None]:
# Matplotlib line plot of the aggregated frame: one price value per cut.
plt.plot(df2['cut'], df2['price'])

Below ...
**pandas** plotting functions (e.g., `.plot`) use matplotlib under the hood.

In [None]:
# pandas plot of the aggregated data (df2).
# The commented-out line is the same call on the raw, un-aggregated data (df).
#df.plot(x='cut', y = 'price')
df2.plot(x='cut', y = 'price')

In [None]:
# Seaborn plot of df (not grouped): seaborn aggregates the many price values
# found at each cut automatically, so no groupby step is needed first.
sns.lineplot(data=df, x="cut", y="price")

In [None]:
# Using seaborn's set_theme function.
# Seaborn updates Matplotlib's rc parameters; rc params are the default style settings
# (rc = runtime configuration), so plots drawn after this call pick up the new defaults.
# https://matplotlib.org/stable/tutorials/introductory/customizing.html

sns.set_theme()

In [None]:
# Matplotlib: the same plt.plot call as earlier, re-run after sns.set_theme()
# to show the effect of the changed default style settings.
plt.plot(x, y);

# Seaborn

Seaborn is a library for making statistical graphics in Python. It builds on top of matplotlib and integrates closely with pandas data structures.

Its plotting functions operate on dataframes and arrays containing whole datasets and internally perform the necessary semantic mapping and **statistical aggregation** to produce informative plots. 

The **declarative** API lets you focus on what the different elements of your plots mean, rather than on the details of how to draw them.



## Figure & Axes Level Plotting Functions

 <img src="https://drive.google.com/uc?id=1Lcc0xVij3hfy2QVjUBPln5O8pXAnk8Ef" width=600 />

#### http://seaborn.pydata.org/tutorial.html

## What's the difference between figure and axes level options?

**Axes-level functions make self-contained plots.**
The axes-level functions are written to act like drop-in replacements for matplotlib functions. While they add axis labels and legends automatically, they don’t modify anything beyond the axes that they are drawn into. That means they can be composed into arbitrarily-complex matplotlib figures with predictable results. Figure-level functions, by contrast, create and manage their own figure through a seaborn object (usually a FacetGrid), as shown in the next cells.

In [None]:
# Figure level: displot with kind='hist' draws a stacked histogram of price,
# split by cut, on a figure that seaborn creates itself.
sns.displot(
    data=df,
    x="price",
    hue="cut",
    kind='hist',
    multiple="stack",
    bins=20,
)

In [None]:
# Axes level: histplot draws the same stacked histogram of price by cut
# directly onto a matplotlib axes.
sns.histplot(
    data=df,
    x='price',
    hue='cut',
    multiple='stack',
    bins=20,
)

## Using sns and matplotlib together

In [None]:
# Compose four axes-level seaborn plots on one 2x2 matplotlib figure.
# The grid is created explicitly and each plot is told which axes to draw on
# (ax=...), instead of relying on pyplot's implicit "current axes" that
# plt.subplot(2, 2, k) sets as a side effect.
fig, axes = plt.subplots(2, 2, figsize=(20, 15))

sns.boxplot(data=df, x='cut', y='depth', ax=axes[0, 0])       # top-left
sns.scatterplot(data=df, x='carat', y='price', ax=axes[0, 1])  # top-right
sns.boxplot(data=df, x='cut', y='carat', ax=axes[1, 0])       # bottom-left
sns.histplot(data=df, x='price', ax=axes[1, 1]);               # bottom-right

In [None]:
# From the Seaborn documentation: overlay a swarm plot on a box plot.
# The axes returned by boxplot is handed to swarmplot so both layers
# land on the same axes.
ax = sns.boxplot(data=tips, x="day", y="total_bill")
ax = sns.swarmplot(data=tips, x="day", y="total_bill", color=".25", ax=ax)

## Figure level

- Figure-level functions interface with matplotlib through a seaborn object, usually a FacetGrid
- Each module (relational, distributions, categorical) has a single figure-level function


In [None]:
# The default kind for displot is a histogram.

g = sns.displot(data=df, x="price")

# Save through the FacetGrid that displot returns rather than plt.savefig, which
# writes whatever figure happens to be "current". FacetGrid.savefig wraps the
# figure's own savefig and uses a tight bounding box by default.
g.savefig('save_as_a_png.png')

# sns.displot(data=df, x="price",height=8, aspect=15/8)  Use height and aspect to change the size of the figure.

In [None]:
# Histogram of price with one colour per cut; multiple="stack" stacks the cuts.
sns.displot(data=df, x="price", hue="cut", multiple="stack")

## Change the plot type with kind = ...

In [None]:
# kind='hist': a histogram of price, with col="cut" giving each cut its own column.
sns.displot(data=df, x="price", hue="cut", col="cut", kind = 'hist')

In [None]:
# kind='kde': same layout as the histogram version, switched to density curves.
sns.displot(data=df, x="price", hue="cut", col="cut", kind = 'kde')

# kernel density estimation

In [None]:
# kind='ecdf': same layout again, switched to cumulative distribution curves.
sns.displot(data=df, x="price", hue="cut", col="cut", kind = 'ecdf')

# empirical cumulative distribution functions

In [None]:
# All cuts share one plot (no col=); rug=True is requested in addition to the density curves.
sns.displot(data=df, x="price", hue="cut", kind = 'kde', rug = True)

In [None]:
# This one might take a minute to run.
# Passing both x and y with kind='kde' requests a two-variable (carat vs price) density plot.

sns.displot(data=df, x="carat", y='price', kind ='kde')

## Seaborn Exercise 1 - 10 minutes

- Use the relational (relplot) figure-level function to create two charts.  First a scatterplot and second a line chart.
- Use the 'tips' data set.
- For the scatterplot, determine if tips increase with the bill amount.  Try to show a distinction between data points based on time of day.
- For the line chart, show how tips change based on size of the party.


In [None]:
# Preview the first rows of the tips dataset used in this exercise.
tips.head()

In [None]:
# Place scatterplot here


In [None]:
# Place line chart here


# Facet Grids - Creating Small Multiples

In [None]:
# p is the facet grid; no row/col variable is given and nothing is drawn on it yet.
p = sns.FacetGrid(df) # p is the facet grid

In [None]:
p = sns.FacetGrid(df, col = 'cut') # 1 column for each facet (value) of cut.

# matplotlib will squeeze the 5 plots into the original size.

In [None]:
# height sets the height of each facet.
p = sns.FacetGrid(df, col = 'cut', height = 4, aspect = 0.75) 
# Aspect ratio of each facet, so that aspect * height gives the width of each facet.

In [None]:
# Same grid as the previous cell, plus col_wrap = 3 to wrap the cut facets after three columns.
p = sns.FacetGrid(df, col = 'cut', height = 4, aspect = 0.75, col_wrap = 3) 
# Aspect ratio of each facet, so that aspect * height gives the width of each facet.

In [None]:
# Switch the seaborn style to 'white' for the penguin examples.
sns.set_style('white')
# NOTE(review): penguins was already loaded in the first cell; this reload looks redundant.
penguins = sns.load_dataset("penguins")

In [None]:
# Empty grid with the island facets laid out as columns.
p = sns.FacetGrid(penguins, col='island');

In [None]:
# Same facets, laid out as rows instead of columns.
p = sns.FacetGrid(penguins, row='island');

In [None]:
# Show the type of the object returned by sns.FacetGrid.
type(p)

## Managing the Facet Grid

In [None]:
# Preview the first rows of the penguins dataset.
penguins.head()

In [None]:
# Figure-level call: histogram of flipper length, with col="island" splitting it by island.
sns.displot(data=penguins, x="flipper_length_mm", col="island", kind = 'hist')

## Using methods of FacetGrid

Use these methods when you draw an axes-level plot onto a FacetGrid that you set up yourself.

Three steps:
- set up the FacetGrid
- identify the plot type using .map or .map_dataframe
- customize

### .map()
1. Set up the facet grid (format the facets)
2. Describe what should be plotted in the grids
3. Add extras - labels, titles, etc.

To draw a plot on every facet, pass a function and the name of one or more columns in the dataframe to FacetGrid.map()

In [None]:
# Step 1: set up the grid; step 2: draw a histogram of flipper length on every facet.
p = sns.FacetGrid(penguins, col='island')
p.map(sns.histplot, 'flipper_length_mm'); # Requires positional arguments, not named ones (x = 'flipper_length_mm')

### .map_dataframe()

In [None]:
# Same plot as with .map(), but the column is passed by keyword.
p = sns.FacetGrid(penguins, col='island')
p.map_dataframe(sns.histplot, x = 'flipper_length_mm'); # It is the x= that is different.  This is the named argument

In [None]:
# One scatter panel per island: bill depth on x, bill length on y.
bill_columns = dict(x='bill_depth_mm', y='bill_length_mm')

p = sns.FacetGrid(penguins, col='island', height=6, aspect=1)
p.map_dataframe(sns.scatterplot, **bill_columns);

### .set_axis_labels(),    .set_titles(),    sharey,    ylim

In [None]:

# Scatter of bill length against bill depth, one facet per island,
# followed by custom axis labels and facet titles.
x_label = 'BILL DEPTH (mm)'
y_label = 'BILL LENGTH (mm)'
title_pattern = '{col_name} Island'

p = sns.FacetGrid(penguins, col='island', height=6, aspect=1)
p.map_dataframe(sns.scatterplot, x='bill_depth_mm', y='bill_length_mm')

p.set_axis_labels(x_label, y_label);          # if the LABELS need to be changed
p.set_titles(col_template=title_pattern);     # if the TITLE needs to be changed

In [None]:

# Two-way grid: one row per species, one column per island.
grid_layout = dict(row='species', col='island', height=4, aspect=1)
title_patterns = dict(row_template='{row_name}', col_template='{col_name} Island')

p = sns.FacetGrid(penguins, **grid_layout)
p.map_dataframe(sns.scatterplot, x='bill_depth_mm', y='bill_length_mm')
p.set_axis_labels('Bill Depth (mm)', 'Bill Length (mm)')
p.set_titles(**title_patterns);

- sharey: False means the y-axis will not be shared and each plot will get its own y-axis.
- ylim: Sets a specified range for all y-axes shown

#### sharey = False

In [None]:
# sharey=False: the y-axis is not shared, so each island facet gets its own y-axis.
p = sns.FacetGrid(penguins, col='island', height = 4, aspect =1, sharey=False)

# Alternative: additionally pass ylim to set one specified range for all y-axes shown.
#p = sns.FacetGrid(penguins, col='island', height = 4, aspect =1, sharey=False, ylim=(20, 70))

p.map_dataframe(sns.scatterplot, x='bill_depth_mm', y='bill_length_mm');
p.set_axis_labels('BILL DEPTH (mm)', 'BILL LENGTH (mm)'); 
p.set_titles(col_template='{col_name} Island'); 

### hue & palette

In [None]:
# hue='species' colours the points by species inside every island facet.
# ylim=(20, 70) sets the same specified y-range on all facets even though
# sharey=False gives each facet its own y-axis.
p = sns.FacetGrid(penguins, col='island', height = 4, aspect =1, sharey=False, ylim=(20, 70), hue = 'species')
p.map_dataframe(sns.scatterplot, x='bill_depth_mm', y='bill_length_mm');
p.set_axis_labels('BILL DEPTH (mm)', 'BILL LENGTH (mm)');
p.set_titles(col_template='{col_name} Island');
# A hand-built FacetGrid does not add a legend by itself; without one the
# species colours cannot be read, so request it explicitly.
p.add_legend();

In [None]:
# Same grid as the hue example, with an explicit palette for the species colours.
p = sns.FacetGrid(penguins,
                  col='island',
                  height = 4,
                  aspect =1,
                  sharey=False,
                  ylim=(20, 70),
                  hue = 'species',
                  palette = 'magma'            # a named palette ...
                  #palette = ['grey','blue','red']  # ... or an explicit list of colours
                 )

# marker='+' is forwarded to sns.scatterplot for every facet.
p.map_dataframe(sns.scatterplot, x='bill_depth_mm', y='bill_length_mm', marker = '+');
p.set_axis_labels('BILL DEPTH (mm)', 'BILL LENGTH (mm)');
p.set_titles(col_template='{col_name} Island');
# A hand-built FacetGrid does not add a legend by itself; without one the
# species colours cannot be read, so request it explicitly.
p.add_legend();

### Accomplish the same without defining the facet grid first

In [None]:
# Figure-level relplot builds the FacetGrid and draws the scatter in a single call.
# Options that belong to the grid itself (axis sharing, y-limits) go through facet_kws.
grid_options = {'sharey': False, 'sharex': True, 'ylim': (20, 70)}
#   (with a hand-built FacetGrid these were passed directly: sharey=False, ylim=(20, 70))
#   palette could also be an explicit list: ['grey','blue','red']

p = sns.relplot(
    data=penguins,
    kind='scatter',
    x='bill_depth_mm',
    y='bill_length_mm',
    col='island',
    hue='species',
    palette='magma',
    size='body_mass_g',
    style='sex',
    marker='+',
    height=4,
    aspect=1,
    facet_kws=grid_options,
)

# Draw a horizontal reference line at y=45 on every facet, behind the points (zorder=0).
reference_line = dict(y=45, color=".7", dashes=(2, 1), zorder=0)
p.map(plt.axhline, **reference_line)

p.set_axis_labels('Bill Depth (mm)', 'Bill Length (mm)')
p.set_titles(row_template='{row_name}', col_template='{col_name} Island')

### Method Chaining 

In [None]:
# Build the faceted scatter first ...
p = sns.relplot(
    data=penguins,
    kind='scatter',
    x='bill_depth_mm',
    y='bill_length_mm',
    col='island',
    hue='species',
    palette='magma',
    size='body_mass_g',
    style='sex',
    marker='+',
    height=4,
    aspect=1,
    facet_kws={'sharey': False, 'sharex': True, 'ylim': (20, 70)},
)

# ... then customise it in one chained expression; the chain works because
# each call here is followed directly by the next method call on its result.
(
    p.map(plt.axhline, y=45, color=".7", dashes=(2, 1), zorder=0)
     .set_axis_labels('Bill Depth (mm)', 'Bill Length (mm)')
     .set_titles(row_template='{row_name}', col_template='{col_name} Island')
)

## Seaborn Exercise 2 - 10 minutes

Using the flights info, create a visualization that plots - for each month - the number of passengers by year.  
There should be one plot per month.

In [None]:
# Column names, dtypes and non-null counts of the flights dataset.
flights.info()

In [None]:
# First 20 rows of the flights dataset.
flights.head(20)

In [None]:
# (rows, columns) of the flights dataset.
flights.shape

In [None]:
# SNS Exercise 2 solution here.

## Seaborn Exercise 3 - 15 minutes

The displot below is a quick 'one-liner' plot.
Take a little more time to create one axes for each cut, with the axes stacked one above the other.

<img src="https://drive.google.com/uc?id=1SpS1UGerqF5snj0Y7p3lkxtOYSiAgdXz" width=1200 />


In [None]:
# This is the chart on the left, drawn with the diamonds data:
# all cuts share a single plot, with rug = True requested alongside the density curves.
sns.displot(data=df, x="price", hue="cut", kind = 'kde', rug = True)

In [None]:
# Place Exercise 3 solution here.



# Multiple Views

## Jointplot

In [None]:
# Switch to the "ticks" style, then draw flipper length against bill length, coloured by species.
sns.set_style("ticks")
sns.jointplot(data = penguins, x="flipper_length_mm", y="bill_length_mm", hue="species", height = 8 )

In [None]:
# NOTE(review): this cell repeats the preceding jointplot cell verbatim — confirm whether
# a variation (e.g. a different kind=) was intended here.
sns.set_style("ticks")
sns.jointplot(data = penguins, x="flipper_length_mm", y="bill_length_mm", hue="species", height = 8 )

## Pairplot

In [None]:
# Pairwise plots of the diamonds columns, coloured by cut.
# NOTE(review): df is the full diamonds frame, so this may be slow — consider sampling first.
sns.pairplot(data = df, hue = 'cut')

In [None]:
# Correlation matrix of the numeric columns.
# The frame also holds non-numeric columns (e.g. cut), and on pandas >= 2.0
# DataFrame.corr() no longer drops those silently — it raises a ValueError.
# Selecting the numeric columns first works on old and new pandas alike.
xyz = df.select_dtypes(include='number').corr()
xyz

In [None]:
# Heatmap of the correlation matrix computed in the previous cell; annot=False leaves the cells unlabelled.
sns.heatmap(xyz, annot=False)

In [None]:
# Calculate correlations on the numeric columns only: the frame also holds
# non-numeric columns (e.g. cut), which make a bare df.corr() raise a
# ValueError on pandas >= 2.0.
corr = df.select_dtypes(include='number').corr()
plt.figure(figsize=(12,8))
plt.title('Quantitative Variables Correlation')

# Heatmap with the value written in each cell (annot=True)
sns.heatmap(corr,cmap='plasma',annot=True)

In [None]:
plt.figure(figsize=(12,8))
# Numeric columns only: the frame also holds non-numeric columns (e.g. cut),
# which make a bare df.corr() raise a ValueError on pandas >= 2.0.
corr = df.select_dtypes(include='number').corr()
# Diverging palette centred on 0 with the colour scale fixed to [-1, 1].
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True,
    annot=True, annot_kws={"size":10}
)
# Rotate the x tick labels and right-align them.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);

In [None]:
plt.figure(figsize=(12,8))
# Numeric columns only: the frame also holds non-numeric columns (e.g. cut),
# which make a bare df.corr() raise a ValueError on pandas >= 2.0.
corr = df.select_dtypes(include='number').corr()
# Same heatmap as the annotated version, but with annot=False.
# NOTE(review): annot_kws is passed alongside annot=False — confirm it is still wanted.
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True,
    annot=False, annot_kws={"size":20}
)
# Rotate the x tick labels and right-align them.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right'
);