In [None]:
import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides deprecation notices raised by the plotting
# calls below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Default matplotlib figure size as (width, height).
plt.rcParams['figure.figsize'] = (4, 4)

In [None]:
# Load the 'penguins' example dataset through seaborn.
penguins = sns.load_dataset(name='penguins')

In [None]:
# Display the DataFrame as this cell's output.
penguins

In [None]:
# Column dtypes and non-null counts — a quick way to spot missing values.
penguins.info()

In [None]:
# Summary statistics for the numeric columns.
penguins.describe()

In [None]:
# Drop every row that contains a missing value.
# Rebinding the name instead of using inplace=True avoids mutating the frame
# object that earlier cells already displayed, and keeps the step chainable.
penguins = penguins.dropna()

## Simple distribution plots

In [None]:
# Distribution of flipper length with displot's default settings.
sns.displot(data=penguins["flipper_length_mm"])

In [None]:
# Same distribution, drawn in purple with 30 bins.
sns.displot(
    data=penguins["flipper_length_mm"],
    bins=30,
    color='purple',
)

In [None]:
# NOTE(review): called with no data or variables — this looks like a leftover
# placeholder cell; confirm whether it should be removed or given data to plot.
sns.displot()

In [None]:
# Histogram plus a rug: small ticks along the x axis at each observed value.
sns.displot(
    data=penguins["flipper_length_mm"],
    rug=True,
    kind="hist",
)

In [None]:
# Kernel density estimate of the same column instead of a histogram.
sns.displot(data=penguins["flipper_length_mm"], kind="kde")

## Scatter plots

Scatter plots provide insight into the relationship between two variables.

In [None]:
# Bill depth against bill length, one point per penguin.
sns.scatterplot(data=penguins, x='bill_length_mm', y='bill_depth_mm')

We can easily change the marker size using the `s` argument.

In [None]:
# Same scatter plot with the marker size set through `s`.
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    s=60,
)

Using color we can gain insights about a third variable

In [None]:
# Color each point by the penguin's sex.
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    hue='sex',
)

In [None]:
# Fix the order in which the hue levels are processed and listed.
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    hue='sex',
    hue_order=['Female', 'Male'],
)

In [None]:
# Explicit color for each level of `sex`, passed to seaborn as the palette.
hue_colors = dict(Female='orange', Male='cadetblue')
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    hue='sex',
    palette=hue_colors,
)

We can use a different symbol for each category.

In [None]:
# A different marker symbol per island, with larger markers (s=80).
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    s=80,
    style='island',
)

In the previous examples, the variable we used for `hue` was categorical. Let's see what happens if we use a continuous variable instead.

In [None]:
# Use a continuous variable (body mass) for the hue.
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    hue='body_mass_g',
    s=100,
)

In [None]:
# Same plot with partially transparent markers (alpha=0.7) so overlapping
# points stay visible.
sns.scatterplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    hue='body_mass_g',
    alpha=0.7,
    s=100,
)

## A different type of visualization of two variables

In [None]:
# IPython help syntax: the trailing `?` shows the docstring of sns.jointplot.
sns.jointplot?

In [None]:
# Joint plot of body mass against flipper length with a regression fit.
sns.jointplot(x='flipper_length_mm', y='body_mass_g', data=penguins,
              kind='reg', height=8)

In [None]:
# Same pair of variables shown as a kernel density estimate; no colorbar is
# requested (cbar=False).
sns.jointplot(x='flipper_length_mm', y='body_mass_g', data=penguins,
              kind='kde', cbar=False, height=8)

## Count plots

In [None]:
# Number of penguins observed on each island.
sns.countplot(data=penguins, x='island')

In [None]:
# Counts per island, split into one bar per sex.
sns.countplot(
    data=penguins,
    x='island',
    hue='sex',
)

In [None]:
# Same counts with the islands placed in an explicit order on the x axis.
sns.countplot(
    data=penguins,
    x='island',
    hue='sex',
    order=['Biscoe', 'Dream', 'Torgersen'],
)

## Easy creation of multiple graphs

Below we see two flexible approaches for easy creation of multiple graphs named `relplot` and `catplot` that can be used instead of `scatterplot` and `countplot` respectively 

### `relplot`

In [None]:
# One scatter panel per sex, laid out side by side as columns.
sns.relplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    kind='scatter',
    col='sex',
)

In [None]:
# One scatter panel per sex, stacked vertically as rows.
sns.relplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    kind='scatter',
    row='sex',
)

In [None]:
# Grid of scatter panels: one row per island, one column per sex.
sns.relplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    kind='scatter',
    row='island',
    col='sex',
)

In [None]:
# One panel per species, wrapping onto a new row after two columns.
sns.relplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    kind='scatter',
    col='species',
    col_wrap=2,
)

In [None]:
# Same wrapped layout with the species panels in an explicit order.
sns.relplot(
    data=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    kind='scatter',
    col='species',
    col_wrap=2,
    col_order=['Gentoo', 'Adelie', 'Chinstrap'],
)

### `catplot`

In [None]:
# Count plot of islands, with a separate panel (column) for each sex.
sns.catplot(
    data=penguins,
    x='island',
    kind='count',
    col='sex',
)

## Bar plots

In [None]:
# Bar plot of body mass per species, using seaborn's default estimator and
# error bars.
sns.catplot(
    data=penguins,
    x='species',
    y='body_mass_g',
    kind='bar',
    height=8,
    aspect=8/8,
)

In [None]:
# Same bar plot with the error bars turned off (ci=None).
# NOTE(review): seaborn 0.12+ deprecates `ci` in favor of `errorbar=None`;
# switch once the minimum supported seaborn version is confirmed.
sns.catplot(
    data=penguins,
    x='species',
    y='body_mass_g',
    kind='bar',
    ci=None,
    height=8,
    aspect=8/8,
)

In [None]:
# Swap the axes (numeric variable on x, category on y) to get horizontal bars.
sns.catplot(
    data=penguins,
    x='body_mass_g',
    y='species',
    kind='bar',
    height=8,
    aspect=8/8,
)

## Exercises

### Exercise 1

We will work with the `mpg` dataset. The dataset is available through the seaborn package. You will have to run the following: `mpg = sns.load_dataset('mpg')`.

* Create a kernel density estimate (KDE) plot for the horsepower column. Include ticks on the x axis marking the location of each actual data point.
* Create a histogram of the mpg column. Adjust the number of bins to 50.
* Create a scatter plot with the horsepower on the x axis and the mpg on the y axis. Use different colors to indicate the origin of each point.
* Same as above but this time use different colors to indicate the range of weight of each point.
* Create a jointplot depicting the regression line between horsepower and acceleration. 
* Create separate scatter plots with the horsepower on the x axis and the mpg on the y axis for each value of the cylinders column. Arrange the plots in a single column format.
* Create separate scatter plots with the horsepower on the x axis and the acceleration on the y axis for each unique combination of origin and cylinders. 
* Create a scatter plot depicting the residuals arising from regressing horsepower on mpg.
* Create a count plot based on the model_year column. Create separate plots for each `origin`.
* Create a bar plot for the acceleration of cars with different numbers of cylinders. Change the orientation so that you have a horizontal bar plot.
