# Univariate plots with Seaborn

We can make a lot of univariate plots very easily with Python by using the Seaborn data visualization library.

* "Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics."
* https://seaborn.pydata.org/
* API reference: https://seaborn.pydata.org/api.html

First we import the libraries and the data.

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Location of the Titanic CSV, read straight from GitHub
titanic_url = 'https://raw.githubusercontent.com/benjum/UCLAX-24Fall-EDA/main/Data/titanic.csv'
# Parse the remote CSV into the DataFrame used by the cells below
titanic = pd.read_csv(filepath_or_buffer=titanic_url)

In [None]:
# Display the DataFrame to inspect its rows and columns
titanic

In [None]:
# Plotting with matplotlib
# Draws the Age values (y-axis) against the row index (x-axis)

plt.plot(titanic['Age'])

In [None]:
# Plotting with the plot method of a pandas Series
# (titanic['Age'] is a single DataFrame column, i.e. a Series)

titanic['Age'].plot()

In [None]:
# Plotting with seaborn
# A Series passed as `data` is drawn as one line: values against the index

sns.lineplot(data=titanic['Age'])

In [None]:
# pandas plotting is built on top of matplotlib
# and you can use matplotlib commands to finesse the plot details

titanic['Age'].plot()
# Label the y-axis of the plot that was just drawn
plt.ylabel('Age')

In [None]:
# seaborn is also built on top of matplotlib
# and you can use matplotlib commands to finesse the plot details

sns.lineplot(data=titanic['Age'])
# Label the x-axis of the plot that was just drawn
plt.xlabel('index')

## Line plot

In [None]:
# .loc slices by label and includes both endpoints: row labels 0 through 20
first_ages = titanic.loc[0:20, 'Age']
sns.lineplot(data=first_ages)

In [None]:
# Line plot of the whole Age column against the row index
ages = titanic['Age']
sns.lineplot(data=ages)

## Scatter plot

In [None]:
# One marker per row: Age against the row index
ages = titanic['Age']
sns.scatterplot(data=ages)

## Strip plot

In [None]:
# Strip plot: each Age value is drawn as its own point
ages = titanic['Age']
sns.stripplot(data=ages)

## Swarm plot

In [None]:
# Swarm plot: like a strip plot, but points are shifted so they do not overlap
ages = titanic['Age']
sns.swarmplot(data=ages)

## Box plot

In [None]:
# Box plot summarizing the distribution of the Age column
ages = titanic['Age']
sns.boxplot(data=ages)

In [None]:
# This will give an error!
# Why?
# Hint: 'Age' (a plain string) selects a Series that keeps the original row
# labels of the filtered rows.
# NOTE(review): the error looks seaborn-version dependent -- confirm on the
# installed version.

sns.boxplot(data=titanic.loc[titanic['Age']<=20, 'Age'])

In [None]:
# Selecting with the list ['Age'] returns a one-column DataFrame instead of a Series
sns.boxplot(data=titanic.loc[titanic['Age']<=20, ['Age']])

In [None]:
# Inspect the Series selected by the scalar label 'Age': only rows with Age <= 20,
# each keeping its original row label
titanic.loc[titanic['Age']<=20, 'Age']

## Histogram

In [None]:
# Histogram of Age using seaborn's default binning
ages = titanic['Age']
sns.histplot(data=ages)

In [None]:
# Very coarse histogram: every bin is 30 units of Age wide
age_bin_width = 30
sns.histplot(data=titanic['Age'], binwidth=age_bin_width)

## Density plot with a Gaussian kernel density estimator (kde)

In [None]:
# Kernel density estimate of Age with the default bandwidth
ages = titanic['Age']
sns.kdeplot(data=ages)

In [None]:
# Kernel density estimate of Age on a logarithmic axis
ages = titanic['Age']
sns.kdeplot(data=ages, log_scale=True)

In [None]:
# bw_adjust scales the smoothing bandwidth; a small factor gives a less smooth curve
narrow_bw = .1
sns.kdeplot(data=titanic['Age'], bw_adjust=narrow_bw)

In [None]:
# Half the default bandwidth: smoother than bw_adjust=.1, rougher than the default
half_bw = .5
sns.kdeplot(data=titanic['Age'], bw_adjust=half_bw)

## Rug plot

In [None]:
# Rug plot: one tick along the axis for each Age value
ages = titanic['Age']
sns.rugplot(data=ages)

## Combo plot: KDE + Rug plot

In [None]:
# Both layers are drawn from the same Series in one cell: KDE curve, then rug ticks
ages = titanic['Age']
sns.kdeplot(data=ages, bw_adjust=.5)
sns.rugplot(data=ages)

## Combo plot: Histogram + Rug plot

In [None]:
# Both layers are drawn from the same Series in one cell: histogram bars, then rug ticks
ages = titanic['Age']
sns.histplot(data=ages)
sns.rugplot(data=ages)

## Combo plot: Scatter + Rug plot

In [None]:
# The scatter puts Age on the y-axis, so the rug is requested along y to match
ages = titanic['Age']
sns.scatterplot(data=ages)
sns.rugplot(data=titanic, y='Age')

## Combo plot: Histogram + KDE plot

In [None]:
# kde=True overlays a KDE curve on the count histogram
ages = titanic['Age']
sns.histplot(data=ages, kde=True)

## Combo plot: Density histogram + KDE plot

In [None]:
# stat='density' normalizes the bars so the histogram has total area 1
ages = titanic['Age']
sns.histplot(data=ages, kde=True, stat='density')

## Combo plot: Density histogram + KDE + Rug plot

In [None]:
# Density-normalized histogram with KDE overlay, plus rug ticks from the same Series
ages = titanic['Age']
sns.histplot(data=ages, kde=True, stat='density')
sns.rugplot(data=ages)

## Violin plot

In [None]:
# Violin plot of the Age column
ages = titanic['Age']
sns.violinplot(data=ages)

## QQ Plot

In [None]:
from scipy import stats

In [None]:
# Mean of Age; pandas skips missing values by default
ages = titanic['Age']
ages.mean()

In [None]:
# Standardize Age to z-scores and compare its quantiles with a standard normal.
# Any missing ages are dropped first: probplot sizes the theoretical quantiles from
# the input length and fits a line through all values, so NaNs would mis-pair the
# quantiles and turn the fitted line into NaN. mean()/std() already skip NaN, so
# the z-scores of the remaining values are unchanged.
age = titanic['Age'].dropna()
age_z = (age - age.mean()) / age.std()
stats.probplot(age_z, dist='norm', plot=plt);
# Reference line y = x: standardized normal data would fall along it
plt.plot([-3,3],[-3,3])

In [None]:
# QQ plot restricted to rows with Age > 10 (the comparison also excludes missing ages)
t = titanic.loc[titanic['Age']>10]
older_z = (t['Age'] - t['Age'].mean())/t['Age'].std()
stats.probplot(older_z, dist='norm', plot=plt);
# Reference line y = x
plt.plot([-3,3],[-3,3])

# Categorical plots

## Bar plot

In [None]:
# Look at the raw Pclass column before plotting it
titanic['Pclass']

In [None]:
# Seaborn bar plot... but note that this is not really what we want
# (barplot draws an aggregate of the values -- the mean by default -- rather than
# counting how many rows fall in each class)

sns.barplot(data=titanic['Pclass'])

In [None]:
# ... this is also not really what we want, but it does give us a count (of all records)
# (there is no per-class breakdown yet)

sns.countplot(data=titanic['Pclass'])

In [None]:
# we change from a specification of the "data" input parameter to "x"
# so the Pclass values themselves are placed on the horizontal axis and counted

sns.countplot(x=titanic['Pclass'])

In [None]:
# We can also split the specification between the dataframe (for "data") and feature column (for "x", the horizontal axis)
# Here "x" is a column name looked up in the DataFrame given as "data"

sns.countplot(data=titanic, x='Pclass')

## Pie chart

### Seaborn has no pie chart function, so we must turn to matplotlib

In [None]:
# Will give an error because seaborn has no pieplot
# (the attribute lookup sns.pieplot fails before any argument is used)

sns.pieplot(data=titanic, x='Pclass')

In [None]:
# Non-null count of every other column within each Pclass group
by_class = titanic.groupby(['Pclass'])
by_class.count()

In [None]:
# Number of rows for each Pclass value, most frequent first
titanic['Pclass'].value_counts()

In [None]:
# Pie chart of the class counts. Each wedge is labelled with its Pclass value;
# without labels the wedges cannot be matched to classes.
class_counts = titanic['Pclass'].value_counts()
plt.pie(class_counts, labels=class_counts.index.astype(str));