# Chapter 3: The Pandas essentials for data visualization

In [None]:
import pandas as pd

## Get the data

In [None]:
# Load the prepared mortality DataFrame from a local pickle file and preview
# its first rows.
# NOTE: unpickling can execute arbitrary code, so read_pickle() should only be
# used on files that come from a trusted source.
mortality_data = pd.read_pickle('mortality_prepped.pkl')
mortality_data.head()

In [None]:
# Load the wide version of the mortality data from a local pickle file and
# preview its first rows.
# NOTE: unpickling can execute arbitrary code, so read_pickle() should only be
# used on files that come from a trusted source.
mortality_wide = pd.read_pickle('mortality_wide.pkl')
mortality_wide.head()

## Long vs. wide data

In [None]:
# Keep only the rows for the 01-04 age group, then draw DeathRate against Year
# as a scatter plot.
(
    mortality_data
    .query('AgeGroup == "01-04 Years"')
    .plot.scatter(x='Year', y='DeathRate')
)

In [None]:
# Plot the wide data with the plot type spelled out; 'line' is what plot()
# uses when no kind is given.
mortality_wide.plot(kind='line')

## Plot the data

In [None]:
# Line plot of the long data with no x or y specified (same result as calling
# plot() with no arguments).
mortality_data.plot.line()

In [None]:
# Line plot of the wide data with no x or y specified (same result as calling
# plot() with no arguments).
mortality_wide.plot.line()

## Parameters

### Three basic parameters of the Pandas plot() method
![figure-3-4.png](attachment:4a45ce37-e491-4620-a871-2a5fae2dcd87.png)

In [None]:
# Scatter plot for the 01-04 age group, selecting the plot type through the
# kind parameter and the axes through the x and y parameters.
mortality_data.query('AgeGroup == "01-04 Years"').plot(
    kind='scatter', x='Year', y='DeathRate')

In [None]:
# Line plot restricted to two of the wide data's columns via the y parameter.
mortality_wide.plot(kind='line', y=['01-04 Years','15-19 Years'])

## Line and area plots

In [None]:
# Line plot of the wide data, requested through the kind parameter.
mortality_wide.plot(kind='line')

In [None]:
# Area plot of the wide data. stacked=True is stated explicitly because it is
# the default that the plot.area() accessor applies.
mortality_wide.plot(kind='area', stacked=True)

## Scatter plots

In [None]:
# Scatter plot of DeathRate against Year for all rows of the long data.
mortality_data.plot(kind='scatter', x='Year', y='DeathRate')

In [None]:
# Intentionally left commented out: calling scatter() with no arguments fails
# because a scatter plot needs both the x and y parameters.
# mortality_data.plot.scatter()   # Error: must have x and y parameters

In [None]:
# Intentionally left commented out: per the original note, 'Year' is in the
# index of mortality_wide rather than a column, so it can't be used as x here.
# NOTE(review): 'Death_Rate' is spelled differently from the 'DeathRate'
# column used elsewhere in this notebook -- confirm whether that is intended.
# mortality_wide.plot.scatter(x='Year',y='Death_Rate') # Error: 'Year' is in an index

In [None]:
# Draw the same Year/DeathRate scatter plot with Seaborn, coloring the points
# by the AgeGroup column.
import seaborn as sns
sns.scatterplot(
    x='Year',
    y='DeathRate',
    hue='AgeGroup',
    data=mortality_data,
)

## Bar plots

In [None]:
# Vertical bar plot comparing the rows for the years 1900 and 2000.
mortality_wide.query('Year in (1900,2000)').plot(kind='bar')

In [None]:
# Horizontal bar plot comparing the rows for the years 1900 and 2000.
mortality_wide.query('Year in (1900,2000)').plot(kind='barh')

## Histogram and density plots

In [None]:
# Histogram of the DeathRate column, grouped into 8 bins.
mortality_data.plot(kind='hist', y='DeathRate', bins=8)

In [None]:
# Density (KDE) plot of the DeathRate column; plot.kde is the same method as
# plot.density under its other name.
mortality_data.plot.kde(y='DeathRate')

In [None]:
# Density (KDE) plot of the wide data; plot.kde is the same method as
# plot.density under its other name.
mortality_wide.plot.kde()

## Box and pie plots

In [None]:
# Box plot of the wide data, requested through the kind parameter.
mortality_wide.plot(kind='box')

In [None]:
# Sum DeathRate within each AgeGroup, then show the totals as a pie plot.
(
    mortality_data
    .groupby('AgeGroup')['DeathRate']
    .sum()
    .plot.pie()
)

## Improve the appearance

![figure-3-10.png](attachment:64a95c75-8c79-4d1b-a907-15f16d1f22dd.png)

In [None]:
# Line plot of the wide data with a title, a y-axis label, grid lines, and
# tick labels rotated 45 degrees.
mortality_wide.plot(
    kind='line',
    title='Child Mortality: 1900-2018',
    ylabel='Deaths per 100,000',
    grid=True,
    rot=45,
)

In [None]:
# Same styled line plot, but with an explicit figure size and the axes limited
# to the years 2000-2018 and death rates 0-100.
mortality_wide.plot(
    kind='line',
    title='Child Mortality: 2000-2018',
    ylabel='Deaths per 100,000',
    figsize=(8,4),
    grid=True,
    rot=45,
    xlim=(2000,2018),
    ylim=(0,100),
)

## Subplots

![figure-3-11.png](attachment:4300f20c-4349-45b7-a067-2f2a98a6a3eb.png)

In [None]:
# Draw the wide data as separate line subplots in a 2x2 layout. The titles are
# given as a list, the y-axis is shared, the legend is turned off, and the
# x-axis is limited to 1900-1950.
mortality_wide.plot(
    kind='line',
    subplots=True,
    layout=(2,2),
    figsize=(10,10),
    sharey=True,
    legend=False,
    title=[
        'Child Mortality: 01-04',
        'Child Mortality: 05-09',
        'Child Mortality: 10-14',
        'Child Mortality: 15-19',
    ],
    ylabel='Deaths per 100,000',
    grid=True,
    rot=45,
    xlim=(1900,1950),
)

## Chaining

In [None]:
# Method chain: keep the rows for 1900 and 2018, pivot so each AgeGroup is a
# row and each Year is a column of DeathRate values, then draw a horizontal
# bar plot.
(
    mortality_data
    .query('Year in (1900,2018)')
    .pivot(index='AgeGroup', columns='Year', values='DeathRate')
    .plot.barh()
)

In [None]:
# Method chain: compute the mean, median, and standard deviation of DeathRate
# for each Year, then plot the three statistics.
(
    mortality_data
    .groupby('Year')['DeathRate']
    .agg(['mean','median','std'])
    .plot(ylabel='Deaths per 100,000')
)