# Fundamentals of Plotting with Matplotlib

<a href="https://colab.research.google.com/github/bradleyboehmke/uc-bana-4080/blob/main/example-notebooks/13_data_viz_matplotlib.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook accompanies the textbook chapter and allows you to run the code examples interactively.

## Importing the pyplot module

In [None]:
import matplotlib.pyplot as plt

## Figures and axes

In [None]:
# create one Figure and one Axes with default settings; both are reused by the cells below
fig, ax = plt.subplots()

In [None]:
# inspect the class of the first object returned by plt.subplots()
type(fig)

In [None]:
# inspect the class of the second object returned by plt.subplots()
type(ax)

### Distinguishing the Figure from the Axes

In [None]:
# color the inner plotting region (Axes) and the surrounding canvas (Figure)
# differently so the two objects are easy to tell apart
ax.set_facecolor('sandybrown')
fig.set_facecolor('skyblue')

# make the figure the cell's last expression so it is rendered
fig

### Setting the size of the Figure upon creation

In [None]:
# build a new Figure with an explicit figsize and apply the same two
# background colors at creation time instead of with setters afterwards
fig, ax = plt.subplots(
    figsize=(8, 4),
    facecolor='skyblue',                     # forwarded to the Figure
    subplot_kw={'facecolor': 'sandybrown'},  # forwarded to the Axes
)

### Calling Axes methods - `get_` and `set_` methods

In [None]:
# read the current title of the Axes (no title has been set on this Axes yet)
ax.get_title()

In [None]:
# assign a title; the call's return value is shown as the cell output
ax.set_title('My First Matplotlib Graph')

In [None]:
# re-display the figure so the new title is visible
fig

In [None]:
# read the title back after it has been set
ax.get_title()

In [None]:
# current x-axis limits
ax.get_xlim()

In [None]:
# current y-axis limits
ax.get_ylim()

In [None]:
# pin the visible range of each axis, then redraw the figure
ax.set_xlim(left=0, right=5)
ax.set_ylim(bottom=-10, top=50)
fig

In [None]:
# current x-axis tick positions
ax.get_xticks()

In [None]:
# current y-axis tick positions
ax.get_yticks()

In [None]:
# place ticks at explicit, unevenly spaced positions on each axis
ax.set_xticks([1.8, 3.99, 4.4])
# NOTE(review): -99 is below the lower y-limit of -10 set earlier; matplotlib is
# expected to widen the limits so the tick stays visible -- confirm in the output
ax.set_yticks([-99, -9, -1, 22, 44])
fig

In [None]:
# current x-axis tick label objects
ax.get_xticklabels()

In [None]:
# replace the tick labels with custom text: one label per tick position
# (3 ticks on x, 5 ticks on y)
ax.set_xticklabels(['dog', 'cat', 'snake'])
ax.set_yticklabels(['Boehmke', 'D', 'A', 'R', 'B'])
fig

### Setting text styles

In [None]:
# text properties applied to the title: size, colors, font family, rotation
title_style = {
    'size': 20,
    'color': 'firebrick',
    'backgroundcolor': 'steelblue',
    'fontname': 'Courier New',
    'rotation': 70,
}
ax.set_title('Tests', **title_style)
fig

In [None]:
# label the x-axis and style the label text (size, color, font family, rotation)
ax.set_xlabel(
    'New and Improved X-Axis Stylized Label',  # fixed typo: was 'Imporved'
    size=15,
    color='indigo',
    fontname='Times New Roman',
    rotation=15
    )
fig

## Plotting Data

In [None]:
import pandas as pd
from completejourney_py import get_data

# load the Complete Journey tables and pull out the three that are joined below
cj_data = get_data()
transactions = cj_data['transactions']
products = cj_data['products']
demographics = cj_data['demographics']

# inner joins: keep only transactions that have both a matching product
# record and a matching household demographics record
df = (
    transactions
    .merge(products, on='product_id', how='inner')
    .merge(demographics, on='household_id', how='inner')
)

df.head()

### Line `plot`s

In [None]:
# total sales per calendar day: bucket rows into daily bins on their
# timestamp and sum sales_value within each bin; reset_index turns the
# result back into a two-column frame (transaction_timestamp, sales_value)
daily_sales = (
    df
    .resample('D', on='transaction_timestamp')['sales_value']
    .sum()
    .reset_index()
)
daily_sales.head()

In [None]:
# draw daily sales as a line on a wide figure; the two strings are column names
# looked up in `daily_sales` through `data=`, and the trailing `;` keeps the
# call's return value out of the cell output
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot('transaction_timestamp', 'sales_value', data=daily_sales);

In [None]:
fig, ax = plt.subplots(figsize=(10, 4))

# line appearance: dotted, gray, 2 wide
line_style = {'linestyle': ':', 'color': 'gray', 'linewidth': 2}
ax.plot('transaction_timestamp', 'sales_value', data=daily_sales, **line_style)

# describe what the chart shows
ax.set_title('Total daily sales across all stores', size=20)
ax.set_ylabel('Total sales ($)');

In [None]:
from datetime import date as dt

fig, ax = plt.subplots(figsize=(10, 4))

# line appearance: dotted, gray, 2 wide
line_style = {'linestyle': ':', 'color': 'gray', 'linewidth': 2}
ax.plot('transaction_timestamp', 'sales_value', data=daily_sales, **line_style)

# describe what the chart shows
ax.set_title('Total daily sales across all stores', size=20)
ax.set_ylabel('Total sales ($)')

# dashed gridlines plus an arrow-and-text callout
ax.grid(linestyle='dashed')
# NOTE(review): the arrow tip is anchored at 2017-12-20 while the label reads
# 'Christmas Eve' (Dec 24) -- confirm the offset is intentional
ax.annotate(
    'Christmas Eve',
    xy=(dt(2017, 12, 20), 15900),    # point the arrow head targets (date, sales)
    xytext=(dt(2017, 9, 1), 15500),  # where the label text is placed
    arrowprops=dict(color='blue', width=2),
    size=10,
);

### Other plots

In [None]:
# per-store totals of sales value and quantity (store_id becomes the index)
totals_by_store = df.groupby('store_id')[['sales_value', 'quantity']].sum()

# histogram of store-level total sales, split into 30 bins
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(
    'sales_value',
    data=totals_by_store,
    bins=30,
);

In [None]:
# boxplot of store-level total sales, drawn horizontally
fig, ax = plt.subplots(figsize=(8, 4))
# NOTE(review): newer matplotlib releases prefer orientation='horizontal' over
# vert=False -- confirm the targeted matplotlib version before switching
ax.boxplot('sales_value', data=totals_by_store, vert=False)

# adjust axes: log-scale the value axis and drop the y ticks. Removing the
# ticks also removes their labels, so no separate set_yticklabels call is needed.
ax.set_xscale('log')
ax.set_yticks([]);

In [None]:
# scatter plot of store-level quantity (x) against sales value (y);
# columns are looked up by name in `totals_by_store`, drawn as small gray markers
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter('quantity', 'sales_value', data=totals_by_store, c='gray', s=5);

### Adding more dimensions

In [None]:
# number of rows in `df` for each store, as a column named `n`
store_count = (
    df
    .groupby('store_id', as_index=False)
    .size()
    .rename(columns={'size': 'n'})
)

# per-store totals with the row count attached. The join key is spelled out
# so the merge cannot silently change if the two frames ever share another
# column name.
totals_by_store = (
    df
    .groupby('store_id', as_index=False)
    .agg({'sales_value': 'sum', 'quantity': 'sum'})
    .merge(store_count, on='store_id', how='inner')
)

totals_by_store.head()

In [None]:
n_by_store = totals_by_store['n']

# marker size grows with the store's row count, compressed by a 0.4 power
size_adj = n_by_store ** 0.4
# boolean flag: stores whose row count is above the 95th percentile
n_cutoff = n_by_store.quantile(0.95)
n_outliers = n_by_store > n_cutoff

# quantity vs. sales, with color taken from the flag and size from the count
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(
    'quantity',
    'sales_value',
    data=totals_by_store,
    c=n_outliers,
    s=size_adj,
);

### Multiple plots

In [None]:
# one Figure holding a 2-row by 2-column grid of Axes, using constrained layout
fig, ax_array = plt.subplots(
    nrows=2,
    ncols=2,
    figsize=(8, 6),
    constrained_layout=True,
)

In [None]:
# inspect the container type that holds the grid of Axes
type(ax_array)

In [None]:
# dimensions of the Axes grid (rows, columns)
ax_array.shape

In [None]:
# display the grid's contents
ax_array

In [None]:
# unpack the 2x2 grid row by row:
# ax1 = top-left, ax2 = top-right, ax3 = bottom-left, ax4 = bottom-right
(ax1, ax2), (ax3, ax4) = ax_array

In [None]:
# plot 1: daily sales line chart with gridlines and a callout
ax1.plot(
    'transaction_timestamp',
    'sales_value',
    data=daily_sales,
    linestyle=':',
    color='gray',
    linewidth=2
    )

# add additional context
ax1.set_title('Total daily sales across all stores', size=12)
ax1.set_ylabel('Total sales ($)')

# add gridlines, arrow, and text; shrink and rotate the date tick labels
ax1.grid(linestyle='dashed')
ax1.tick_params(axis='x', which='major', labelsize=8, labelrotation=45)
# NOTE(review): the arrow tip is anchored at 2017-12-20 while the label reads
# 'Christmas Eve' (Dec 24) -- confirm the offset is intentional
ax1.annotate(
    'Christmas Eve',
    xy=(dt(2017, 12, 20), 15900),
    xytext=(dt(2017, 7, 1), 15500),
    arrowprops={'color':'blue', 'width':0.5},
    size=8
    )

# plot 2: store-level quantity vs. sales; color and size come from the
# previously computed n_outliers / size_adj
ax2.scatter('quantity', 'sales_value', data=totals_by_store, c=n_outliers, s=size_adj)
ax2.set_title('Total store-level sales vs quantity.', size=12)

# plot 3: distribution of store-level sales
ax3.hist('sales_value', data=totals_by_store, bins=30)
ax3.set_title('Histogram of total store-level sales.', size=12)

# plot 4: horizontal boxplot of store-level quantity on a log scale.
# Removing the y ticks also removes their labels, so set_yticks([]) suffices.
ax4.boxplot('quantity', data=totals_by_store, vert=False)
ax4.set_xscale('log')
ax4.set_yticks([])
# title fixed: this panel is a boxplot, not a histogram
ax4.set_title('Boxplot of total store-level quantity.', size=12)

# final plot title
fig.suptitle('Total store-level sales and quantities (2017)', fontsize=20)
fig

## Exercise: Customizing Pizza Visualizations with Matplotlib

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from completejourney_py import get_data

# load the Complete Journey tables and pull out the three that are joined below
cj_data = get_data()
transactions = cj_data['transactions']
products = cj_data['products']
demographics = cj_data['demographics']

# inner joins: keep only transactions that have both a matching product
# record and a matching household demographics record
df = (
    transactions
    .merge(products, on='product_id', how='inner')
    .merge(demographics, on='household_id', how='inner')
)