# Anscombe's quartet

- Create Anscombe's quartet of four graphs as four axes in one figure.
- Create Anscombe's quartet of four graphs as four separate figures.

# Document

<table align="left">
    <tr>
        <th style="text-align:left">Title</th>
        <td style="text-align:left">Anscombe's quartet</td>
    </tr>
    <tr>
        <th style="text-align:left">Last modified</th>
        <td style="text-align:left">2019-11-25</td>
    </tr>
    <tr>
        <th style="text-align:left">Author</th>
        <td style="text-align:left">Gilles Pilon &lt;gillespilon13@gmail.com&gt;</td>
    </tr>
    <tr>
        <th style="text-align:left">Status</th>
        <td style="text-align:left">Active</td>
    </tr>
    <tr>
        <th style="text-align:left">Type</th>
        <td style="text-align:left">Jupyter notebook</td>
    </tr>
    <tr>
        <th style="text-align:left">Created</th>
        <td style="text-align:left">2017-07-30</td>
    </tr>
    <tr>
        <th style="text-align:left">File name</th>
        <td style="text-align:left">anscombes_quartet.ipynb</td>
    </tr>
    <tr>
        <th style="text-align:left">Other files required</th>
        <td style="text-align:left">aq1.csv<br />aq2.csv<br />aq3.csv<br />aq4.csv</td>
    </tr>
</table>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from numpy.polynomial import polynomial as nppoly
import matplotlib.cm as cm
import matplotlib.axes as axes
from matplotlib.gridspec import GridSpec


%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [None]:
def despine(ax: axes.Axes) -> None:
    '''
    Hide the right and top spines of the given axes.

    A graph has a single x axis (bottom) and a single y axis (left), so
    only those two spines are left visible. The axes object is modified
    in place and nothing is returned.
    '''
    hidden_sides = ('right', 'top')
    for side in hidden_sides:
        ax.spines[side].set_visible(False)

In [None]:
def plot_scatter(dfx, dfy, i, j):
    '''
    Plot one Anscombe Quartet data set in a figure by itself.

    The points are drawn as a scatter plot and a first-degree
    least-squares fit is drawn over them. Figure size, titles, axis
    labels, axis limits and colours are read from the module-level names
    fighw, fig_title, ax_title, xaxislabel, yaxislabel, xlim, ylim and c,
    which must exist before this function is called.

    dfx, dfy: the x and y values to plot (presumably pandas Series;
              they must support arithmetic such as m*dfx + b).
    i, j:     row and column indices used to look up the axes title in
              ax_title.

    Returns the axes that were drawn on.
    '''
    fig = plt.figure(figsize=fighw)
    fig.suptitle(fig_title, fontweight='bold')
    ax = fig.add_subplot(111)
    # Label the points with this data set's own title rather than a fixed
    # "I", so a legend, if one is added, names the right data set.
    ax.scatter(dfx, dfy,
               color=c[0], linewidth=0,
               linestyle="-", s=10, label=ax_title[i][j])
    # polyfit returns coefficients lowest degree first: intercept, slope.
    b, m = nppoly.polyfit(dfx, dfy, 1)
    ax.plot(dfx, m*dfx + b, '-', color=c[1])
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    ax.set_title(ax_title[i][j])
    ax.set_ylabel(yaxislabel)
    ax.set_xlabel(xaxislabel)
    despine(ax)
    return ax

In [None]:
def read_files():
    '''
    Load the four Anscombe Quartet CSV files from the current directory.

    Returns a tuple of four dataframes, in the order aq1, aq2, aq3, aq4.
    '''
    file_names = ['aq1.csv', 'aq2.csv', 'aq3.csv', 'aq4.csv']
    return tuple(pd.read_csv(file_name) for file_name in file_names)

In [None]:
def plot_one_in_four(df):
    '''
    Draw every Anscombe Quartet data set in its own figure and save each
    figure as an SVG file named after its row and column index.

    df is indexed as df[row][column] and each entry provides 'x' and 'y'.
    '''
    grid_cells = [(row, col) for row in range(2) for col in range(2)]
    for i, j in grid_cells:
        frame = df[i][j]
        plot_scatter(frame['x'], frame['y'], i, j)
        svg_name = f'aq{i}{j}.svg'
        plt.savefig(svg_name)

In [None]:
def plot_four_in_one(df):
    '''
    Draw the four Anscombe Quartet data sets as a 2 x 2 grid of axes in a
    single figure, then save that figure as aq.svg.

    df is indexed as df[row][column] and each entry provides 'x' and 'y'.
    '''
    fig = plt.figure(figsize=fighw)
    fig.suptitle(fig_title, fontweight="bold")
    grid = GridSpec(2, 2, figure=fig)
    for i in range(2):
        for j in range(2):
            ax = fig.add_subplot(grid[i, j])
            frame = df[i][j]
            x_values = frame['x']
            y_values = frame['y']
            ax.scatter(x_values, y_values,
                       color=c[0], linewidth=0,
                       linestyle="-", s=10)
            # First-degree fit: intercept first, then slope.
            intercept, slope = nppoly.polyfit(x_values, y_values, 1)
            fitted = slope*x_values + intercept
            ax.plot(x_values, fitted, '-', color=c[1])
            ax.set_ylim(ylim)
            ax.set_xlim(xlim)
            ax.set_title(ax_title[i][j])
            ax.set_ylabel(yaxislabel)
            ax.set_xlabel(xaxislabel)
            despine(ax)
    plt.tight_layout(pad=3)
    plt.savefig('aq.svg')

In [None]:
if __name__ == '__main__':
    # The names below are read as globals by the plotting functions, so
    # they must be assigned before any of those functions is called.
    fig_title = "Anscombe's Quartet"
    # Axes titles, indexed as ax_title[row][column].
    ax_title = [('Data set I', 'Data set II'),
                ('Data set III', 'Data set IV')]
    yaxislabel = 'Y'
    xaxislabel = 'X'
    # The same axis limits are applied to every graph.
    xlim = [2, 20]
    ylim = [2, 14]
    # Passed to plt.figure as figsize.
    fighw = [8, 6]
    # c[0] colours the points and c[1] colours the fitted line.
    c = cm.Paired.colors
    aq1, aq2, aq3, aq4 = read_files()
    # Arrange the dataframes in the 2 x 2 layout used for the graphs.
    df = [(aq1, aq2), (aq3, aq4)]
    plot_four_in_one(df)
    plot_one_in_four(df)

# References

[Wikipedia Anscombe's Quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet)

[matplotlib API overview](https://matplotlib.org/api/index.html)

[matplotlib API index](https://matplotlib.org/genindex.html)

[matplotlib.tight_layout module](https://matplotlib.org/api/tight_layout_api.html#module-matplotlib.tight_layout)

[numpy.polynomial.polynomial.polyfit](https://docs.scipy.org/doc/numpy/reference/generated/numpy.polynomial.polynomial.polyfit.html#numpy.polynomial.polynomial.polyfit)