# Anscombe's quartet

# Document

<table align="left">
    <tr>
        <th class="text-align:left">Title</th>
        <td class="text-align:left">Anscombe's quartet</td>
    </tr>
    <tr>
        <th class="text-align:left">Last modified</th>
        <td class="text-align:left">2019-11-14</td>
    </tr>
    <tr>
        <th class="text-align:left">Author</th>
        <td class="text-align:left">Gilles Pilon &lt;gillespilon13@gmail.com&gt;</td>
    </tr>
    <tr>
        <th class="text-align:left">Status</th>
        <td class="text-align:left">Active</td>
    </tr>
    <tr>
        <th class="text-align:left">Type</th>
        <td class="text-align:left">Jupyter notebook</td>
    </tr>
    <tr>
        <th class="text-align:left">Created</th>
        <td class="text-align:left">2017-07-30</td>
    </tr>
    <tr>
        <th class="text-align:left">File name</th>
        <td class="text-align:left">anscombes_quartet.ipynb</td>
    </tr>
    <tr>
        <th class="text-align:left">Other files required</th>
        <td class="text-align:left">aq1.csv<br />aq2.csv<br />aq3.csv<br />aq4.csv</td>
    </tr>
</table>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial import polynomial as nppoly
import matplotlib.cm as cm
import matplotlib.axes as axes
from matplotlib.gridspec import GridSpec


%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [None]:
def despine(ax: axes.Axes) -> None:
    '''
    Hide the top and right spines of an Axes.

    A graph has one x axis (bottom) and one y axis (left), so only those
    two spines are kept. The 'right' and 'top' spines are made invisible
    in place; nothing is returned.

    Parameters
    ----------
    ax : axes.Axes
        The Axes whose 'right' and 'top' spines are switched off.
    '''
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

In [None]:
# Load the four data sets of the quartet. The paths are relative, so the CSV
# files are looked up in the current working directory. The plotting cells
# read the 'x' and 'y' columns of each frame.
aq1, aq2, aq3, aq4 = map(
    pd.read_csv, ('aq1.csv', 'aq2.csv', 'aq3.csv', 'aq4.csv')
)

In [None]:
# Text shared by every figure.
title = "Anscombe's Quartet"
yaxislabel, xaxislabel = 'Y', 'X'
# Axis limits applied to every quartet panel so the four data sets are drawn
# on identical scales.
xlim, ylim = [2, 20], [2, 14]
# Passed as figsize, which matplotlib reads as (width, height) in inches.
fighw = [8, 6]

In [None]:
# Colour palette for all plots: the colour list of matplotlib's qualitative
# "Paired" colormap. The cells below use c[0] for the scatter points and
# c[1] for the fitted line.
c = cm.Paired.colors
# Available entries: c[0] c[1] ... c[11]
# See "Paired" under "Qualitative colormaps":
# https://matplotlib.org/tutorials/colors/colormaps.html

In [None]:
# Work in progress: a simplified layout demo using plt.subplot2grid.
# Each of the 2 x 2 panels shows placeholder data (a ramp 0..19 plus integer
# jitter), not the quartet itself.
# The jitter comes from a seeded generator so the figure is identical after
# every "Restart Kernel -> Run All".
rng = np.random.default_rng(0)
ramp = np.arange(20)
plt.figure(figsize=fighw)
plt.suptitle(title, fontweight="bold")
for i in range(2):
    for j in range(2):
        ax = plt.subplot2grid((2, 2), (i, j))
        # integers(-5, 5, 20) draws 20 values from the half-open range
        # [-5, 5), the same interval np.random.randint(-5, 5, 20) covers.
        ax.scatter(ramp, ramp + rng.integers(-5, 5, 20), color=c[0])
        ax.set_ylabel(yaxislabel)
        ax.set_xlabel(xaxislabel)
        ax.set_title(f'Row {i} Column {j}')
        despine(ax)
# Extra padding keeps the suptitle clear of the panel titles.
plt.tight_layout(pad=3)

In [None]:
# Work in progress: a simplified layout demo using GridSpec, the method
# recommended by matplotlib.
# Each of the 2 x 2 panels shows placeholder data (a ramp 0..19 plus integer
# jitter), not the quartet itself.
# The jitter comes from a seeded generator so the figure is identical after
# every "Restart Kernel -> Run All".
rng = np.random.default_rng(0)
ramp = np.arange(20)
fig = plt.figure(figsize=fighw)
fig.suptitle(title, fontweight="bold")
gs = GridSpec(2, 2, figure=fig)
for i in range(2):
    for j in range(2):
        ax = fig.add_subplot(gs[i, j])
        # integers(-5, 5, 20) draws 20 values from the half-open range
        # [-5, 5), the same interval np.random.randint(-5, 5, 20) covers.
        ax.scatter(ramp, ramp + rng.integers(-5, 5, 20), color=c[0])
        ax.set_ylabel(yaxislabel)
        ax.set_xlabel(xaxislabel)
        ax.set_title(f'Row {i} Column {j}')
        despine(ax)
# Extra padding keeps the suptitle clear of the panel titles.
plt.tight_layout(pad=3)

In [None]:
# Anscombe's quartet as one 2 x 2 figure, laid out with plt.subplot2grid and
# saved to aq_original.svg.
# Every panel gets the same treatment (scatter, least-squares line, labels,
# shared limits, despine), so the panels are drawn in one loop.
plt.figure(figsize=fighw)
plt.suptitle(title, fontweight="bold")
# (roman numeral, data frame, (row, column) of the panel in the grid)
quartet = (
    ('I', aq1, (0, 0)),
    ('II', aq2, (0, 1)),
    ('III', aq3, (1, 0)),
    ('IV', aq4, (1, 1)),
)
panels, fits = [], []
for numeral, data, position in quartet:
    ax = plt.subplot2grid((2, 2), position)
    # linewidth=0 removes the marker outline.
    ax.scatter(data['x'], data['y'], color=c[0], linewidth=0, s=10,
               label=numeral)
    # np.polyfit returns the highest degree first: slope, then intercept.
    slope, intercept = np.polyfit(data['x'], data['y'], 1)
    ax.plot(data['x'], slope*data['x'] + intercept, '-', color=c[1],
            label="Linear")
    ax.set_ylabel(yaxislabel)
    ax.set_xlabel(xaxislabel)
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    ax.set_title(f'Data set {numeral}', fontsize=10)
    despine(ax)
    panels.append(ax)
    fits.append((slope, intercept))
# Expose each panel and its fit under individual names as well.
ax1, ax2, ax3, ax4 = panels
(m1, b1), (m2, b2), (m3, b3), (m4, b4) = fits
plt.tight_layout(pad=2)
plt.savefig('aq_original.svg', format='svg')

In [None]:
# Data set I on its own figure: least-squares line, scatter points, the
# shared axis limits, and an in-plot caption. Saved to aq1.svg.
fig, ax = plt.subplots(figsize=fighw)
despine(ax)
# nppoly.polyfit returns coefficients from the lowest degree up:
# intercept first, then slope.
b, m = nppoly.polyfit(aq1['x'], aq1['y'], 1)
ax.plot(aq1['x'], m*aq1['x'] + b, '-', color=c[1])
ax.scatter(
    aq1['x'], aq1['y'],
    color=c[0], linewidth=0, linestyle="-", s=10, label="I",
)
ax.set(ylim=ylim, xlim=xlim, ylabel=yaxislabel, xlabel=xaxislabel)
# Caption placed in data coordinates near the top-left corner.
ax.text(4, 13, 'Data set I', fontsize=10)
ax.set_title(title, fontweight="bold")
fig.savefig('aq1.svg', format='svg')

In [None]:
# Data set II on its own figure: least-squares line, scatter points, the
# shared axis limits, and an in-plot caption. Saved to aq2.svg.
# The figure size comes from the shared fighw setting rather than a
# hard-coded (8, 6), so changing fighw resizes this figure too.
fig, ax = plt.subplots(figsize=fighw)
despine(ax)
# nppoly.polyfit returns coefficients from the lowest degree up:
# intercept first, then slope.
b, m = nppoly.polyfit(aq2['x'], aq2['y'], 1)
ax.plot(aq2['x'], m*aq2['x'] + b, '-', color=c[1])
# linewidth=0 removes the marker outline.
ax.scatter(aq2['x'], aq2['y'], color=c[0], linewidth=0, s=10, label="II")
ax.set_ylim(ylim)
ax.set_xlim(xlim)
# Caption placed in data coordinates near the top-left corner.
ax.text(4, 13, 'Data set II', fontsize=10)
ax.set_ylabel(yaxislabel)
ax.set_xlabel(xaxislabel)
ax.set_title(title, fontweight="bold")
fig.savefig('aq2.svg', format='svg')

In [None]:
# Data set III on its own figure: least-squares line, scatter points, the
# shared axis limits, and an in-plot caption. Saved to aq3.svg.
# The figure size comes from the shared fighw setting rather than a
# hard-coded (8, 6), so changing fighw resizes this figure too.
fig, ax = plt.subplots(figsize=fighw)
despine(ax)
# nppoly.polyfit returns coefficients from the lowest degree up:
# intercept first, then slope.
b, m = nppoly.polyfit(aq3['x'], aq3['y'], 1)
ax.plot(aq3['x'], m*aq3['x'] + b, '-', color=c[1])
# linewidth=0 removes the marker outline.
ax.scatter(aq3['x'], aq3['y'], color=c[0], linewidth=0, s=10, label="III")
ax.set_ylim(ylim)
ax.set_xlim(xlim)
# Caption placed in data coordinates near the top-left corner.
ax.text(4, 13, 'Data set III', fontsize=10)
ax.set_ylabel(yaxislabel)
ax.set_xlabel(xaxislabel)
ax.set_title(title, fontweight="bold")
fig.savefig('aq3.svg', format='svg')

In [None]:
# Data set IV on its own figure: least-squares line, scatter points, the
# shared axis limits, and an in-plot caption. Saved to aq4.svg.
# The figure size comes from the shared fighw setting rather than a
# hard-coded (8, 6), so changing fighw resizes this figure too.
fig, ax = plt.subplots(figsize=fighw)
despine(ax)
# nppoly.polyfit returns coefficients from the lowest degree up:
# intercept first, then slope.
b, m = nppoly.polyfit(aq4['x'], aq4['y'], 1)
ax.plot(aq4['x'], m*aq4['x'] + b, '-', color=c[1])
# linewidth=0 removes the marker outline.
ax.scatter(aq4['x'], aq4['y'], color=c[0], linewidth=0, s=10, label="IV")
ax.set_ylim(ylim)
ax.set_xlim(xlim)
# Caption placed in data coordinates near the top-left corner.
ax.text(4, 13, 'Data set IV', fontsize=10)
ax.set_ylabel(yaxislabel)
ax.set_xlabel(xaxislabel)
ax.set_title(title, fontweight="bold")
fig.savefig('aq4.svg', format='svg')

In [None]:
# Anscombe's quartet as one 2 x 2 figure, laid out with plt.subplots and
# saved to aq_tight.svg.
# Every panel gets the same treatment (scatter, least-squares line, labels,
# shared limits, despine), so the panels are drawn in one loop.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=fighw)
fig.suptitle(title, fontweight='bold')
fits = []
for ax, numeral, data in zip(
    (ax1, ax2, ax3, ax4),
    ('I', 'II', 'III', 'IV'),
    (aq1, aq2, aq3, aq4),
):
    # linewidth=0 removes the marker outline.
    ax.scatter(data['x'], data['y'], color=c[0], linewidth=0, s=10,
               label=numeral)
    # np.polyfit returns the highest degree first: slope, then intercept.
    slope, intercept = np.polyfit(data['x'], data['y'], 1)
    ax.plot(data['x'], slope*data['x'] + intercept, '-', color=c[1],
            label="Linear")
    ax.set_ylabel(yaxislabel)
    ax.set_xlabel(xaxislabel)
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    # Set the title exactly once per panel: calling set_title() again
    # without fontsize resets the size to the rcParams default, which would
    # make that panel's title larger than the others.
    ax.set_title(f'Data set {numeral}', fontsize=10)
    despine(ax)
    fits.append((slope, intercept))
# Expose each fit under its individual name as well.
(m1, b1), (m2, b2), (m3, b3), (m4, b4) = fits
fig.tight_layout(pad=2)
fig.savefig('aq_tight.svg')

In [None]:
# Anscombe's quartet as one 2 x 2 figure, laid out with GridSpec and saved
# to aq_gridspec.svg.
# Every panel gets the same treatment (scatter, least-squares line, labels,
# shared limits, despine), so the panels are drawn in one loop.
fig = plt.figure(figsize=fighw)
fig.suptitle(title, fontweight='bold')
gs = GridSpec(2, 2, figure=fig)
panels, fits = [], []
for index, (numeral, data) in enumerate(
    (('I', aq1), ('II', aq2), ('III', aq3), ('IV', aq4))
):
    # A single integer indexes the grid cells in row-major order, so
    # gs[0], gs[1], gs[2], gs[3] are the same cells as
    # gs[0, 0], gs[0, 1], gs[1, 0], gs[1, 1].
    ax = fig.add_subplot(gs[index])
    # linewidth=0 removes the marker outline.
    ax.scatter(data['x'], data['y'], color=c[0], linewidth=0, s=10,
               label=numeral)
    # np.polyfit returns the highest degree first: slope, then intercept.
    slope, intercept = np.polyfit(data['x'], data['y'], 1)
    ax.plot(data['x'], slope*data['x'] + intercept, '-', color=c[1],
            label="Linear")
    ax.set_ylabel(yaxislabel)
    ax.set_xlabel(xaxislabel)
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    # Set the title exactly once per panel: calling set_title() again
    # without fontsize resets the size to the rcParams default, which would
    # make that panel's title larger than the others.
    ax.set_title(f'Data set {numeral}', fontsize=10)
    despine(ax)
    panels.append(ax)
    fits.append((slope, intercept))
# Expose each panel and its fit under individual names as well.
ax1, ax2, ax3, ax4 = panels
(m1, b1), (m2, b2), (m3, b3), (m4, b4) = fits
fig.tight_layout(pad=2)
fig.savefig('aq_gridspec.svg')

# References

[Wikipedia Anscombe's Quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet)

[matplotlib API overview](https://matplotlib.org/api/index.html)

[matplotlib API index](https://matplotlib.org/genindex.html)

[matplotlib.tight_layout module](https://matplotlib.org/api/tight_layout_api.html#module-matplotlib.tight_layout)