Recently, Alberto Cairo created the [Datasaurus dataset](http://www.thefunctionalart.com/2016/08/download-datasaurus-never-trust-summary.html), which urges people to "never trust summary statistics alone; always visualize your data": although the data exhibits normal-seeming summary statistics, plotting it reveals a picture of a dinosaur. Inspired by Anscombe's Quartet and the Datasaurus, we present [The Datasaurus Dozen](https://www.autodeskresearch.com/publications/samestats).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the tab-separated table of all datasets, then keep the rows whose
# 'dataset' column equals 'dino' for the standalone plot.
datasaurus = pd.read_csv("DatasaurusDozen.tsv", sep='\t')
is_dino = datasaurus["dataset"] == 'dino'
dino = datasaurus.loc[is_dino]

In [None]:
# Scatter the 'dino' rows by their x/y columns, using small markers (s=10).
plt.scatter(dino["x"], dino["y"], s=10)

In [None]:
def plot_datasaurus(datasaurus, cols: int = 2, plot_size=4):
    """Draw every dataset except 'dino' as a grid of small scatter plots.

    Args:
        datasaurus: DataFrame with a ``dataset`` column naming each group and
            ``x`` / ``y`` columns holding the points to plot.
        cols: Number of subplot columns in the grid.
        plot_size: Width and height, in inches, allotted to each grid cell.

    Returns:
        None. The figure is displayed with ``plt.show()``; nothing is drawn
        when there is no dataset other than 'dino'.
    """
    # Sorted list instead of a set: panel order is stable between runs, and
    # filtering (rather than set.remove) does not fail when 'dino' is absent.
    datasets = sorted(
        name for name in datasaurus.dataset.unique() if name != 'dino'
    )
    if not datasets:
        return

    # Ceiling division keeps the row count an integer (plt.subplot rejects
    # floats) and sizes the grid from the data instead of assuming 12 panels.
    rows = -(-len(datasets) // cols)

    plt.figure(figsize=(cols * plot_size, rows * plot_size))
    for position, dataset in enumerate(datasets, start=1):
        points = datasaurus[datasaurus.dataset == dataset]
        ax = plt.subplot(rows, cols, position)
        ax.scatter(points.x, points.y, s=10)
        # Identical limits and a 1:1 aspect on every panel so the shapes are
        # directly comparable.
        ax.set_ylim([0, 100])
        ax.set_xlim([0, 100])
        ax.set_title(dataset)
        ax.grid(color='gray', linestyle='-', linewidth=0.3)
        ax.set_aspect(aspect=1)
    plt.tight_layout()
    plt.show()
    
# Plot the datasets in a grid three columns wide.
plot_datasaurus(datasaurus, cols=3)

### Stats

In [None]:
# Group the rows by dataset name; the grouped object is reused by later cells.
# The per-group mean of each remaining column is this cell's output.
datasaurus_summary = datasaurus.groupby(by='dataset')
datasaurus_summary.mean()

In [None]:
# Print the correlation table computed within each 'dataset' group.
print(datasaurus_summary.corr())