In [1]:
# HIDDEN
from datascience import *
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')


In [2]:
# Model a six-sided die as a one-column table: each row of 'side' holds the dots on one face
die = Table([list(range(1, 7))], ['side'])

In [3]:
# Display the die table to check its single 'side' column
die

side
1
2
3
4
5
6


In [4]:
# One roll of the die = one row sampled from the die table
def roll_die():
    """Return the 'side' value of a single row sampled from the `die` table."""
    drawn = die.sample(1)
    return drawn['side'][0]

In [5]:
# Roll the die once.  Re-run this cell over and over and see what you get.
roll_die()

4

In [6]:
# Roll the die num_rolls times and record every outcome in a one-column table
num_rolls = 600
rolls = Table(
    [[roll_die() for _ in range(num_rolls)]],
    ['roll'],
)
rolls

roll
5
5
5
1
2
3
6
5
6
5


In [7]:
# Integer values 1 through 7, passed as `bins` to the histogram and bin calls
# that follow (stop=8 is exclusive)
bins = np.arange(start=1, stop=8)

NameError: name 'np' is not defined

The histogram of the rolls shows what we mean by 'uniform at random'.  All sides are equally likely to come up on each roll.  Thus the number of times each comes up in a large number of rolls is nearly constant.  But not quite.
The rolls table itself won't change on its own, but every time you re-run the simulation cell above, you will get a slightly different picture.

In [None]:
# Histogram of the recorded rolls, using the values in `bins` as the bins argument
rolls.hist(bins=bins)

In [None]:
# Normalizing the histogram gives a distribution: the proportion of rolls
# showing each side, which should be close to 1/6 for every side.
rolls.hist(normed=True,bins=bins)

In [None]:
# Bin the rolls with the same `bins`, normalized, and keep only the first six
# rows of the result (presumably one row per side 1-6 -- confirm against the
# displayed table).
roll_dist = (
    rolls
    .bin(normed=True, bins=bins)
    .take(range(6))
)
roll_dist

In [None]:
# Reference value: one sixth, to compare with the densities shown above
1/6

In [None]:
# What is the average value of a roll?
# Weight each bin value by its density and add up the products.
(roll_dist['bin'] * roll_dist['roll density']).sum()

In [None]:
# Plain arithmetic mean of the values in the 'roll' column
rolls['roll'].mean()

In [None]:
# Life is about rolling lots of dice.
# Simulate rolling n dice at once.
def roll(n):
    """Roll n dice.

    Samples n rows from the `die` table with replacement and returns the
    sampled table.
    """
    rolled = die.sample(n, with_replacement=True)
    return rolled

In [None]:
# Try it out: roll ten dice.  Re-run this cell many times to see the outcomes vary.
roll(10)

In [None]:
# Number of dice rolled together in each simulated throw
num_die = 10

In [None]:
# Number of simulated throws of the num_die dice.
# NOTE: this rebinds num_rolls, which was 600 for the single-die simulation.
num_rolls = 100

In [None]:
# Remember - referencing a column gives an array.
# Here: the 'side' values from one throw of num_die dice.
roll(num_die)['side']

In [None]:
# Throw num_die dice num_rolls times; each throw becomes one row, with one
# column ("roll_0", "roll_1", ...) per die
rolls = Table.from_rows(
    [roll(num_die)['side'] for _ in range(num_rolls)],
    ["roll_"+str(die_index) for die_index in range(num_die)],
)
rolls

In [None]:
# If we think of each row as a life experience, what is the life like?
# Each entry is the total shown by one throw of num_die dice.  The throws are
# simulated afresh here rather than read back from the `rolls` table.
label = "{}_dice".format(num_die)
sum_rolls = Table([[np.sum(roll(num_die)['side']) for i in range(num_rolls)]],[label])
# The sum of num_die dice lies between num_die (all ones) and 6*num_die (all
# sixes), so both plot limits are derived from num_die instead of hard-coding
# the lower one as 10.
sum_rolls.hist(range=[num_die,6*num_die])
sum_rolls.stats()

In [None]:
# Or as a distribution.  The possible sums run from num_die (all ones) to
# 6*num_die (all sixes), so both plot limits follow num_die rather than a
# hard-coded 10.
sum_rolls.hist(range=[num_die,6*num_die],normed=True)

In [None]:
# Normalized for the number of dice: dividing each sum by num_die gives the
# average value per die in that throw, plotted over the range 1 to 6
Table(
    [sum_rolls[label] / num_die],
    ['roll'],
).hist(normed=True, range=(1, 6))

In probability theory, the central limit theorem (CLT) states that, given certain conditions, the arithmetic mean of a sufficiently large number of iterates of independent random variables, each with a well-defined expected value and well-defined variance, will be approximately normally distributed, regardless of the underlying distribution.