# Compute Statistics from Card Draws

In [None]:
import numpy as np
import pandas as pd
from enum import Enum
import matplotlib
import matplotlib.pyplot as plt

# Source: https://docs.python.org/3.4/library/enum.html#orderedenum
# https://github.com/hale/udacity-descriptive-statistics-project/blob/master/

class OrderedEnum(Enum):
    """Enum base class whose members can be ordered by their ``value``.

    Ordering is only defined between members of the exact same enum class.
    For any other operand each operator returns ``NotImplemented`` so that
    Python can try the reflected operation or raise ``TypeError``.
    """

    def __lt__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value < other.value

    def __le__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value <= other.value

    def __gt__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value > other.value

    def __ge__(self, other):
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value >= other.value

class Rank(OrderedEnum):
    """Card ranks, ordered from ace (1) up to king (13)."""

    ace = 1
    two = 2
    three = 3
    four = 4
    five = 5
    six = 6
    seven = 7
    eight = 8
    nine = 9
    ten = 10
    jack = 11
    queen = 12
    king = 13

    @property
    def score(self):
        """Point value of the rank: its own value, capped at 10.

        Ace through ten score 1-10; jack, queen and king all score 10.
        """
        return min(self.value, 10)
        
# The four suits; the functional enum API numbers the members 1..4 in the order listed.
Suit = OrderedEnum("Suit", ["heart", "diamond", "club", "spade"])

class Card(object):
    """A single playing card described by a rank and a suit.

    Attributes:
        rank: Object exposing ``name`` and ``score`` attributes (a ``Rank``
            member in this notebook).
        suit: Object exposing a ``name`` attribute (a ``Suit`` member in
            this notebook).
    """

    def __init__(self, rank, suit):
        self.rank = rank
        self.suit = suit

    def __str__(self):
        """Return a readable label such as ``"Queen of Spades"``."""
        return "{} of {}s".format(self.rank.name.capitalize(), self.suit.name.capitalize())

    def __repr__(self):
        """Return the same label as ``__str__`` so lists of cards print readably."""
        return self.__str__()

    def __value__(self):
        """Return the card's point value, i.e. ``self.rank.score``."""
        # ``score`` is a property (already an int), so it must not be called;
        # calling it raised "TypeError: 'int' object is not callable".
        return self.rank.score

class Deck(object):
    """A full deck holding one ``Card`` for every suit/rank combination."""

    def __init__(self):
        # Suits are the outer loop, so cards are grouped by suit and
        # ordered by rank within each suit.
        self.cards = [Card(rank, suit) for suit in Suit for rank in Rank]
                
# Show every card of a freshly built deck (list printing uses Card.__repr__).
print(Deck().cards)

In [None]:
# One row per card, with the suit, rank and point score split into their own columns.
df = pd.DataFrame({'card': Deck().cards})
df['suit'] = df['card'].map(lambda card: card.suit)
df['rank'] = df['card'].map(lambda card: card.rank)
df['score'] = df['card'].map(lambda card: card.rank.score)
# Peek at the last few rows to check the derived columns.
df.tail()

## Distribution of Card Values

In [None]:
# Create a histogram of the card values from a single draw.
# NOTE(review): DataFrame.hist() plots raw counts per bin by default, not
# relative frequencies; divide by the number of cards (or pass density/weights)
# if true relative frequencies are required.
# `%matplotlib inline` is an IPython magic that renders figures inside the notebook.
%matplotlib inline
# Draws one histogram per numeric column; presumably only `score` is numeric here.
df.hist()

* What is the mean of the card values for a single draw?
* What is the median of the card values for a single draw?
* What is the standard deviation of the card values for a single draw? (population sd)

In [None]:
# Mean / median / population standard deviation of the card values.
# The frame also contains non-numeric columns (card, suit, rank). pandas >= 2.0
# no longer drops such columns silently in reductions and raises TypeError
# instead, so the reductions are restricted to numeric columns explicitly.
print(df.describe())
print()
print(df.median(numeric_only=True))
print()
# ddof=0 -> population standard deviation (divide by N rather than N - 1).
print(df.std(ddof=0, numeric_only=True))

## Get Samples from the Deck of Cards

To obtain a single sample, shuffle your deck of cards and draw three cards from it. (You will be sampling from the deck without replacement.) Record the cards that you have drawn and the sum of the three cards’ values. Repeat this sampling procedure a total of at least thirty times.

In [None]:
def get_sample(n = 3):
    """Draw ``n`` rows from the module-level ``df`` without replacement.

    The index of ``df`` is randomly permuted, the frame is reordered to
    match, and the first ``n`` rows of the shuffled frame are returned.
    """
    shuffled_labels = np.random.permutation(df.index)
    shuffled_deck = df.reindex(shuffled_labels)
    return shuffled_deck.head(n)

# Preview a single draw using the default sample size.
get_sample()

In [None]:
# Simulate 10,000 draws and record the summed score of each one.
samples = pd.Series([get_sample()['score'].sum() for _ in range(10000)])

## Distribution of Values

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the simulated draw sums.
samples.describe()

In [None]:
# The sums are integers, so use unit-width bins centred on each integer.
# A fixed count of 26 equal-width bins does not line up with the integer
# values once the observed range covers more than 26 of them (three-card
# sums can span 3..30), so some bars merge two neighbouring sums and the
# histogram shows spurious spikes.
samples.hist(bins = np.arange(samples.min() - 0.5, samples.max() + 1.5))

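The distribution of sample values is approximately normal. By the Central Limit Theorem, the distribution of sample means (or in this case, sums) approaches a normal distribution as the sample size grows, whatever the shape of the population. With only three cards per draw the approximation is rough, but it is close enough for the estimates below.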

* What range of values do you expect 90% of three-card draw sums to fall into?

90% of the draw values should fall between the 5th and 95th percentiles. The z-scores that correspond to these percentiles are -1.645 and 1.645, respectively. For our estimated mean of 19.54 and our estimated standard deviation of 5.39, these z-scores correspond to:

* z = (x - sample_mean) / SE => -1.645 = (x - 19.54) / 5.39 => x = 10.67
* z = (x - sample_mean) / SE => 1.645 = (x - 19.54) / 5.39 => x = 28.41

Therefore, we expect approximately 90% of the draw values to fall in the range (10.67, 28.41).

* How likely are you to get a draw of at least 20 from a random draw?

Using our estimated mean and standard deviation, 20 corresponds to a z-score of:

* z = (x - sample_mean) / SE => z = (20 - 19.54) / 5.39 => z = 0.085

Using this, we find that the probability that the draw value will be less than 20 is 0.534. Therefore, the probability that the draw value will be at least 20 is 0.466.