In [91]:
from collections import Counter
from typing import Iterable, List, Union

In [116]:
import math


def Fraction(n, d) -> str:
    '''Render numerator n and denominator d as the text "Fraction(n, d)".

    This is a display helper only: it returns a string, performs no
    arithmetic and does not reduce the ratio to lowest terms.
    '''
    rendered = f'Fraction({n}, {d})'
    return rendered


class Sample(Counter):
    '''A collection of (outcome, frequency) pairs'''
    
    def mean(self, as_frac=False) -> Union[float, Fraction]:
        num = sum(outcome * freq for outcome, freq in self.items())
        denom = self.total()
        return Fraction(num, denom) if as_frac else num / denom
    
    
    def median(self) -> Union[int, float]:
        elements = sorted(self.elements())
        mid = len(elements) // 2
        if len(elements) % 2 == 1:
            return elements[mid]
        else:
            return (elements[mid - 1] + elements[mid]) / 2
    
    
    def variance(self, as_frac=False) -> Union[float, Fraction]:
        mean = self.mean()
        num = sum((outcome - mean)**2 for outcome in self.elements())
        denom = self.total() - 1 # why do we do this?
        return Fraction(num, denom) if as_frac else num / denom
    
    
    def stdev(self) -> float:
        return math.sqrt(self.variance())
    
    
    def quartiles(self) -> List[float]:
        quartiles = []
        elements = sorted(map(float, self.elements()))
        
        '''Given non-integer value i, return the average of the elements
        in the positions on either side of i (e.g. 6th and 7th elements
        for i = 6.25). '''
        avg = lambda i : (elements[int(i) - 1] + elements[int(i)]) / 2
        
        for q in [0.25, 0.5, 0.75]:
            pos = q * (len(elements) + 1)
            if pos.is_integer():
                quartiles.append(elements[int(pos) - 1])
            else:
                quartiles.append(avg(pos))
        
        return quartiles
    
    
    def percent_within(self, ndev=1) -> float:
        '''Return the proportion of items within ndev
        standard deviations of the sample mean.'''
        m = self.mean()
        s = self.stdev()
        return sum(1 if m - ndev * s <= elem <= m + ndev * s else 0 for elem in self.elements()) / self.total()


In [117]:
# Some tests using example problems from Navidi

# Even number of observations: the median averages the two middle values.
assert Sample([1, 2, 3, 4]).median() == 2.5
# Odd number of observations: the median is the middle value itself.
assert Sample([1, 2, 2, 3, 4]).median() == 2

# Sample standard deviation (n - 1 denominator), compared after rounding to 2 decimals.
assert round(Sample([65.51, 72.30, 68.31, 67.05, 70.68]).stdev(), 2) == 2.73

# 24 observations give quartile positions 6.25, 12.5 and 18.75, so each
# quartile is the average of the two neighbouring observations.
assert Sample([30, 75, 79, 80, 80, 105, 126, 138, 149, 179, 179, 191,
             223, 232, 232, 236, 240, 242, 245, 247, 254, 274, 384, 470]).quartiles() == [115.5, 207.0, 243.5]

#### Navidi 1.2.10, Cars

In [100]:
# Frequency table given as outcome -> count; the five counts sum to 100 observations.
dist = Sample({1: 70, 2: 15, 3: 10, 4: 3, 5: 2})

In [101]:
# Echo the sample to confirm the stored outcome -> frequency pairs.
dist

Sample({1: 70, 2: 15, 3: 10, 4: 3, 5: 2})

##### a)

In [102]:
# Frequency-weighted sample mean.
dist.mean()

1.52

In [103]:
# Same mean, shown as an unreduced numerator/denominator string.
dist.mean(as_frac=True)

'Fraction(152, 100)'

##### b)

In [104]:
# Sample standard deviation (square root of the n - 1 variance).
dist.stdev()

0.9372213390570224

##### c)

In [105]:
# Median; with 100 observations the two middle values are averaged, hence a float.
dist.median()

1.0

##### d)

In [106]:
# First, second and third quartiles.
dist.quartiles()

[1.0, 1.0, 2.0]

##### e)

In [115]:
# Proportion of observations strictly greater than the sample mean.
mean_outcome = dist.mean()  # computed once instead of once per element


def gt_mean(x):
    '''Return True when outcome x exceeds the sample mean.'''
    return x > mean_outcome


# Summing booleans counts the True values, i.e. the qualifying observations.
sum(gt_mean(elem) for elem in dist.elements()) / dist.total()

0.3

##### f)

In [112]:
# Proportion of observations more than one standard deviation from the mean.
1 - dist.percent_within()

0.15000000000000002

##### g)

In [113]:
# Proportion of observations within one standard deviation of the mean.
dist.percent_within()

0.85