## Q1.

Add a `__setitem__` method to the Python linked-list implementation from the lecture (this past Wednesday).

In [3]:
# Built-in lists implement indexing through __getitem__; calling it directly
# with 2 returns the element at position 2.
alist=[1,2,3,4]
alist.__getitem__(2)

3

In [4]:
#your code here
from doctest import run_docstring_examples as dtest
import numbers
import reprlib
class LL:
    """
    Singly linked list whose nodes are two-element lists ``[value, next]``.

    ``self._headNode`` is the first node, or ``None`` when the list is empty.
    The class supports ``len()``, ``repr()``, and integer indexing for both
    reads (``ll[i]``) and writes (``ll[i] = v``). Non-integer indices such as
    slices raise ``TypeError``; negative or too-large indices raise
    ``IndexError``.

    >>> A = LL()
    >>> A[0]
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> A[0] = 1
    Traceback (most recent call last):
        ...
    IndexError: trying to set an empty LL
    >>> A.insert_front(1)
    >>> A[0]
    1
    >>> A.insert_back(2)
    >>> A[1]
    2
    >>> A
    LL([1,...])
    >>> myll = LL.from_components([1,2])
    >>> myll[1]
    1
    >>> len(myll)
    2
    >>> myll[2]
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1]
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> myll[0] = 3
    >>> myll[0]
    3
    >>> myll[1] = 4
    >>> myll[1]
    4
    >>> myll[2] = 5
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1] = 6
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> B = LL()
    >>> B.insert_back(7)
    >>> B[0]
    7
    >>> len(LL.from_components([]))
    0
    """

    @classmethod
    def from_components(cls, components):
        """Build a list by pushing every item of ``components`` onto the front.

        Because each item is inserted at the head, the resulting list holds
        the items in reverse order: ``from_components([1, 2])[0]`` is ``2``.
        An empty iterable produces an empty list.
        """
        inst = cls()
        for component in components:
            inst.insert_front(component)
        return inst

    def __init__(self, head=None):
        """Create an empty list, or a one-element list holding ``head``.

        ``None`` means "no initial element", so ``LL(None)`` is empty; use
        ``insert_front(None)`` to store ``None`` as a value.
        """
        self._headNode = None if head is None else [head, None]

    def insert_front(self, element):
        """Prepend ``element``; the previous head becomes its successor."""
        self._headNode = [element, self._headNode]

    def insert_back(self, element):
        """Append ``element`` after the current last node."""
        new_node = [element, None]
        if self._headNode is None:
            # An empty list has no tail to link from: the new node is the head.
            self._headNode = new_node
            return
        tail = self._headNode
        while tail[1] is not None:
            tail = tail[1]
        tail[1] = new_node

    def __repr__(self):
        """Return ``ClassName([first,...])`` showing only the first element."""
        class_name = type(self).__name__
        if self._headNode is None:
            components = ""
        else:
            components = reprlib.repr(self._headNode[0])
        return '{}([{},...])'.format(class_name, components)

    def __len__(self):
        """Return the number of nodes, counted by walking the chain."""
        count = 0
        node = self._headNode
        while node is not None:
            count += 1
            node = node[1]
        return count

    def _node_at(self, index, action):
        """Return the node at ``index``; shared lookup for get and set.

        ``action`` is the verb used in the empty-list error message
        (``'index'`` for reads, ``'set'`` for writes).

        Raises:
            TypeError: if ``index`` is not a ``numbers.Integral``.
            IndexError: if the list is empty, or ``index`` is negative or
                past the last node.
        """
        class_name = type(self).__name__
        if not isinstance(index, numbers.Integral):
            msg = '{class_name} indices must be integers'
            raise TypeError(msg.format(class_name=class_name))
        node = self._headNode
        if node is None:
            msg = 'trying to {action} an empty {class_name}'
            raise IndexError(msg.format(action=action, class_name=class_name))
        steps = 0
        # For a negative index the loop never runs and steps (0) != index,
        # so it falls through to the out-of-range error below.
        while node is not None and steps < index:
            node = node[1]
            steps += 1
        if node is None or steps != index:
            msg = '{class_name} index out of range'
            raise IndexError(msg.format(class_name=class_name))
        return node

    def __getitem__(self, index):
        """Return the value stored at integer position ``index``."""
        return self._node_at(index, 'index')[0]

    def __setitem__(self, index, value):
        """Replace the value stored at integer position ``index``.

        Only existing positions can be assigned; the list never grows here.
        """
        self._node_at(index, 'set')[0] = value

## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [5]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    """Yield a stream of noisy floats centred on 1.0e09.

    Each value is ``1.0e09`` plus a normal deviate with mean 0 and standard
    deviation ``m * random()``, so the spread is re-drawn for every sample.

    Args:
        m: upper scale for the per-sample standard deviation.
        stop: last counter index to produce, inclusive. ``stop=2`` yields
            three values and ``stop=0`` yields one. ``None`` (the default)
            makes the stream infinite.
    """
    for i in count():
        # Compare against None explicitly so that stop=0 is honoured rather
        # than being treated as "no limit".
        if stop is not None and i > stop:
            break
        yield 1.0e09 + normalvariate(0, m * random())
        

In [7]:
# Materialize a bounded stream: stop=2 is inclusive, so counter values
# 0, 1 and 2 each produce one sample.
list(make_data(5,2))

[1000000000.7549908, 999999997.8325588, 999999996.8880382]

Here is an implementation of an online mean algorithm. See http://www.johndcook.com/blog/standard_deviation/ and the link to http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ in between. (Convince yourselves of the formulas...)

In [10]:
def online_mean(iterator):
    """Yield the running mean after each value drawn from ``iterator``.

    The mean is updated incrementally as ``mean + (x - mean) / k`` for the
    k-th value, so the input is consumed lazily and never stored.
    """
    running_mean = 0
    for seen, sample in enumerate(iterator, start=1):
        running_mean = running_mean + (sample - running_mean) / seen
        yield running_mean

We use our generator functions to implement iterators:

In [11]:
# stop=10 is inclusive, so this generator produces eleven raw samples;
# list() exhausts it.
g = make_data(5, 10)
list(g)

[1000000000.8602657,
 1000000003.8030909,
 999999997.3788062,
 999999998.9032848,
 1000000000.2013532,
 1000000000.9687692,
 999999999.0212168,
 999999998.2940586,
 1000000002.4101428,
 1000000002.0160671,
 999999991.9516482]

In [12]:
# Chain the generators: online_mean pulls values from make_data one at a
# time, and nothing is computed until list() drains the pipeline.
g = online_mean(make_data(5, 100))
print(type(g))
list(g)

<class 'generator'>


[999999999.147202,
 999999999.3621409,
 999999999.5624505,
 999999999.7757088,
 999999999.4152756,
 999999999.587253,
 999999999.9054835,
 999999999.9942588,
 999999999.9959834,
 1000000000.1426,
 1000000000.1039194,
 1000000000.1689925,
 1000000000.4828148,
 1000000000.4030095,
 1000000000.5495793,
 1000000000.4979496,
 1000000000.255478,
 1000000000.3380091,
 1000000000.0593823,
 1000000000.0156648,
 999999999.9342558,
 999999999.9772483,
 999999999.9891541,
 999999999.7527844,
 999999999.7642971,
 999999999.729416,
 999999999.6682594,
 999999999.8192363,
 999999999.8517159,
 999999999.8935198,
 999999999.780602,
 999999999.7628399,
 999999999.730688,
 999999999.7167678,
 999999999.7725041,
 999999999.7341801,
 999999999.628769,
 999999999.7262572,
 999999999.5559964,
 999999999.5774873,
 999999999.5822929,
 999999999.6647944,
 999999999.6321726,
 999999999.7310344,
 999999999.6523372,
 999999999.6744062,
 999999999.6809155,
 999999999.6678241,
 999999999.6618245,
 999999999.675602,


### 2.1

Implement the standard deviation algorithm as a generator function as

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

In [13]:
# your code here
import math
def online_mean_dev(iterator):
    """Yield ``(n, value, mean, stddev)`` for each value after the first.

    The mean and the sum of squared deviations are updated incrementally
    per value, so the input is consumed lazily. ``stddev`` is the sample
    standard deviation (divisor ``n - 1``), which is why nothing is yielded
    for the very first value.
    """
    mean = 0
    sq_dev_sum = 0
    for seen, sample in enumerate(iterator, start=1):
        prev_mean = mean
        mean = prev_mean + (sample - prev_mean) / seen
        # Uses both the old and the new mean.
        sq_dev_sum = sq_dev_sum + (sample - mean) * (sample - prev_mean)
        if seen < 2:
            continue
        yield (seen, sample, mean, math.sqrt(sq_dev_sum / (seen - 1)))


Here we make a 100000-element data stream and run this iterator on it (imagine running this on a time series being slowly read from disk).

In [14]:
# Building the pipeline is lazy: both functions are generators, so no data
# is produced until data_with_stats is advanced.
data_with_stats = online_mean_dev(make_data(5, 100000))

In [17]:
# Pull the next five (n, value, mu, stddev) tuples. next() consumes them
# from data_with_stats, so later cells only see what comes after.
for i in range(5):
    print(next(data_with_stats))

(7, 1000000001.848194, 1000000000.0399144, 1.5972751817170585)
(8, 1000000003.6715766, 1000000000.4938722, 1.9584278439865066)
(9, 999999996.4292532, 1000000000.0422479, 2.2785281413816567)
(10, 1000000003.373593, 1000000000.3753824, 2.3926182287037037)
(11, 1000000006.4026257, 1000000000.9233136, 2.90769229068073)


## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t)
```

which takes a tuple like the one yielded by your code above and returns True if the value lies within `level` $\sigma$ (that is, `level` standard deviations) of the mean.

In [18]:
#your code here
def is_ok(level, t):
    """Return True when the value in ``t`` is within ``level`` deviations.

    ``t`` is indexed as ``(n, value, mean, stddev)``. The check is strict:
    a value exactly ``level * stddev`` away from the mean is not ok.
    """
    value, mean, stddev = t[1], t[2], t[3]
    deviation = abs(value - mean)
    return bool(deviation < stddev * level)

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [19]:
from itertools import filterfalse
# pred is True for tuples whose value is within 5 standard deviations of
# the running mean; filterfalse keeps the tuples where pred is False,
# i.e. the anomalies. The filter is lazy and reads from data_with_stats.
pred = lambda t: is_ok(5, t)
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [20]:
# Draining the filter pulls every remaining tuple of data_with_stats
# through pred and collects only the rejected ones.
list(anomalies)#materialize

[(1782, 999999980.77654, 999999999.8912255, 2.9999738406756755),
 (4250, 999999985.3141589, 999999999.9970548, 2.9076374232073667),
 (7318, 999999984.8045067, 1000000000.0223895, 2.923392061496004),
 (8496, 999999979.9388931, 1000000000.0085266, 2.9257835677102384),
 (10569, 1000000015.9076763, 1000000000.0218803, 2.9324183313642784),
 (17784, 999999982.3126541, 1000000000.0147505, 2.914380479243411),
 (22154, 1000000015.0276586, 1000000000.0166179, 2.920232951668278),
 (24920, 999999983.0749401, 1000000000.0109448, 2.920940187665244),
 (30656, 1000000016.10791, 1000000000.0169349, 2.923598390504787),
 (34467, 999999984.9024382, 1000000000.0098848, 2.916935047696519),
 (39426, 999999983.2533312, 1000000000.0046419, 2.912282080788748),
 (41993, 1000000021.4394876, 999999999.9985816, 2.909205683944786),
 (42485, 1000000014.9442719, 999999999.9995, 2.9096594696929223),
 (56150, 999999979.9017302, 1000000000.0040393, 2.899650819227098),
 (58157, 999999984.3178011, 1000000000.0043092, 2.897

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would a shorter "window" of anomaly detection, like 100 points around the time in question pick? How might you create an algorithm which does window based averaging? (hint: the window size is small compared to the time series size). 

Finally, think a bit about how you might implement all of this in a production environment. Remember that data streaming in might get backed up when you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).