In [1]:
# setup: inject CSS into the notebook page.
# `display` and `HTML` are imported from the public `IPython.display` module
# (the `IPython.core.display` path for `display` is deprecated).
from IPython.display import display, HTML
# Collapse every element carrying the `prompt` CSS class to zero width.
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))
# Read the stylesheet inside a context manager so the file handle is closed
# as soon as its contents have been read.
with open('rise.css') as css_file:
    display(HTML(css_file.read()))

# imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Plot defaults for the whole notebook: whitegrid style, 1.5x font scale, (12, 6) figure size.
sns.set(style="whitegrid", font_scale=1.5, rc={'figure.figsize':(12, 6)})


# CMPS 2200
# Introduction to Algorithms

## Scan + Problems


In [11]:
# Review of primitive functions.

def tabulate(f, n):
    """Build the list [f(0), f(1), ..., f(n-1)].

    Params:
      f...a function of one integer index.
      n...the number of indices to evaluate, starting from 0.
    Returns:
      list holding f(i) for each i in range(n), in increasing order of i.
    """
    values = []
    for index in range(n):
        values.append(f(index))
    return values

def my_map(f, a):
    """Apply `f` to every element of `a`.

    Params:
      f...a function of one argument.
      a...an iterable of inputs.
    Returns:
      list of f(x) for each x in `a`, in iteration order.
    """
    mapped = []
    for element in a:
        mapped.append(f(element))
    return mapped

def my_filter(f, a):
    """Keep the elements of `a` for which `f` returns a truthy value.

    Params:
      f...a predicate of one argument.
      a...an iterable of candidates.
    Returns:
      list of the elements x with f(x) truthy, in iteration order.
    """
    kept = []
    for element in a:
        if f(element):
            kept.append(element)
    return kept

def iterate(f, x, a):
    """Fold `a` from left to right, starting from `x`.

    Computes f(...f(f(x, a[0]), a[1])..., a[-1]).

    Implemented as a loop rather than by recursing on `a[1:]`: the recursive
    form makes one call and one copy of the remaining sequence per element,
    so it raises RecursionError once `a` is longer than the interpreter's
    recursion limit.

    Params:
      f...a function of two arguments (accumulator, element) returning the new accumulator.
      x...the initial accumulator value.
      a...the elements to fold, in order.
    Returns:
      the final accumulator; `x` itself when `a` is empty.
    """
    accumulator = x
    for element in a:
        accumulator = f(accumulator, element)
    return accumulator

def reduce(f, id_, a):
    """Combine the elements of `a` with `f` by splitting the sequence in half.

    The two halves (split at len(a)//2) are reduced recursively, left half
    first, and their results are combined with one call to `f`.

    Params:
      f...a function of two arguments.
      id_...the value returned for an empty sequence.
      a...a sliceable sequence.
    Returns:
      `id_` when `a` is empty, `a[0]` when it has one element (without
      calling `f`), otherwise f(left_result, right_result).
    """
    size = len(a)
    if size > 1:
        middle = size // 2
        left_result = reduce(f, id_, a[:middle])
        right_result = reduce(f, id_, a[middle:])
        return f(left_result, right_result)
    return a[0] if size == 1 else id_


def scan(f, id_, a):
    """Reduce every prefix of `a`, and `a` itself.

    Entry i of the returned list is reduce(f, id_, a[:i+1]), so each prefix
    includes element i (an inclusive prefix). Every prefix is reduced
    independently with its own call to `reduce`.

    Params:
      f...a function of two arguments, passed through to `reduce`.
      id_...the value `reduce` returns for an empty sequence.
      a...a sliceable sequence.
    Returns:
      tuple (prefix_results, total), where total is reduce(f, id_, a).
    """
    prefix_results = []
    for end in range(1, len(a) + 1):
        prefix_results.append(reduce(f, id_, a[:end]))
    total = reduce(f, id_, a)
    return (prefix_results, total)



In [2]:
def plus(x, y):
    """Return `x + y`; works for anything that supports `+` (numbers, lists, strings, ...)."""
    total = x + y
    return total


def flatten(sequences):
    """Concatenate a sequence of lists into a single list.

    Uses `reduce` with `plus` and the empty list as the identity; the
    left-to-right equivalent would be iterate(plus, [], sequences).

    Params:
      sequences...a sliceable sequence of lists.
    Returns:
      the concatenation of all the lists, in order.
    """
    empty = []
    joined = reduce(plus, empty, sequences)
    return joined

### Divide-and-Conquer Scan

<p><span class="math display">\[\begin{array}{l}  
\mathit{scanDC}~f~\mathit{id}~a =  
\\  
~~~~\texttt{if}~|a| = 0~\texttt{then}  
\\  
~~~~~~~~(\left\langle\,  \,\right\rangle, \mathit{id})  
\\  
~~~~\texttt{else if}~|a| = 1~\texttt{then}  
\\   
~~~~~~~~(\left\langle\, \mathit{id} \,\right\rangle,a[0])  
\\  
~~~~\texttt{else}  
\\   
~~~~~~~~\texttt{let}  
\\   
~~~~~~~~~~~~(b, c) = \mathit{splitMid}~a  
\\  
~~~~~~~~~~~~((l,b'),(r,c')) = (\mathit{scanDC}~f~\mathit{id}~b \mid\mid{}~\mathit{scanDC}~f~\mathit{id}~c)  
\\  
~~~~~~~~~~~~r' = \left\langle\, f (b',x) : x \in r \,\right\rangle  
\\  
~~~~~~~~\texttt{in}  
\\  
~~~~~~~~~~~~(\mathit{append}~(l,r'), f(b',c'))  
\\  
~~~~~~~~\texttt{end}  
\end{array}\]</span></p>



Assuming that $f$ can be computed in constant time, we get the following recurrences for work and span:
    
$$ W(n) = 2W(n/2) + O(n)$$

and 

$$ S(n) = S(n/2) + O(1) $$

Thus the work is $O(n \log n)$ and the span is $O(\log n)$.



### Contraction `scan` 

\begin{array}{l}  
\\  
\mathit{scan}~f~\mathit{id}~a =  
\\  
~~~~\texttt{if}~|a| = 0~\texttt{then}   
\\  
~~~~\left(\left\langle\,  \,\right\rangle, id\right)  
\\  
~~~~\texttt{else if}~|a| = 1 ~\texttt{then}  
\\  
~~~~~~~~\left( \left\langle\, id \,\right\rangle, a[0] \right)  
\\  
~~~~\texttt{else}  
\\   
~~~~~~~~\texttt{let}  
\\  
~~~~~~~~~~~~a' = \left\langle\,  f(a[2i],a[2i+1]) : 0 \leq i < n/2 \,\right\rangle  
\\  
~~~~~~~~~~~~(r,t) = \mathit{scan}~f~\mathit{id}~ a' 
\\  
~~~~~~~~\texttt{in}  
\\   
~~~~~~~~~~~~(\left\langle\,  p_i : 0 \leq i < n  \,\right\rangle, t),~\texttt{where}~p_i =   
\begin{cases}  
     r[i/2]  & \texttt{even}(i) \\  
     f(r[i/2], a[i-1]) & \texttt{otherwise}  
\end{cases}  
\\  
~~~~~~~~\texttt{end}  
\end{array}


Assuming that $f$ can be computed in constant time, we get the following recurrences for work and span:
    
$$ W(n) = W(n/2) + O(n)$$

and 

$$ S(n) = S(n/2) + O(1) $$

Thus the work is $O(n)$ and the span is $O(\log n)$.

- surprisingly, the *same* work and span as `reduce`
- even though we're keeping track of output for all prefixes.

<br><br>
`scan` is a popular primitive in parallel programming, used to solve many problems; for examples, see:
https://en.wikipedia.org/wiki/Prefix_sum


### Filtering

```python
def my_filter(f, a):
    return [x for x in a if f(x)]
```

We need to account for the work to create the return array.

<br><br>
We can't do this in constant span, because the location of one value depends on the location of other values.
<br><br>

$filter \:\: positive \:\: [-1,3,-2,4,-5,6] \rightarrow [3,4,6]$

<br><br>
**idea:** Make a first, parallel pass to create boolean values indicating if the value will be copied to the new array.

$[false,true,false,true,false,true]$

Use **scan** to determine indices in the new array. 

<br>
We'll see a version of this on the lab this week.


<span style="color:red">Question:</span> Find an element that separates a sequence so that the sum of the elements to its left equals the sum of the elements to its right. If no such element exists, return -1.

> example 1: [1, 6, 4, 2, 5] $\rightarrow$ 4<br>
> example 2: [8, 6, 4, 2, 5] $\rightarrow$ -1

In [5]:
# Work on the example sequence and on its mirror image.
a = [1, 6, 4, 2, 5]
b = list(reversed(a))
print(a, b, sep='\n')

# scan returns (prefix results, total): `pa` holds sums taken from the left
# end of `a`, `pb` holds sums taken from its right end (the left end of `b`).
(pa, sa), (pb, sb) = scan(plus, 0, a), scan(plus, 0, b)

print(pa, pb, sep='\n')


[1, 6, 4, 2, 5]
[5, 2, 4, 6, 1]
[1, 7, 11, 13, 18]
[5, 7, 11, 17, 18]


In [6]:
## count the frequency per unique character
# `seq` is the input string for this example; printing it shows the raw characters.

seq = 'mississippi'
print(seq)

mississippi


In [7]:
## define a mapping function

# Tag every character of `seq` with a count of 1: one (character, 1) pair per character.
seq_map = [(ch, 1) for ch in seq]

print(seq_map)

[('m', 1), ('i', 1), ('s', 1), ('s', 1), ('i', 1), ('s', 1), ('s', 1), ('i', 1), ('p', 1), ('p', 1), ('i', 1)]


### Collect

> Given a sequence of key-value pairs, the operation collect “collects” together all the values for a given key. This operation is quite common in data processing, and in relational database languages such as SQL it is referred to as “Group by”. 
 
 
```python 
groups = collect([('i', 1), ('s', 1), ('m', 1), ('i', 1)])
# groups == [('i', [1, 1]), ('m', [1]), ('s', [1])]

```
    

In [8]:
from collections import defaultdict

def collect(pairs):
    """Group the values of (key, value) pairs by key.

    Params:
      pairs...an iterable of pairs; element 0 is the key and element 1 is the value.
              Keys must be hashable and orderable against each other.
    Returns:
      list of (key, list_of_values) tuples in increasing key order. Within a
      group the values keep the order they had in `pairs`.
    E.g.
    >>> collect([('i', 1), ('s', 1), ('m', 1), ('i', 1)])
    [('i', [1, 1]), ('m', [1]), ('s', [1])]
    """
    result = defaultdict(list)
    # Sort on the key alone. Sorting whole pairs would fall back to comparing
    # values whenever two keys tie, which raises TypeError for non-orderable
    # values (e.g. dicts) and reorders the values inside each group. Python's
    # sort is stable, so equal keys keep their input order.
    for pair in sorted(pairs, key=lambda pair: pair[0]):
        result[pair[0]].append(pair[1])
    return list(result.items())


# Group the (character, 1) pairs in `seq_map` by character.
groups = collect(seq_map)
print(groups)

[('i', [1, 1, 1, 1]), ('m', [1]), ('p', [1, 1]), ('s', [1, 1, 1, 1])]


In [9]:
def word_count_reduce(group):
    """
    Total up the occurrences recorded for a single token.

    Params:
      group...a pair of the form (token, list_of_ones), holding one 1 per occurrence of the token.
    Returns:
      tuple of form (token, int), where int is the sum of the values in the list.
    E.g.
    >>> word_count_reduce(['i', [1,1]])
    ('i', 2)

    NOTE: the total is computed with the `reduce` function.
    """
    token = group[0]
    count = reduce(plus, 0, group[1])
    return (token, count)


# One (token, count) pair per group, in the order the groups are listed.
print(list(map(word_count_reduce, groups)))

[('i', 4), ('m', 1), ('p', 2), ('s', 4)]


### All Contiguous Subsequences

Given a sequence $a$, generate all contiguous subsequences.

<br><br>


$\langle a \langle i, \ldots,j \rangle : 0 \le i < |a|, i \le j < |a| \rangle$

$\equiv$

$\langle a \langle i, \ldots,j \rangle : 0 \le i \le j < |a| \rangle$

$\equiv$

$flatten \langle \: \langle a[i \ldots i+j]: 0 \le j < |a| - i \rangle : 0 \le i < |a| \rangle$

$\equiv$

$flatten (tabulate (\mathtt{lambda} \: i \: . \: tabulate (\mathtt{lambda} \: j \: . \: a[i \ldots i+j]) \: (|a| - i)) \: |a|)$

In [1]:
# sequential solution

def all_contiguous_subseq(a):
    """Lazily yield every non-empty contiguous slice of `a`.

    Slices are grouped by start index (increasing); within one start index
    they are yielded from shortest to longest.

    Params:
      a...a sliceable sequence.
    Yields:
      a[start:stop] for every 0 <= start < stop <= len(a).
    """
    for start in range(len(a)):
        yield from (a[start:stop] for stop in range(start + 1, len(a) + 1))
            
# Materialise the generator so the cell displays every contiguous subsequence of a five-element list.
list(all_contiguous_subseq([1,2,3,4,5]))

[[1],
 [1, 2],
 [1, 2, 3],
 [1, 2, 3, 4],
 [1, 2, 3, 4, 5],
 [2],
 [2, 3],
 [2, 3, 4],
 [2, 3, 4, 5],
 [3],
 [3, 4],
 [3, 4, 5],
 [4],
 [4, 5],
 [5]]

In [4]:
# nested tabulate: the outer tabulate picks each start index, the inner one
# builds every slice beginning there, and flatten joins the per-start lists.
a = [1, 2, 3, 4, 5]
flatten(
    tabulate(
        lambda start: tabulate(
            lambda extra: a[start:start + extra + 1],  # slice of length extra + 1
            len(a) - start,                            # number of slices that begin at `start`
        ),
        len(a),
    )
)

[[1],
 [1, 2],
 [1, 2, 3],
 [1, 2, 3, 4],
 [1, 2, 3, 4, 5],
 [2],
 [2, 3],
 [2, 3, 4],
 [2, 3, 4, 5],
 [3],
 [3, 4],
 [3, 4, 5],
 [4],
 [4, 5],
 [5]]

### analysis of All Contiguous Subsequences

How many calls to `a[i:i+j+1]` (i.e., `subseq`)?

If $|a|=n$,

$$ \sum_{i=1}^n i = \frac{n(n+1)}{2}  \in O(n^2) $$

Work and span of `subseq` is O(1) (**why?**)

Therefore, total work is $O(n^2)$.

<br>

Span of inner `tabulate` is $O(1)$, and outer `tabulate` is also $O(1)$.

<br>

`flatten` at the end requires $O(\lg n)$ span.

Therefore, total span is $O(\lg n)$
