In [1]:
# setup
from IPython.core.display import display,HTML
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))
display(HTML(open('rise.css').read()))

# imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(style="whitegrid", font_scale=1.5, rc={'figure.figsize':(12, 6)})


# CMPS 2200
# Introduction to Algorithms

## Scan (Cont'd)


## Scan

*reduce* doesn't store the intermediate results, which limits it somewhat. 

*scan* is reduce version of prefix, compared with iterate_prefix.

$scan \: (f : \alpha \times \alpha \rightarrow \alpha) (id : \alpha) (a : \mathbb{S}_\alpha) : (S_\alpha * \alpha)$


Input is:
- $f$: an associative binary function
- $a$ is the sequence
- $id$ is the **left identity** of $f$ $\:\: \equiv \:\:$ $f(id, x) = x$ for all $x \in \alpha$

Returns:
- a tuple containing:
  - a value of type $S_\alpha$, the sequence of intermediate values
  - a value of type $\alpha$ that is the result of the "sum" with respect to $f$ of the input sequence $a$


<br>

$scan \: f \: id \: a = (\langle reduce \:\: f \:\: id \:\: a[0 \ldots (i-1)] : 0 \le i < |a| \rangle,$  
$\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\:\: reduce \:\: f \:\: id \:\: a)$

In [1]:
def reduce(f, id_, a):

    if len(a) == 0:
        return id_
    elif len(a) == 1:
        return a[0]
    else:
        # can call these in parallel
        return f(reduce(f, id_, a[:len(a)//2]),
                  reduce(f, id_, a[len(a)//2:]))

## inefficient implmentation
def scan(f, id_, a):

    return (
            [reduce(f, id_, a[:i+1]) for i in range(len(a))],
             reduce(f, id_, a)
           )


### Divide-and-Conquer 


$\texttt{prefix_sum}([2,1,3,2,2,5,4,1]) \rightarrow ([0, 2, 3, 6, 8, 10, 15, 19], 20)$

Now we just split the list $[2,1,3,2],[2,5,4,1]$ and recursively compute prefix sums?

We'd get results $(b, b')$ and $(c, c')$ where:

$(b, b') = (\langle 0, 2, 3, 6\rangle, 8)$

$(c, c') = (\langle 0, 2, 7, 11\rangle, 12)$
 
Now, it's easy to see that $b$ already gives us half the solution - how do we get the result by combining solutions?


To compute prefix sums, all we have to do is to add the sum of the first half to all of the elements of $b$ and to $b'$. We can generalize this approach to get:

<p><span class="math display">\[\begin{array}{l}  
\mathit{scanDC}~f~\mathit{id}~a =  
\\  
~~~~\texttt{if}~|a| = 0~\texttt{then}  
\\  
~~~~~~~~(\left\langle\,  \,\right\rangle, \mathit{id})  
\\  
~~~~\texttt{else if}~|a| = 1~\texttt{then}  
\\   
~~~~~~~~(\left\langle\, \mathit{id} \,\right\rangle,a[0])  
\\  
~~~~\texttt{else}  
\\   
~~~~~~~~\texttt{let}  
\\   
~~~~~~~~~~~~(b, c) = \mathit{splitMid}~a  
\\  
~~~~~~~~~~~~((l,b'),(r,c')) = (\mathit{scanDC}~f~\mathit{id}~b \mid\mid{}~\mathit{scanDC}~f~\mathit{id}~c)  
\\  
~~~~~~~~~~~~r' = \left\langle\, f (b',x) : x \in r \,\right\rangle  
\\  
~~~~~~~~\texttt{in}  
\\  
~~~~~~~~~~~~(\mathit{append}~(l,r'), f(b',c'))  
\\  
~~~~~~~~\texttt{end}  
\end{array}\]</span></p>



Assuming that $f(n)$ can be computed in constant time, we get the following recurrences for work and span:
    
$$ W(n) = 2W(n/2) + O(n)$$

and 

$$ S(n) = S(n/2) + O(1) $$

Thus the work is $O(n \log n)$ and the span is $O(\log n)$.


In [2]:
def scanDC(f, id_, a):

    if len(a) == 0:
        return ([], id_)
    elif len(a) == 1:
        return ([id_], a[0])
    else:
        b = a[:len(a)//2]
        c = a[len(a)//2:]
        left, L = scanDC(f, id_, b)
        right, R = scanDC(f, id_, c)
        updated_right = [f(L, x) for x in right]
        return left + updated_right, f(L, R)

def plus(x,y):
    return x + y
        
scanDC(plus, 0, [2,1,3,2,2,5,4,1])

([0, 2, 3, 6, 8, 10, 15, 19], 20)

Instead, we use an idea called **contraction** that is like divide and conquer, but doesn't require subproblems to be independent. Yet it still allows some parallelism.


<br><br>

**Key observation:**

Given input $[2,1,3,2,2,5,4,1]$ we can compute pairwise addition on each adjacent pairs of numbers:

$[2,1,3,2,2,5,4,1] \rightarrow$

$[(2+1), (3+2), (2+5), (4+1)] \rightarrow$

$[3, 5, 7, 5]$

### These four additions can be done in parallel

This is a **partial** output. How do we modify this to get the final output?


Since we are using recursion, we will assume we have access to the right output for this subproblem, which is

$\texttt{prefix_sum}([3, 5, 7, 5]) \rightarrow ([0, 3, 8, 15], 20)$

We want to have $~[\mathbf{0}, 2, \mathbf{3}, 6, \mathbf{8}, 10, \mathbf{15}, 19], 20)$

<br>

How can we combine this partial solution with the original input $[2,1,3,2,2,5,4,1]$ to get the right answer?


![figures/scan.png](figures/scan.png)

> Sum together the partial output at position $i$ with the original input at $i+1$.

In [3]:
def fastscan(f, id_, a):
    space = len(a) * '  ' # for printing
    print(space, 'a=', a)

    # base cases are same as reduce
    if len(a) == 0:
        return [], id_
    elif len(a) == 1:
        return [id_], a[0]
    else:
        # compute the "partial solution" by
        # applying f to each adjacent pair of numbers 
        # e.g., [2, 1, 3, 2, 2, 5, 4, 1] -> [3, 5, 7, 5]
        # this can be done in parallel
        subproblem = [f(a[i], a[i+1]) for i in range(len(a))[::2]]
        print(space, 'subproblem=', subproblem)

        # recursively apply fastscan to the subproblem
        partial_output, total = fastscan(f, id_, subproblem)     # ->[8, 12]->[20]
        # partial_output = [0, 3, 8, 15]   total=20
        print(space, 'partial_output=', partial_output, 'total=', total)
        
        # combine partial_output with input to get desired output
        ret = (
            [partial_output[i//2] if i%2==0 else   # use partial output
             f(partial_output[i//2], a[i-1])       # combine partial output with next value
             for i in range(len(a))],
            total
        )
        print(space, 'returning', ret)
        return ret

def plus(x,y):
    return x + y
        
fastscan(plus, 0, [2,1,3,2,2,5,4,1])

                 a= [2, 1, 3, 2, 2, 5, 4, 1]
                 subproblem= [3, 5, 7, 5]
         a= [3, 5, 7, 5]
         subproblem= [8, 12]
     a= [8, 12]
     subproblem= [20]
   a= [20]
     partial_output= [0] total= 20
     returning ([0, 8], 20)
         partial_output= [0, 8] total= 20
         returning ([0, 3, 8, 15], 20)
                 partial_output= [0, 3, 8, 15] total= 20
                 returning ([0, 2, 3, 6, 8, 10, 15, 19], 20)


([0, 2, 3, 6, 8, 10, 15, 19], 20)


### `scan` in SPARC 

\begin{array}{l}  
\\  
\mathit{scan}~f~\mathit{id}~a =  
\\  
~~~~\texttt{if}~|a| = 0~\texttt{then}   
\\  
~~~~\left(\left\langle\,  \,\right\rangle, id\right)  
\\  
~~~~\texttt{else if}~|a| = 1 ~\texttt{then}  
\\  
~~~~~~~~\left( \left\langle\, id \,\right\rangle, a[0] \right)  
\\  
~~~~\texttt{else}  
\\   
~~~~~~~~\texttt{let}  
\\  
~~~~~~~~~~~~a' = \left\langle\,  f(a[2i],a[2i+1]) : 0 \leq i < n/2 \,\right\rangle  
\\  
~~~~~~~~~~~~(r,t) = \mathit{scan}~f~\mathit{id}~ a' 
\\  
~~~~~~~~\texttt{in}  
\\   
~~~~~~~~~~~~(\left\langle\,  p_i : 0 \leq i < n  \,\right\rangle, t),~\texttt{where}~p_i =   
\begin{cases}  
     r[i/2]  & \texttt{even}(i) \\  
     f(r[i/2], a[i-1]) & \texttt{otherwise}  
\end{cases}  
\\  
~~~~~~~~\texttt{end}  
\end{array}

### Analysis of the Work of `scan` 

Assume that function `f` is constant time.

<br><br>


```python
subproblem = [f(a[i], a[i+1]) for i in range(len(a))[::2]]
```


takes $O(n)$

<br><br>

```python
        ret = (
            [partial_output[i//2] if i%2==0 else
             f(partial_output[i//2], a[i-1])  
             for i in range(len(a))],
            total
        )
```

takes $O(n)$

```python
partial_output, total = fastscan(f, id_, subproblem)
```


reduces problem in half each recursive call

but there is only one recursive call, instead of two for, e.g., `merge sort`



$$W(n) = W(n/2) + n$$

$$W(n) \in O(n)$$

### Analysis of the Span of `scan` 

Assume that function `f` is constant time.


```python
subproblem = [f(a[i], a[i+1]) for i in range(len(a))[::2]]
```



With infinite processors, this can be done in constant work.




```python
        ret = (
            [partial_output[i//2] if i%2==0 else
             f(partial_output[i//2], a[i-1])  
             for i in range(len(a))],
            total
        )
```


With infinite processors, this can be done in constant work.

```python
partial_output, total = fastscan(f, id_, subproblem)
```



reduces problem in half each recursive call

<br><br>



$$S(n) = S(n/2) + O(1)$$


$$S(n) \in O(\lg n)$$

- surprisingly the *same* work and span of `reduce`
- even though we're keeping track of output for all prefixes.

<br><br>
`scan` is a popular primitive in parallel programming, used to solve many problems, including:
https://en.wikipedia.org/wiki/Prefix_sum





<span style="color:red">Question:</span> Find an element which can separate a sequence so that its left and right sum are the same. If no, return -1

> example 1: [1, 6, 4, 2, 5] $\rightarrow$ 4<br>
> example 2: [8, 6, 4, 2, 5] $\rightarrow$ -1

In [4]:
a = [1, 6, 4, 2, 5]
b = a[::-1]
print(a)
print(b)

pa, sa = scan(plus, 0, a)
pb, sb = scan(plus, 0, b)

print(pa)
print(pb)


[1, 6, 4, 2, 5]
[5, 2, 4, 6, 1]
[1, 7, 11, 13, 18]
[5, 7, 11, 17, 18]


In [5]:
## count the frequency per unique character

a = 'mississippi'
print(a)

mississippi


In [6]:
## define a mapping funcion

aa = list(map(lambda x: (x, 1),a))

print(aa)

[('m', 1), ('i', 1), ('s', 1), ('s', 1), ('i', 1), ('s', 1), ('s', 1), ('i', 1), ('p', 1), ('p', 1), ('i', 1)]


### Collect

> Given a sequence of  key-value pairs, the operation collect ``collects'' together all the values for a given key. This operation is quite common in data processing, and in relational database languages such as SQL it is referred to as “Group by”. 
 
 
```python 
ga = collect([('i', 1), ('s', 1), ('m', 1), ('i', 1)])
ga = [('s', [1]), ('i', [1, 1]), ('m', [1])]  

```
    

In [7]:
from collections import defaultdict

def collect(pairs):
    
    result = defaultdict(list)
    for pair in sorted(pairs):
        result[pair[0]].append(pair[1])
    return list(result.items())



In [8]:
ga = collect(aa)
print(ga)

[('i', [1, 1, 1, 1]), ('m', [1]), ('p', [1, 1]), ('s', [1, 1, 1, 1])]


In [9]:
def reduce(f, id_, a):
    # done. do not change me.
    if len(a) == 0:
        return id_
    elif len(a) == 1:
        return a[0]
    else:
        return f(reduce(f, id_, a[:len(a)//2]),
                 reduce(f, id_, a[len(a)//2:]))
    
def plus(x, y):
    # done. do not change me.
    return x + y
    
print(reduce(plus, 0, ga[0][1]))

4
