<h1 style="margin-bottom: 0; padding-bottom: 0;"><center>The Computational Model</center></h1><h2 style="margin: 0; padding: 0;"><center>Saliency-Based Visual Attention</center></h2>

<p><small>Sebastian Höffner &amp; Alexander Höreth<br /><i>November 21, 2016</i></small></p>

<p><small><b>Laurent Itti, Christof Koch, Ernst Niebur</b>: A Model of Saliency-Based Visual Attention for Rapid Scene Analysis. <i>IEEE Transactions on Pattern Analysis and Machine Intelligence</i>, Vol 20, No 11, pp. 1254&ndash;1259. 1998.</small></p>

<p><small>Image credit goes to the paper if not otherwise mentioned.</small></p>

In [None]:
%matplotlib inline
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import order_filter, convolve2d
from os.path import splitext, basename

def draw(x, y, i, img, title=''):
    """Show `img` in grayscale in one cell of a subplot grid.

    `x`, `y` and `i` are forwarded to `plt.subplot` as the number of rows,
    the number of columns and the 1-based cell index. The axes decorations
    are hidden and `title` is shown above the image.
    """
    axes = plt.subplot(x, y, i)
    axes.axis('off')
    axes.imshow(img, cmap='gray')
    axes.set_title(title)

def _make_pyramids(img):
    pyramids = [img]
    for i in range(8):
        pyramids.append(cv2.pyrDown(pyramids[-1]))
    return pyramids

def toDegree(rad):
    """Convert an angle (scalar or array) from radians to degrees."""
    degrees_per_radian = 180 / np.pi
    return rad * degrees_per_radian


### Model Architecture

<img src="architecture.png" alt="Model architecture" width="60%" />

In [None]:
# impath = '640x480/test0.jpg'
# impath = '384x384/test1.jpg'
# impath = '384x384/test2.jpg'
# impath = '500x357/voc2012_000122.jpg'
# impath = '500x357/voc2012_000138.jpg'

# Input Image

In [None]:
# Image analysed by the rest of the notebook.
impath = '640x480/test0.jpg'
img = plt.imread(impath)
# Display the input image.
plt.imshow(img);

# Linear Filtering <small>(level 1)</small>

<img src="linearfiltering.png" alt="Linear Filtering" style="width: 60%;" />

## Color Separation

In [None]:
# Split the image into its colour channels as floats. JPEGs are loaded as
# uint8, and summing or dividing uint8 channels later on would wrap around
# or truncate, so convert once here.
r, g, b = (img[:, :, k].astype(np.float64) for k in range(3))

draw(2, 2, 1, r, 'red')
draw(2, 2, 2, g, 'green')
draw(2, 2, 3, b, 'blue')

In [None]:
def _normalize_channel(nom, denom):
    threshold = 0.1*np.max(denom)
    nom = np.copy(nom)
    yes = np.where(denom > threshold)
    nom[np.where(denom <= threshold)] = 0
    nom[yes] = nom[yes] / denom[yes]
    return nom

# Take the channels from `img` as floats: uint8 channels would overflow in
# `r + g + b`, and starting from `img` keeps this cell safe to re-run.
r, g, b = (img[:, :, k].astype(np.float64) for k in range(3))
I = (r + g + b) / 3
# Divide each channel by the intensity; the result overwrites `r`, `g`, `b`.
r, g, b = (_normalize_channel(c, I) for c in (r, g, b))

## Broadly-Tuned Color Channels

In [None]:
# Broadly tuned colour channels. Negative responses are set to zero, as
# specified in the paper cited at the top of this notebook.
R = np.maximum(r - (g + b) / 2, 0)
G = np.maximum(g - (r + b) / 2, 0)
B = np.maximum(b - (r + g) / 2, 0)
Y = np.maximum((r + g) / 2 - np.abs(r - g) / 2 - b, 0)

draw(2, 2, 1, R, 'red')
draw(2, 2, 2, G, 'green')
draw(2, 2, 3, B, 'blue')
draw(2, 2, 4, Y, 'yellow')

## Color Channel Feature Pyramids

In [None]:
# One pyramid per broadly tuned colour channel.
Rp, Gp, Bp, Yp = map(_make_pyramids, (R, G, B, Y))

In [None]:
colors = ('red', 'green', 'blue', 'yellow')
# One row per pyramid level, one column per colour channel; only the first
# row carries the channel names as titles.
for level, row in enumerate(zip(Rp, Gp, Bp, Yp)):
    for col, (name, layer) in enumerate(zip(colors, row)):
        title = name if level == 0 else ''
        draw(9, 4, level * 4 + col + 1, layer, title)

## Intensity Image Feature Pyramid

In [None]:
# Intensity must come from the original colour channels. By this point `r`,
# `g` and `b` have been divided by the intensity, so averaging them would give
# roughly 1 wherever the image is bright instead of the actual intensity.
I = img[..., :3].astype(np.float64).mean(axis=2)
Ip = _make_pyramids(I)

In [None]:
# Show every pyramid level, titled with its array shape.
for position, level in enumerate(Ip, start=1):
    rows, cols = level.shape
    draw(3, 3, position, level, '%dx%d' % (rows, cols))

## Gabor Cells

We use Gabor cells in four different orientations to approximate the orientation-sensitivity profile of receptive fields.

In [None]:
orientations = np.arange(0, np.pi, np.pi/4)


def getKernel(t):
    """Return the Gabor kernel for orientation `t` (passed as `theta`)."""
    return cv2.getGaborKernel((32, 32), sigma=2, theta=t, lambd=10, gamma=.5, psi=0)


gabors = list(map(getKernel, orientations))

In [None]:
# One panel per kernel, titled with its orientation in degrees.
for position, (theta, kernel) in enumerate(zip(orientations, gabors), start=1):
    draw(2, 2, position, kernel, 'theta = %s' % toDegree(theta))

## Orientation Features 

In [None]:
# Weighted sum of the first three channels gives a single-channel image.
luma_weights = [.299, .587, .114]
img_gray = np.dot(img[..., :3], luma_weights)
# Filter the grayscale image with every Gabor kernel (-1: keep the input depth).
Os = [cv2.filter2D(img_gray, -1, kernel) for kernel in gabors]

In [None]:
# One panel per orientation response, titled with the angle in degrees.
for position, (theta, response) in enumerate(zip(orientations, Os), start=1):
    draw(2, 2, position, response, 'theta = %s' % toDegree(theta))

## Orientation Feature Pyramids

In [None]:
# One pyramid per orientation response.
Ops = list(map(_make_pyramids, Os))

In [None]:
# One row per pyramid level, one column per orientation; only the first row
# is titled.
theta_titles = ['theta = %s' % toDegree(theta) for theta in orientations]
for level, row in enumerate(zip(*Ops)):
    for col, layer in enumerate(row):
        title = theta_titles[col] if level == 0 else ''
        draw(9, 4, level * 4 + col + 1, layer, title)

## Feature Maps Recap
* 6 for intensity contrast - mammals: dark centers on bright surrounds, or vice versa
* 12 for color - mammals: excitation by one color, inhibition by opposite color
* 24 for orientation - mammals: primary visual cortex has layers to detect orientations
    * = 42 feature maps
    
<img src="linearfiltering.png" alt="Linear Filtering" style="width: 60%;" />

# Center-Surround Differences <small>(level 2)</small>

<img src="centersurrounddifferences.png" alt="Center-Surround Differences" style="width: 60%;" />

<p>&nbsp;</p>
\begin{align}
    \mathcal{I}(c,s) &= |I(c) \ominus I(s)| & (1) \\
    \mathcal{RG}(c,s) &= |(R(c) - G(c)) \ominus (G(s) - R(s))| & (2) \\
    \mathcal{BY}(c, s) &= |(B(c) - Y(c)) \ominus (Y(s) - B(s))| & (3) \\
    O(c,s,\theta) &= |O(c,\theta) \ominus O(s,\theta)| & (4) \\
\end{align}
<p>&nbsp;</p>
<center>$a \ominus b$: <i>Interpolate $b$ to the size of $a$ (the bigger image) and do a point-wise subtraction</i></center>

### Combinations
The following combinations work well for most images:
$$(c, s) \in \{ (2, 5), (2, 6), (3, 6), (3, 7), (4, 7), (4, 8) \}$$

In [None]:
# (center, surround) pairs: centers 2..4, each paired with center + 3 and
# center + 4, then shifted down by one to index into the pyramid lists.
# NOTE(review): entry 0 of each pyramid is the full-resolution image, so the
# -1 makes the code use levels 1-3 / 4-7 rather than the 2-4 / 5-8 written in
# the formula above -- confirm this is intended.
scale_pairs = [(c, c + delta) for c in (2, 3, 4) for delta in (3, 4)]
cs = np.asarray(scale_pairs) - 1

In [None]:
def _center_surround_diff(c, s, a, b=None):
    """Absolute center-surround difference between two pyramid levels.

    With only `a` given, the center is `a[c]` and the surround is `a[s]`.
    With `b` given as well, the center is `a[c] - b[c]` and the surround is
    `b[s] - a[s]`. The surround is resized to the center's shape before the
    point-wise subtraction.
    """
    if b is None:
        center = a[c] - 0
        surround = a[s]
    else:
        center = a[c] - b[c]
        surround = b[s] - a[s]
    # cv2.resize takes the target size as (width, height), i.e. the reversed shape.
    target_size = center.shape[::-1]
    upsampled = cv2.resize(surround, target_size)
    return np.abs(center - upsampled)

## Intensity and Colors

<p>&nbsp;</p>
\begin{align}
    \mathcal{I}(c,s) &= |I(c) \ominus I(s)|\\
    \mathcal{RG}(c,s) &= |(R(c) - G(c)) \ominus (G(s) - R(s))|\\
\end{align}
<p>&nbsp;</p>

In [None]:
# One difference map per (center, surround) pair in `cs`.
Ics = [_center_surround_diff(center, surround, Ip) for center, surround in cs]
RGcs = [_center_surround_diff(center, surround, Rp, b=Gp) for center, surround in cs]
BYcs = [_center_surround_diff(center, surround, Bp, b=Yp) for center, surround in cs]

In [None]:
differences = ('intensity', 'red/green', 'blue/yellow')
# One row per (center, surround) pair, one column per feature type; only the
# first row is titled.
for pair_index, row in enumerate(zip(Ics, RGcs, BYcs)):
    for col, (name, diff) in enumerate(zip(differences, row)):
        title = name if pair_index == 0 else ''
        draw(6, 3, pair_index * 3 + col + 1, diff, title)

## Orientations

\begin{align}
    O(c,s,\theta) &= |O(c,\theta) \ominus O(s,\theta)|
\end{align}

In [None]:
# For every orientation pyramid, one difference map per (center, surround) pair.
Otcs = []
for pyramid in Ops:
    Otcs.append([_center_surround_diff(c, s, pyramid) for c, s in cs])

In [None]:
# One row per (center, surround) pair, one column per orientation; only the
# first row is titled.
theta_titles = ['theta = %s' % toDegree(theta) for theta in orientations]
for pair_index, row in enumerate(zip(*Otcs)):
    for col, diff in enumerate(row):
        title = theta_titles[col] if pair_index == 0 else ''
        draw(6, 4, pair_index * 4 + col + 1, diff, title)

## Normalizations

- Find maximum $M$ in image
- Compute average $\bar{m}$ of all other local maxima $m_i$
- Multiply map by $(M-\bar{m})^2$

In [None]:
def _normalize(img):
    M = np.max(img)
    kernel = np.ones((3, 3), dtype=np.int)
    filtered = order_filter(img, kernel, np.sum(kernel) - 1)
    m = np.mean(img[np.equal(np.equal(img, filtered), filtered != M)])
    return img * ((M - m) ** 2)

# Normalize every center-surround map. The results overwrite the inputs, so
# re-running this cell normalizes the maps a second time.
Ics = list(map(_normalize, Ics))
RGcs = list(map(_normalize, RGcs))
BYcs = list(map(_normalize, BYcs))
Otcs = [list(map(_normalize, maps)) for maps in Otcs]

## Center-Surround Differences Recap

<img src="centersurrounddifferences.png" alt="Center-Surround Differences" style="width: 60%;" />

<center>42 maps total</center>

# Across-Scale Combinations <small>(level 3)</small>

<img src="acrossscalecombinations.png" alt="Across-Scale Combinations" style="width: 60%;" />

<p>&nbsp;</p>
\begin{align}
\bar{\mathcal{I}} &= \oplus^4_{c=2} \oplus^{c+4}_{s=c+3} \mathcal{N}(\mathcal{I}(c,s))\\
\bar{C} &= \oplus^4_{c=2} \oplus^{c+4}_{s=c+3} [\mathcal{N}(\mathcal{RG}(c,s)) + \mathcal{N}(\mathcal{BY}(c,s))]\\
\bar{O} &= \sum_{\theta \in \{0^{\circ},45^{\circ},90^{\circ},135^{\circ}\}} \mathcal{N} \left( \oplus^4_{c=2} \oplus^{c+4}_{s=c+3} \mathcal{N}(O(c,s,\theta)) \right)\\
\end{align}
<p>&nbsp;</p>

## Combinations

In [None]:
def _addition(imgs, size):
    """Resize every map in `imgs` to `size` and add them point-wise.

    `size` is an array shape (rows, columns); it is reversed before being
    handed to `cv2.resize`, which expects (width, height).
    """
    dsize = size[::-1]
    resized = [cv2.resize(single_map, dsize) for single_map in imgs]
    return np.sum(resized, axis=0)

# Across-scale addition; every map is resized to the shape of the fourth
# center-surround map before summing.
Ibar = _addition(Ics, Ics[3].shape)
Cbar = _addition([rg + by for rg, by in zip(RGcs, BYcs)], RGcs[3].shape)
# Orientation: combine across scales per angle, normalize, then sum the angles.
per_orientation = [_normalize(_addition(maps, Ics[3].shape)) for maps in Otcs]
Obar = np.sum(per_orientation, axis=0)

panels = (('Intensity', Ibar), ('Color', Cbar), ('Orientation', Obar))
for position, (name, conspicuity) in enumerate(panels, start=1):
    draw(1, 3, position, conspicuity, name)

# Saliency Map Combination

<img src="architecture.png" alt="Model architecture" width="60%" />

## Combine Conspicuity Maps to Saliency Map
$$S=\frac{1}{3} \left(\mathcal{N}(\bar{\mathcal{I}})+\mathcal{N}(\bar{C})+\mathcal{N}(\bar{O})\right)$$

In [None]:
# Average of the three normalized conspicuity maps. The parentheses matter:
# without them the factor 1/3 applies to the intensity term only.
S = (_normalize(Ibar) + _normalize(Cbar) + _normalize(Obar)) / 3

In [None]:
# Saliency map next to the input image it was computed from.
for position, (panel, name) in enumerate(((S, 'Saliency Map'), (img, 'Input Image')), start=1):
    draw(1, 2, position, panel, name)