# Sparse Hebbian Learning 

In [1]:
# Run the project's setup.py with --description; the text printed below the
# cell is that script's output.
%run setup.py --description

 This is a collection of python scripts to test learning strategies to efficiently code natural image patches.  This is here restricted  to the framework of the [SparseNet algorithm from Bruno Olshausen](http://redwood.berkeley.edu/bruno/sparsenet/).


In [2]:
import numpy as np
import pylab
import matplotlib
# Disabled alternative font setup based on the STIX fonts:
#matplotlib.rcParams.update({'font.size': 18, 'font.family': 'STIXGeneral', 'mathtext.fontset': 'stix'})
# Active setup: larger default font, and all figure text rendered through LaTeX.
# 'text.usetex': True needs a working LaTeX installation on the machine that
# runs the notebook.
matplotlib.rcParams.update({'font.size': 18, 'text.usetex': True})
# Render figures directly in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
from shl_scripts import SHL

# Value forwarded to SHL as DEBUG_DOWNSCALE; presumably it shrinks the problem
# size for quick test runs -- TODO confirm against shl_scripts.
DEBUG_DOWNSCALE = 100
shl = SHL(DEBUG_DOWNSCALE=DEBUG_DOWNSCALE)

# Learn a dictionary with the 'omp' algorithm, then display it.
dico = shl.learn_dico(
    learning_algorithm='omp',
    transform_n_nonzero_coefs=10,
    gain_rate=0.01,
    verbose=11,
)
fig = shl.show_dico(dico)
# NOTE(review): with the inline backend the figure is normally rendered at the
# end of the cell, so this explicit call may be redundant -- confirm.
fig.show()

Learning the dictionary...
Extracting data...
done in 12.06s.
Training on 3000 patches
Iteration   0 (elapsed time:   0s,  0.0mn)
Gain  [ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1. 

In [None]:
# Shell escape: runs setup.py with whichever `python` is first on the PATH,
# which is not necessarily the interpreter backing this kernel.
!python setup.py --long-description

## debug

In [None]:
# Debugging aid: during learning some filters may win all the time, which
# breaks the efficiency of the population of filters.
# NOTE(review): `coder`, `image_data` and `subplotpars` are not defined in any
# cell visible in this notebook -- confirm where they come from before running
# this cell on a fresh kernel.
iters = 1000
n_filters = coder.psi.shape[1]

# Column t of S stores the absolute values returned by coder.sparsify for the
# t-th drawn sample (presumably one coefficient per filter -- TODO confirm).
S = np.zeros((n_filters, iters))
for i_iter in range(iters):
    x = image_data.draw()
    S[:, i_iter] = np.abs(coder.sparsify(x))

# Disabled alternative: mean and upper percentiles across samples for each row of S.
#fig = plt.figure(figsize=(12, 12))
#plt.plot(np.arange(a.size), S.mean(axis=1))
#plt.plot(np.arange(a.size), np.percentile(S, 90, axis=1), 'r--')
#plt.plot(np.arange(a.size), np.percentile(S, 99, axis=1), 'r--')

# Disabled alternative: one small subplot per filter with its sorted values.
#fig = plt.figure(figsize=(12, 12), subplotpars=subplotpars)
#for col in range(S.shape[0]):
#    a = fig.add_subplot(np.sqrt(S.shape[0]), np.sqrt(S.shape[0]), col + 1)
#    a.plot(np.sort(S[col]))
#    a.axis("off")

# Sorted absolute coefficients, one curve per filter on a shared axis; the
# plot is meant to show that the energy is not well distributed across filters.
fig = plt.figure(figsize=(12, 12), subplotpars=subplotpars)
a = fig.add_subplot(1, 1, 1)
for col in range(S.shape[0]):
    sorted_energy = np.sort(S[col])
    a.plot(sorted_energy)


In [None]:
# what we need is some sort of histogram normalization
def histeq(s):
    """
    Histogram-equalize the values of a 1-D array.

    The sorted values of ``s`` give its inverse empirical CDF, so each value
    is mapped to its CDF level by linear interpolation between the sorted
    values and an evenly spaced grid on [0, 1].

    Parameters
    ----------
    s : numpy.ndarray
        1-D array of values to equalize.

    Returns
    -------
    numpy.ndarray
        Array with one entry per element of ``s``, each lying in [0, 1].
    """
    ranked = np.sort(s)
    levels = np.linspace(0, 1., s.size)
    return np.interp(s, ranked, levels)

# Equalize each filter independently: row k of Z is histeq applied to row k of S.
Z = np.zeros((S.shape[0], iters))
for i_filter, coeffs in enumerate(S):
    Z[i_filter, :] = histeq(coeffs)

# Mean (solid) and 25th / 75th percentiles (dashed red) of the equalized
# values of every filter, plotted against a [0, 1] grid with one point per filter.
filter_axis = np.linspace(0, 1, S.shape[0])
fig = plt.figure(figsize=(12, 12))
plt.plot(filter_axis, Z.mean(axis=1))
for percentile in (25, 75):
    plt.plot(filter_axis, np.percentile(Z, percentile, axis=1), 'r--')

# Sorted equalized values, one curve per filter: after equalization the
# curves are expected to be well distributed.
fig = plt.figure(figsize=(12, 12))
a = fig.add_subplot(1, 1, 1)
for col in range(Z.shape[0]):
    sorted_scores = np.sort(Z[col])
    a.plot(sorted_scores)



Results
=======

In [None]:
# Execute the project's learn.py script through IPython's %run magic; the
# script itself is not shown in this notebook.
%run learn.py


## Version used

In [None]:
%install_ext http://raw.github.com/jrjohansson/version_information/master/version_information.py
%load_ext version_information
%version_information numpy