In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datetime import datetime
import os.path
import time

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import math
from utils import *

#from nets import *
from random import * 

placeholder_dict = {}

"""
Sample generation
"""
def rand_pm1():
    if randint(0,1)==0:
        return -1
    else:
        return 1

def make_data_set(A,size,m,s,f):
    n = np.shape(A)[1]
    hs = np.zeros((size,m))
    for i in range(size):
        coords = sample_without_replacement(n,s)
        for j in coords:
            hs[i,j] = f()
    return np.dot(hs,A).astype(np.float32)
#(np.dot(hs,A).astype(np.float32), hs)

def make_data_set_pm1(A, size,m,s):
    return make_data_set(A,size,m,s,rand_pm1)

def get_batch(A,m,s, num_examples):
    return make_data_set_pm1(A,num_examples,m,s)

def init_close(A, sigma):
    dims = np.shape(A)
    R= 1/math.sqrt(dims[1]) * sigma * np.random.randn(*dims)
    return (A + R).astype(np.float32)

def am_step(xs, A, eta, th, verbosity=1):
    #decoding
    hs = [[x if x>th or x<-th else 0 for x in li] for li in np.dot(xs,np.transpose(A))]
#map(lambda x: x if x>th or x<-th else 0, np.dot(xs,np.transpose(A)))
    diff = (np.dot(hs, A) - xs)
    grad = np.dot( np.transpose(hs), diff)/np.shape(xs)[0]
    #print(np.shape(A))
    #print(np.shape(grad))
    #print(grad)
    #print("am_step")
    #print(hs[0], diff[0], eta, grad[0])
    A = A - eta * grad
    if verbosity==1:
        loss = np.mean(np.sum(np.square(diff), 1))
        #np.sum(np.square(diff))
        printv("Loss: %f" % loss, verbosity, 1)
    return A

def eval_step(xs, A, th, verbosity=1):
    hs = [[x if x>th or x<-th else 0 for x in li] for li in np.dot(xs,np.transpose(A))]
#map(lambda x: x if x>th or x<-th else 0, np.dot(xs,np.transpose(A)))
    diff = xs - np.dot(hs, A)
    loss = np.mean(np.sum(np.square(diff), 1))
    printv("Eval loss: %f" % loss, verbosity, 1)
    return loss

def train_dl(A, B, m, s,batch_size, steps, eta, th, eval_steps):
    xs = get_batch(A, m, s, batch_size)
    eval_step(xs, B, th)
    for i in range(1,steps+1):
        xs = get_batch(A, m, s, batch_size)
        B = am_step(xs, B, eta, th, 1 if i % eval_steps == 0 else 0)
        #print(B[0])
        if i % eval_steps == 0:
            xs = get_batch(A, m, s, batch_size)
            loss = eval_step(xs, B, th)
    return (B, loss)

def make_A(m,n,verbosity=1):
    A = 1/math.sqrt(n) * np.random.randn(m,n)
    A = [ai/np.linalg.norm(ai) for ai in A]
    printv("A:", verbosity,2)
    printv(A, verbosity, 2)
    printv("AA^T:", verbosity, 2)
    printv(np.dot(A,np.transpose(A)), verbosity, 2)
    return A

def am_dl(verbosity=1):
    f = open('am_dl_3_50_25.txt', 'w')
    s = 3
    m = 50 # hidden vector
    n = 25 # observed vector
    q = s/m
    printv((s,m,n,q),verbosity,1)
    max_steps = 2000
    eval_steps = 100      
    #alpha_list = [1e-2] #[1e-4, 1e-3, 1e-2]
    batch_size = 256 # \Om(m * s)
    th = 0.5
    eta = 0.1 # 0.1 * m/s
    A = make_A(m,n,verbosity)
    #[(lambda M: init_close(M,0), "0")]:
    for (init,st) in [(init_close(A, 0.05), "0.05"),
                      (init_close(A, 0.1), "0.1"),
                      (init_close(A, 0.2), "0.2"),
                      (init_close(A, 0.5), "0.5"),
                      (1/math.sqrt(n)*np.random.randn(m,n).astype(np.float32), "random"), 
                      (make_data_set_pm1(A,m,m,s), "samples"),
                      (make_data_set_pm1(A,2*m,m,s), "oversamples")]:
        train_dl_and_eval(A, m, s, batch_size, max_steps, eval_steps, eta, f, init, st, th=0.5, verbosity=verbosity)
    f.close()

def train_dl_and_eval(A, m, s, batch_size, max_steps, eval_steps, eta, f=None, init=None, st="", th=0.5, verbosity=1):
    #for alpha in alpha_list:

#    printv((batch_size, max_steps, eta, st), verbosity,1)
#    f.write(str((batch_size, max_steps, eta, st)))
    printv(st, verbosity, 1)
    if f!=None:
        f.write(st)
        f.write("\n")

    A0 = init
    (B, loss) = train_dl(A, A0, m, s, batch_size, max_steps, eta, th, eval_steps)
    printv("A:", verbosity, 2)
    printv(A, verbosity, 2)
    printv("B:", verbosity, 2)
    printv(B, verbosity, 2)

    if f!=None:
        f.write("Loss:\n")
        f.write(str(loss))
        f.write("\n")

    (mins1, argmins1, closestRows1) = getClosestRows(A,B)
    printv("Distance from rows of A:", verbosity, 1)
    printv(mins1, verbosity, 1)
    if f!=None:
        f.write("Distance from rows of A:\n")
        f.write(str(mins1))
        f.write("\n")

    Bn = [bi/np.linalg.norm(bi) for bi in B]

    (mins2, argmins2, closestRows2) = getClosestRows(A,Bn)
    printv("Distance from rows of A (after normalization):", verbosity, 1)
    printv(mins2, verbosity, 1)
    if f!=None:
        f.write("Distance from rows of A (after normalization):\n")
        f.write(str(mins2))
        f.write("\n")

    (mins3, argmins3, closestRows3) = getClosestRows(Bn, A)
    printv("Distance from rows of B (after normalization):", verbosity, 1)
    printv(mins3, verbosity, 1)
    if f!=None:
        f.write("Distance from rows of B (after normalization):\n")
        f.write(str(mins3))
        f.write("\n")

    AB = np.dot(A,np.transpose(B))
    printv("AB^T", verbosity, 2)
    printv(AB, verbosity, 2)
    return (loss, B, Bn, mins1, mins2, mins3, AB)
    
"""
Is learned dictionary close to real dictionary?
"""

def getClosestRows(A,B):
    dists = [[np.linalg.norm(ai-bi) for bi in B] for ai in A]
    argmins = [np.argmin(row) for row in dists]
    mins = [np.min(row) for row in dists]
    closestRows = [B[i] for i in argmins]
    return (mins, argmins, closestRows)  

"""
if __name__=="__main__":
    np.set_printoptions(threshold=np.inf)
    am_dl()
"""

'\nif __name__=="__main__":\n    np.set_printoptions(threshold=np.inf)\n    am_dl()\n'

In [2]:
def am_dl_eta():
    f = open('am_dl_3_50_25.txt', 'w')
    s = 3
    m = 50 # hidden vector
    n = 25 # observed vector
    q = s/m
    print(s,m,n,q)
    max_steps = 2000
    eval_steps = 100      
    #alpha_list = [1e-2] #[1e-4, 1e-3, 1e-2]
    batch_size = 256 # \Om(m * s)
    th = 0.5
    eta = 0.1 # 0.1 * m/s
    A = 1/math.sqrt(n) * np.random.randn(m,n)
    A = [ai/np.linalg.norm(ai) for ai in A]
    print("A:")
    print(A)
    print("AA^T:")
    print(np.dot(A,np.transpose(A)))
    init = init_close(A, 0.5)
    st = "0.5"
    for eta in [0.1, 0.5, 1]:
            print(batch_size, max_steps, eta, st)
            f.write(str((batch_size, max_steps, eta, st)))
            f.write("\n")

            A0 = init
            (B, loss) = train_dl(A, A0, m, s, batch_size, max_steps, eta, th, eval_steps)
            #print("A:")
            #print(A)
            #print("B:")
            #print(B)

            f.write("Loss:\n")
            f.write(str(loss))
            f.write("\n")
            
            (mins, argmins, closestRows) = getClosestRows(A,B)
            #print("Distance from rows of A:")
            #print(mins)
            f.write("Distance from rows of A:\n")
            f.write(str(mins))
            f.write("\n")
            
            Bn = [bi/np.linalg.norm(bi) for bi in B]

            (mins, argmins, closestRows) = getClosestRows(A,Bn)
            print("Distance from rows of A (after normalization):")
            print(mins)
            f.write("Distance from rows of A (after normalization):\n")
            f.write(str(mins))
            f.write("\n")

            (mins, argmins, closestRows) = getClosestRows(Bn, A)
            print("Distance from rows of B (after normalization):")
            print(mins)
            f.write("Distance from rows of B (after normalization):\n")
            f.write(str(mins))
            f.write("\n")
            
            AB = np.dot(A,np.transpose(B))
            print("AB^T")
            print(AB)
    f.close()


In [4]:
sample_without_replacement(10, 6)

[4, 8, 1, 6, 2, 9]

In [3]:
np.set_printoptions(threshold=np.inf)
np.set_printoptions(precision=2)

In [7]:
am_dl_eta()
#Conclusion: eta=1 is fine!

3 50 25 0.06
A:
[array([ 0.11,  0.11,  0.16,  0.06,  0.17, -0.38, -0.04, -0.01,  0.04,
       -0.04,  0.33,  0.03, -0.09, -0.55,  0.31,  0.01, -0.25, -0.01,
        0.01, -0.09,  0.17, -0.18, -0.24, -0.06, -0.24]), array([-0.37,  0.1 ,  0.03,  0.03, -0.02,  0.62,  0.  ,  0.09,  0.2 ,
       -0.3 ,  0.2 , -0.07, -0.35, -0.04,  0.04, -0.09,  0.08, -0.05,
        0.23,  0.19, -0.01,  0.2 , -0.08,  0.06, -0.04]), array([-0.3 ,  0.22, -0.09, -0.06,  0.02, -0.04,  0.16, -0.05, -0.3 ,
       -0.03,  0.04,  0.22,  0.07, -0.26,  0.42, -0.42,  0.2 ,  0.05,
        0.22, -0.22,  0.22,  0.11, -0.06, -0.08,  0.21]), array([-0.19, -0.31,  0.26, -0.27,  0.21, -0.05, -0.06, -0.12, -0.34,
       -0.  ,  0.17, -0.42,  0.17,  0.01, -0.11, -0.02, -0.23, -0.11,
       -0.31,  0.03, -0.03, -0.11, -0.02, -0.06,  0.34]), array([ 0.28,  0.12, -0.31, -0.4 , -0.07, -0.33, -0.14,  0.08,  0.02,
       -0.1 ,  0.04,  0.22, -0.08,  0.19,  0.16, -0.04, -0.15, -0.18,
        0.45,  0.24, -0.11,  0.12,  0.1 ,  0.05, -0

In [4]:
def am_dls(verbosity=1):
    max_steps = 2000
    eval_steps = 100      
    th = 0.5
    eta = 1 # 0.1 * m/s
    data = []
    batch_size = 256 # \Om(m * s)
    f = open('am_dls.txt', 'w')
    for m in [50*2**k for k in range(5)]:
        n = m/2
        for s in [2**k for k in range(int(math.ceil(math.log(m,2)))+1)]:
            q = s/m
            printv((s,m,n,q),verbosity,1)
            A = make_A(m,n,verbosity)
            for (init,st) in [(init_close(A, 0.05), "0.05"),
                      (init_close(A, 0.1), "0.1"),
                      (init_close(A, 0.2), "0.2"),
                      (init_close(A, 0.5), "0.5"),
                      (1/math.sqrt(n)*np.random.randn(m,n).astype(np.float32), "random"), 
                      (make_data_set_pm1(A,m,m,s), "samples"),
                      (make_data_set_pm1(A,2*m,m,s), "oversamples")]:
                (loss, B, Bn, mins1, mins2, mins3, AB) = \
                    train_dl_and_eval(A, m, s, batch_size, max_steps, eval_steps, eta, f, init, st, 
                                      th=0.5, verbosity=verbosity)
                data.append((m,n,s, loss, mins1, mins2, mins3, AB))
    f.close()



In [25]:
m = 17
[2**k for k in range(int(math.ceil(math.log(m,2)))+1)]

[1, 2, 4, 8, 16, 32]

In [30]:
am_dls()

(1, 50, 25.0, 0.02)
0.05
Eval loss: 0.126744
Loss: 0.001509
Eval loss: 0.001255
Loss: 0.000293
Eval loss: 0.000327
Loss: 0.000325
Eval loss: 0.000318
Loss: 0.000367
Eval loss: 0.000399
Loss: 0.000336
Eval loss: 0.007126
Loss: 0.000241
Eval loss: 0.000312
Loss: 0.000158
Eval loss: 0.000171
Loss: 0.000215
Eval loss: 0.000202
Loss: 0.000257
Eval loss: 0.000246
Loss: 0.000307
Eval loss: 0.000216
Loss: 0.006959
Eval loss: 0.000357
Loss: 0.000188
Eval loss: 0.000176
Loss: 0.000136
Eval loss: 0.000163
Loss: 0.000371
Eval loss: 0.000316
Loss: 0.000273
Eval loss: 0.000189


KeyboardInterrupt: 

In [33]:
verbosity = 1
s = 3
m = 50 # hidden vector
n = 25 # observed vector
q = s/m
printv((s,m,n,q),verbosity,1)
max_steps = 10000
eval_steps = 100      
#alpha_list = [1e-2] #[1e-4, 1e-3, 1e-2]
batch_size = 256 # \Om(m * s)
th = 0.5
eta = 0.5 # 0.1 * m/s
A = make_A(m,n,verbosity)
#[(lambda M: init_close(M,0), "0")]:
(init, st) = (1/math.sqrt(n)*np.random.randn(m,n).astype(np.float32), "random")
train_dl_and_eval(A, m, s, 
                  batch_size, max_steps, eval_steps, eta, f=None, init=init, st=st, th=0.5, verbosity=verbosity)

(3, 50, 25, 0.06)
random
Eval loss: 4.365921
Loss: 0.889012
Eval loss: 0.829253
Loss: 0.636504
Eval loss: 0.630689
Loss: 0.482712
Eval loss: 0.464851
Loss: 0.376087
Eval loss: 0.391867
Loss: 0.352548
Eval loss: 0.352998
Loss: 0.353438
Eval loss: 0.357578
Loss: 0.335885
Eval loss: 0.376927
Loss: 0.357916
Eval loss: 0.367278
Loss: 0.353210
Eval loss: 0.340547
Loss: 0.335479
Eval loss: 0.339017
Loss: 0.346864
Eval loss: 0.330815
Loss: 0.363301
Eval loss: 0.364975
Loss: 0.330875
Eval loss: 0.357095
Loss: 0.350718
Eval loss: 0.355140
Loss: 0.364405
Eval loss: 0.345140
Loss: 0.335705
Eval loss: 0.330015
Loss: 0.337209
Eval loss: 0.336139
Loss: 0.372253
Eval loss: 0.356998


KeyboardInterrupt: 

In [34]:
#over-random doesn't help
verbosity = 1
s = 3
m = 50 # hidden vector
n = 25 # observed vector
q = s/m
printv((s,m,n,q),verbosity,1)
max_steps = 10000
eval_steps = 100      
#alpha_list = [1e-2] #[1e-4, 1e-3, 1e-2]
batch_size = 256 # \Om(m * s)
th = 0.5
eta = 0.5 # 0.1 * m/s
A = make_A(m,n,verbosity)
#[(lambda M: init_close(M,0), "0")]:
(init, st) = (1/math.sqrt(n)*np.random.randn(2*m,n).astype(np.float32), "overrandom")
train_dl_and_eval(A, m, s, 
                  batch_size, max_steps, eval_steps, eta, f=None, init=init, st=st, th=0.5, verbosity=verbosity)

(3, 50, 25, 0.06)
overrandom
Eval loss: 13.724207
Loss: 0.910131
Eval loss: 0.905210
Loss: 0.641561
Eval loss: 0.615536
Loss: 0.394422
Eval loss: 0.411497
Loss: 0.311439
Eval loss: 0.347818
Loss: 0.327193
Eval loss: 0.328794
Loss: 0.333421
Eval loss: 0.306531
Loss: 0.301382
Eval loss: 0.316902
Loss: 0.300228
Eval loss: 0.308229
Loss: 0.320846
Eval loss: 0.318754
Loss: 0.313244
Eval loss: 0.315345
Loss: 0.328528
Eval loss: 0.309440
Loss: 0.327069
Eval loss: 0.320913
Loss: 0.314993
Eval loss: 0.316379
Loss: 0.299960
Eval loss: 0.310874
Loss: 0.301422
Eval loss: 0.315611
Loss: 0.310156
Eval loss: 0.321012
Loss: 0.298065
Eval loss: 0.327935
Loss: 0.290115
Eval loss: 0.310388
Loss: 0.316960
Eval loss: 0.325089
Loss: 0.309646
Eval loss: 0.296666
Loss: 0.317349
Eval loss: 0.297856
Loss: 0.295232
Eval loss: 0.305850
Loss: 0.294840
Eval loss: 0.316254
Loss: 0.310111
Eval loss: 0.315197
Loss: 0.304080
Eval loss: 0.319001
Loss: 0.309370
Eval loss: 0.304529
Loss: 0.310029
Eval loss: 0.316288
Loss:

KeyboardInterrupt: 

In [8]:
f = open('am_dl_3_50_25.txt', 'w')
s = 2
m = 50 # hidden vector
n = 25 # observed vector
q = s/m
verbosity=1
printv((s,m,n,q),verbosity,1)
max_steps = 2000
eval_steps = 100      
#alpha_list = [1e-2] #[1e-4, 1e-3, 1e-2]
batch_size = 256 # \Om(m * s)
th = 0.5
eta = 0.1 # 0.1 * m/s
A = make_A(m,n,verbosity)

(init, st) = (make_data_set_pm1(A,m,m,s), "samples")

train_dl_and_eval(A, m, s, batch_size, max_steps, eval_steps, eta, f, init, st, th=0.5, verbosity=verbosity)

(2, 50, 25, 0.04)
samples
Eval loss: 97.339195
Loss: 0.563004
Eval loss: 0.597998
Loss: 0.563449
Eval loss: 0.537957
Loss: 0.517553
Eval loss: 0.469395
Loss: 0.484128
Eval loss: 0.497124
Loss: 0.446223
Eval loss: 0.442810
Loss: 0.388161
Eval loss: 0.380906
Loss: 0.396676
Eval loss: 0.346194
Loss: 0.339294
Eval loss: 0.327708
Loss: 0.296368
Eval loss: 0.285892
Loss: 0.267755
Eval loss: 0.286392
Loss: 0.263734
Eval loss: 0.245057
Loss: 0.223609
Eval loss: 0.217525
Loss: 0.204178
Eval loss: 0.221635
Loss: 0.199921
Eval loss: 0.190497
Loss: 0.200614
Eval loss: 0.203056
Loss: 0.186150
Eval loss: 0.175817
Loss: 0.182036
Eval loss: 0.180649
Loss: 0.173400
Eval loss: 0.179889
Loss: 0.185900
Eval loss: 0.173332
Loss: 0.164482
Eval loss: 0.162231
Distance from rows of A:
[0.82608473754781919, 0.82446225151570973, 0.88454003199849363, 0.80046608664300034, 0.81381677709768685, 0.18826460490099117, 0.64814328912864172, 0.88819684677962807, 0.90997811470273604, 0.87106536090684317, 0.145103446030324

(0.16223095014076688,
 array([[ -2.17e-01,   2.11e-01,   8.61e-04,  -4.69e-02,   8.17e-03,
           1.41e-01,   3.00e-03,  -3.58e-02,   8.22e-02,   1.49e-01,
           2.72e-02,   4.79e-03,  -1.09e-01,  -4.72e-02,   1.17e-01,
          -8.93e-02,   1.01e-01,   1.25e-01,   5.14e-02,  -8.41e-02,
           1.04e-02,   5.70e-02,   2.43e-03,   6.57e-02,  -4.64e-02],
        [  2.78e-01,  -4.27e-02,  -1.49e-01,   8.96e-02,  -1.35e-01,
           3.33e-01,   1.11e-01,  -7.68e-02,   4.91e-02,   2.50e-01,
           1.99e-01,   5.86e-02,  -5.77e-03,  -9.99e-02,  -1.17e-01,
           8.85e-02,  -3.23e-01,  -2.24e-02,   6.17e-02,   3.29e-01,
           2.32e-02,  -1.39e-01,  -7.70e-02,   1.11e-01,  -6.05e-02],
        [  2.05e-01,   8.62e-02,  -9.26e-02,  -2.48e-02,   5.33e-02,
          -1.01e-01,   3.56e-02,   4.54e-02,   2.08e-02,  -8.66e-02,
          -9.52e-02,   6.51e-02,  -4.16e-02,  -4.46e-02,   1.20e-01,
          -7.63e-02,  -1.04e-01,  -1.85e-01,   9.60e-02,   1.00e-01,
          

In [13]:
f = open('am_dl_3_50_25.txt', 'w')
s = 4
m = 50 # hidden vector
n = 25 # observed vector
q = s/m
verbosity=1
printv((s,m,n,q),verbosity,1)
max_steps = 2000
eval_steps = 100      
#alpha_list = [1e-2] #[1e-4, 1e-3, 1e-2]
batch_size = 256 # \Om(m * s)
th = 0.5
eta = 0.1 # 0.1 * m/s
A = make_A(m,n,verbosity)

#(init, st) = (A, "A")
#(make_data_set_pm1(A,m,m,s), "samples")
(init, st) =(make_data_set_pm1(A,m,m,s), "oversamples")

train_dl_and_eval(A, m, s, batch_size, max_steps, eval_steps, eta, f, init, st, th=0.5, verbosity=verbosity)

(4, 50, 25, 0.08)
oversamples
Eval loss: 1106.111145
Loss: 0.994051
Eval loss: 1.016171
Loss: 0.967187
Eval loss: 0.913975
Loss: 0.907746
Eval loss: 0.966177
Loss: 0.926301
Eval loss: 0.908335
Loss: 0.855568
Eval loss: 0.895611
Loss: 0.852077
Eval loss: 0.816879
Loss: 0.841349
Eval loss: 0.810164
Loss: 0.740841
Eval loss: 0.783219
Loss: 0.710365
Eval loss: 0.758974
Loss: 0.720213
Eval loss: 0.710803
Loss: 0.675172
Eval loss: 0.673049
Loss: 0.657788
Eval loss: 0.660033
Loss: 0.651990
Eval loss: 0.660963
Loss: 0.622149
Eval loss: 0.643134
Loss: 0.585277
Eval loss: 0.626799
Loss: 0.569385
Eval loss: 0.564929
Loss: 0.554324
Eval loss: 0.558923
Loss: 0.508855
Eval loss: 0.585104
Loss: 0.514118
Eval loss: 0.499789
Loss: 0.531051
Eval loss: 0.488877
Distance from rows of A:
[0.31304119270141945, 0.30956016663375313, 0.17267946968362144, 0.32342479450663925, 0.37624017222185191, 0.17426833145231774, 0.18569071646713289, 0.96078268425537117, 0.33211415741754424, 0.96376996486935529, 0.285981783

(0.48887744711893777,
 array([[  2.69e-03,   9.97e-04,   9.30e-04,  -2.42e-01,  -1.91e-01,
          -7.05e-02,   1.90e-01,  -9.69e-02,  -3.45e-01,  -2.86e-01,
          -9.17e-02,   1.63e-01,  -3.97e-02,   1.79e-01,  -9.96e-02,
           1.54e-01,   1.01e-01,   8.23e-03,  -1.22e-01,   5.19e-03,
          -1.78e-01,   2.51e-01,  -4.45e-02,  -1.19e-01,  -4.59e-01],
        [  2.34e-02,  -4.26e-02,   1.69e-01,  -1.96e-02,   2.43e-01,
          -1.27e-02,   4.10e-01,  -3.62e-01,   3.61e-01,  -4.54e-02,
          -3.08e-01,  -3.89e-03,   3.29e-02,   9.29e-03,   5.67e-02,
           5.74e-02,  -3.79e-04,   1.15e-01,  -2.03e-01,   1.58e-01,
           1.07e-01,  -1.11e-01,  -1.18e-01,   1.82e-02,   2.08e-01],
        [ -4.69e-02,  -5.63e-02,  -7.61e-02,   8.49e-02,  -1.82e-01,
          -2.08e-02,  -1.27e-01,  -2.06e-01,   1.99e-02,   3.78e-02,
          -1.46e-01,   5.87e-02,  -9.32e-02,   4.31e-02,   1.61e-03,
          -4.57e-02,   6.70e-02,  -3.09e-02,   1.23e-01,  -7.66e-02,
          

In [33]:
#(init, st) = (A, "A")
#(make_data_set_pm1(A,m,m,s), "samples")
#(init, st) =(make_data_set_pm1(A,2*m,m,s), "oversamples")
(init, st) = (init_close(A, 0.5), "0.5")
max_steps = 2000
eval_steps = 100
eta=0.1
train_dl_and_eval(A, m, s, batch_size, max_steps, eval_steps, eta, f, init, st, th=0.5, verbosity=verbosity)

0.5
Eval loss: 20.257371
Loss: 0.851492
Eval loss: 0.859867
Loss: 0.764482
Eval loss: 0.741232
Loss: 0.641422
Eval loss: 0.644407
Loss: 0.567025
Eval loss: 0.613088
Loss: 0.566368
Eval loss: 0.537374
Loss: 0.560995
Eval loss: 0.552762
Loss: 0.506703
Eval loss: 0.510560
Loss: 0.477636
Eval loss: 0.475650
Loss: 0.461405
Eval loss: 0.482890
Loss: 0.464960
Eval loss: 0.496159
Loss: 0.464852
Eval loss: 0.469255
Loss: 0.476298
Eval loss: 0.447683
Loss: 0.471332
Eval loss: 0.445592
Loss: 0.448290
Eval loss: 0.455811
Loss: 0.485837
Eval loss: 0.452891
Loss: 0.456269
Eval loss: 0.480209
Loss: 0.449503
Eval loss: 0.475828
Loss: 0.460859
Eval loss: 0.462457
Loss: 0.475297
Eval loss: 0.444235
Loss: 0.461066
Eval loss: 0.463580
Distance from rows of A:
[0.21923617076127086, 0.23613755820673962, 0.24196976315736737, 0.29766055814200543, 0.19300789424055592, 0.27695690581012511, 0.2456743006982762, 0.2842850121911033, 0.25874459195348432, 0.23042388069351233, 0.25845053017079983, 0.21410478569436661,

(0.46358046280171128,
 array([[  3.23e-01,   1.22e-01,   9.87e-02,  -1.21e-01,   2.21e-01,
          -9.33e-02,   1.99e-01,  -1.71e-01,  -1.06e-02,   1.69e-01,
           8.95e-02,  -2.75e-01,  -1.40e-01,  -1.20e-01,  -3.25e-03,
          -2.75e-01,  -1.11e-01,  -1.34e-01,  -7.98e-02,  -3.81e-02,
           1.70e-01,  -2.09e-01,  -1.19e-01,  -2.54e-01,  -1.68e-01],
        [ -6.85e-03,   2.55e-01,   6.12e-02,  -9.77e-02,   7.42e-02,
           1.34e-01,  -2.47e-02,   5.53e-02,  -2.00e-01,  -1.60e-01,
          -3.97e-02,   1.95e-01,   8.11e-02,   3.26e-01,  -2.40e-01,
           6.74e-02,  -5.70e-02,  -1.02e-01,  -1.09e-01,   3.14e-02,
          -2.61e-02,   1.91e-01,  -3.40e-01,  -6.45e-02,   2.92e-01],
        [ -6.47e-02,   1.07e-01,  -7.41e-02,  -1.19e-01,  -2.07e-01,
           1.06e-01,   7.61e-02,  -5.82e-02,  -5.06e-02,  -4.87e-02,
           2.01e-03,   4.90e-02,   8.45e-02,   4.61e-02,  -3.55e-01,
          -1.97e-01,   7.42e-02,   1.71e-02,   8.23e-02,   1.36e-01,
          

In [34]:
data=[]
q= s/m
printv((s,m,n,q),verbosity,1)
A = make_A(m,n,verbosity)
for (init,st) in [(init_close(A, 0.05), "0.05")]:
    data.append((m,n,s, loss, mins1, mins2, mins3, AB))

(4, 50, 25, 0.08)


In [36]:
import pickle
with open('test.pickle', 'wb') as f:
# Pickle the 'data' dictionary using the highest protocol available.
    pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

In [28]:
import math

def get_frac(li, i):
    j = int(math.floor(i))
    if i==j:
        return li[j]
    else:
        return (1-(i-j)) * li[j] + (i-j) * li[j+1]

def percentiles(li, ps):
    lis = sorted(li)
    l = len(li)
    return [get_frac(lis, i*(l-1)) for i in ps]

def quartiles(li):
    return percentiles(li, [0,.25, .5, .75, 1])

percentiles([1,2,3,4,5,6,7], [0,.25,.5,.75,1])

[1, 2.5, 4, 5.5, 7]

In [1]:
import pickle

with open('am_dls_data.pickle', 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    data = pickle.load(f)

In [11]:
len(data)
#'hi %f' % 4.5

182

In [10]:
'hi %s' % [1.2, 3.4, 5.66]
[1,2][1]
i=0

SyntaxError: invalid syntax (<ipython-input-10-dffc799f2393>, line 4)

In [2]:
data2=[]
i=0
def get_st(i):
    return ["0.05", "0.1", "0.2", "0.5", "random", "samples", "oversamples"][i%7]
for (m,n,s,loss, mins1, mins2, mins3, A, B, AB) in data:
    data2.append((m,n,s, get_st(i), loss, mins1, mins2, mins3, A, B, AB))
    i = i+1
with open('am_dls_data.pickle', 'wb') as f:
    pickle.dump(data2, f, pickle.HIGHEST_PROTOCOL)

In [25]:
with open('am_dls_data.pickle', 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    data = pickle.load(f)

In [29]:
def quartiles(li):
    return percentiles(li, [0,.25, .5, .75, 1])


f = open('am_dls_data.txt', 'w')

for (m,n,s, st, loss, mins1, mins2, mins3, A, B, AB) in data:
    f.write(str((m,n,s,st)))
    f.write("\n")
    f.write(" Loss: %f\n" % loss)
    f.write(" Distance from rows of A: %s\n" % mins1)
    f.write("  Quartiles: %s\n" % quartiles(mins1))
    f.write(" Distance from rows of A (after normalization): %s\n" % mins2)
    f.write("  Quartiles: %s\n" % quartiles(mins2))
    f.write(" Distance from rows of B (after normalization): %s\n" % mins3)
    f.write("  Quartiles: %s\n" % quartiles(mins3))
f.close()


In [34]:
import numpy as np
np.set_printoptions(threshold=np.inf)
np.set_printoptions(precision=2)
data[11][10]

array([[  3.60e-01,  -4.58e-03,  -1.31e-01,   2.37e-01,   8.36e-01,
         -1.43e-01,  -2.82e-02,  -7.30e-02,   1.12e-01,  -1.85e-01,
          1.64e-02,   9.75e-02,  -1.16e-01,  -1.05e-02,   3.38e-01,
         -3.28e-01,   1.48e-01,  -7.03e-02,  -9.35e-03,   1.23e-01,
          1.91e-01,  -1.22e-02,   5.13e-03,  -1.10e-01,  -3.46e-02,
         -3.45e-01,  -2.68e-02,   7.39e-02,   1.91e-01,   1.13e-02,
          2.00e-02,  -2.40e-01,  -7.24e-02,  -6.04e-02,   1.62e-01,
          1.56e-01,  -3.02e-02,   1.88e-01,   1.56e-01,   6.31e-02,
         -7.06e-02,   8.76e-02,   1.46e-01,  -1.12e-03,   1.14e-01,
         -8.81e-02,   9.56e-02,  -1.28e-02,   2.15e-01,  -4.23e-01],
       [  3.78e-02,   1.10e-01,  -2.43e-01,   4.68e-02,   1.21e-01,
         -1.65e-01,  -2.38e-01,   1.52e-01,   9.71e-01,   4.71e-02,
         -1.57e-01,   1.14e-01,  -8.14e-02,  -4.05e-02,   1.16e-01,
         -1.10e-01,   7.35e-02,   6.53e-02,  -1.91e-01,  -2.16e-01,
          3.68e-02,   9.82e-02,  -4.44e-02,   3

In [38]:
def thmap(lli, th):
    hs = [[x if x>th or x<-th else 0 for x in li] for li in lli]
    return hs

thmap([[0.4, 0.6], [-0.6, -0.4]], 0.5)


[[0, 0.6], [-0.6, 0]]

In [40]:
thmap(data[11][10] ,0.5)

[[0,
  0,
  0,
  0,
  0.8358808199519453,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0.97106461828158475,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  -0.70377247364125273,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  -0.94585998569486607,
  0,

In [41]:
print(thmap(data[88][10], 0.5)) #200, 100, 8, random


[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.706763266877804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [42]:
print(thmap(data[89][10], 0.5))


[[0, 0, 0, 0, 0, -0.82721738935205114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [43]:
print(thmap(data[90][10], 0.5))

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [44]:
data[90]

(200,
 100.0,
 8,
 'oversamples',
 3.2696570290950997,
 [0.99662204441233793,
  0.95107822973296496,
  0.93865241177900816,
  0.9772069395549261,
  0.96664075109348147,
  0.97698467731007854,
  0.99957430409838999,
  0.99632643079054839,
  0.88096439418002814,
  0.96774382208075971,
  0.92214032318004413,
  0.9991423388462366,
  1.0185906444873567,
  0.96981129195123483,
  0.91000059187857407,
  0.97634398254307442,
  0.96950466219251608,
  0.97434868744688974,
  1.0058929511655297,
  0.99037809011395361,
  0.97077795210143569,
  0.98545306635925822,
  1.0004298777763962,
  0.95270932898576022,
  0.95511596095901719,
  0.98457887206413064,
  0.97990773872837744,
  0.95390695677515946,
  0.97378867572382499,
  0.97367369752938437,
  0.99578623227031438,
  0.93364877855489858,
  0.96203253120883436,
  0.96461141646953119,
  0.98533616690372372,
  0.96497487294652529,
  0.97525831251262451,
  0.97622893237503661,
  0.97965111776582259,
  0.97973891571077742,
  0.94417805820477796,
  0.971

In [45]:
print(thmap(data[88][10], 0.3)) #200, 100, 8, random

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.706763266877804, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [46]:
np.shape([[1,2]])

(1, 2)

In [47]:
list(range(5))

[0, 1, 2, 3, 4]

In [49]:
mat = np.zeros((1,2,3))

In [50]:
mat

array([[[ 0.,  0.,  0.],
        [ 0.,  0.,  0.]]])

In [52]:
np.ndarray.flatten(mat)

array([ 0.,  0.,  0.,  0.,  0.,  0.])

In [53]:
g = [[1,2,3],[4,5,6],[7,8,9]]

In [54]:
def get_all_patches_as_matrix(arr, psize):
    dims = np.shape(arr)
    mat = np.zeros((dims[0] * (dims[1] - psize + 1) * (dims[2] - psize + 1), psize * psize))
    r = 0
    for i in range(dims[0]):
        for j in range(dims[1] - psize + 1):
            for k in range(dims[2] - psize + 1):
                mat[r] = np.ndarray.flatten(arr[i,j:j+psize, k:k+psize])
                r = r+1
    return mat

In [56]:
get_all_patches_as_matrix(np.array([g]), 2)

array([[ 1.,  2.,  4.,  5.],
       [ 2.,  3.,  5.,  6.],
       [ 4.,  5.,  7.,  8.],
       [ 5.,  6.,  8.,  9.]])

In [57]:
import numpy as np
data = np.zeros( (512,512,3), dtype=np.uint8)
data[256,256] = [255,0,0]

In [58]:
from PIL import Image
import numpy as np

w, h = 512, 512
data = np.zeros((h, w, 3), dtype=np.uint8)
data[256, 256] = [255, 0, 0]
img = Image.fromarray(data, 'RGB')
img.save('my.png')
img.show()

In [72]:
img = Image.fromarray(np.array([[0,2**7,2**8-1],[0,2**7,2**8-1]],np.dtype(np.int16)), 'L')
img.save('test.png')

In [67]:
def image2pixelarray(filepath):
    """
    Parameters
    ----------
    filepath : str
        Path to an image file

    Returns
    -------
    list
        A list of lists which make it simple to access the greyscale value by
        im[y][x]
    """
    im = Image.open(filepath).convert('L')
    (width, height) = im.size
    greyscale_map = list(im.getdata())
    greyscale_map = np.array(greyscale_map)
    greyscale_map = greyscale_map.reshape((height, width))
    return greyscale_map

In [68]:
a=image2pixelarray('grays.png')

In [69]:
a

array([[255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255],
       [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
        255, 255, 25

In [70]:
2**7

128

In [71]:
2**8-1

255

In [73]:
b=image2pixelarray('test.png')
b

array([[0, 0, 0],
       [0, 0, 0]])

In [76]:
img = Image.fromarray(np.array([[0,127,255],[0,127,255]],np.dtype(np.int16)), 'L')
img.save('test.png')

In [77]:
b=image2pixelarray('test.png')
b

array([[  0,   0, 127],
       [  0, 255,   0]])

In [79]:
c = np.array([[0,1.2,3.5]], np.dtype(np.int16))

In [80]:
np.reshape([1,2,3,4],(2,2))

array([[1, 2],
       [3, 4]])

In [86]:
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'


In [82]:
import mnist

ImportError: /lib64/libc.so.6: version `GLIBC_2.17' not found (required by /u/holdenl/anaconda/lib/python2.7/site-packages/tensorflow/python/_pywrap_tensorflow.so)

In [85]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gzip

import numpy
from six.moves import xrange  # pylint: disable=redefined-builtin

#from tensorflow.contrib.learn.python.learn.datasets import base
#from tensorflow.python.framework import dtypes
#from tensorflow.python.platform import gfile

SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'


def _read32(bytestream):
  dt = numpy.dtype(numpy.uint32).newbyteorder('>')
  return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]


def extract_images(filename):
  """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
  print('Extracting', filename)
  with gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream:
    magic = _read32(bytestream)
    if magic != 2051:
      raise ValueError('Invalid magic number %d in MNIST image file: %s' %
                       (magic, filename))
    num_images = _read32(bytestream)
    rows = _read32(bytestream)
    cols = _read32(bytestream)
    buf = bytestream.read(rows * cols * num_images)
    data = numpy.frombuffer(buf, dtype=numpy.uint8)
    data = data.reshape(num_images, rows, cols, 1)
    return data

In [87]:
a = extract_images(TRAIN_IMAGES)

Extracting train-images-idx3-ubyte.gz


NameError: global name 'gfile' is not defined

In [88]:
import os, struct
from array import array as pyarray
from numpy import append, array, int8, uint8, zeros

def load_mnist(dataset="training", digits=np.arange(10), path="."):
    """
    Loads MNIST files into 3D numpy arrays

    Adapted from: http://abel.ee.ucla.edu/cvxopt/_downloads/mnist.py
    """

    if dataset == "training":
        fname_img = os.path.join(path, 'train-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images-idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    flbl = open(fname_lbl, 'rb')
    magic_nr, size = struct.unpack(">II", flbl.read(8))
    lbl = pyarray("b", flbl.read())
    flbl.close()

    fimg = open(fname_img, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
    img = pyarray("B", fimg.read())
    fimg.close()

    ind = [ k for k in range(size) if lbl[k] in digits ]
    N = len(ind)

    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i in range(len(ind)):
        images[i] = array(img[ ind[i]*rows*cols : (ind[i]+1)*rows*cols ]).reshape((rows, cols))
        labels[i] = lbl[ind[i]]

    return images, labels

In [89]:
images, labels = load_mnist()

In [90]:
np.shape(images)

(60000, 28, 28)

In [91]:
images[0][0:5]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0]], dtype=uint8)

In [92]:
images[0]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
         18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
          0,   0],
       [  

In [98]:
X = np.transpose(np.array([[0,125, 50],[30, 60, 90]]))
X = X - np.tile(np.mean(X,0),(X.shape[0],1))
X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)),(X.shape[0],1)))
X = np.transpose(X)
X

array([[-0.66,  0.75, -0.09],
       [-0.71,  0.  ,  0.71]])

In [None]:
X = X - np.tile(np.mean(X,0),(X.shape[0],1))
X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)),(X.shape[0],1)),dtype = myfloat)
param = { 'K' : 100, # learns a dictionary with 100 elements
          'lambda1' : 0.15, 'numThreads' : 4, 'batchsize' : 400,
          'iter' : 1000}

########## FIRST EXPERIMENT ###########
tic = time.time()
D = spams.trainDL(X,**param)
tac = time.time()
t = tac - tic
print 'time of computation for Dictionary Learning: %f' %t

##param['approx'] = 0
# save dictionnary as dict.png
_objective(X,D,param,'dict')


In [1]:
from dl_convergence import *

SyntaxError: invalid syntax (dl_convergence.py, line 41)