In [3]:
using ITensors
using OptimKit
using Random
using Distributions

# Small Scale OptimKit test

Create mock data and MPS

In [4]:
sites = siteinds("S=1/2", 10)

10-element Vector{Index{Int64}}:
 (dim=2|id=251|"S=1/2,Site,n=1")
 (dim=2|id=970|"S=1/2,Site,n=2")
 (dim=2|id=974|"S=1/2,Site,n=3")
 (dim=2|id=507|"S=1/2,Site,n=4")
 (dim=2|id=519|"S=1/2,Site,n=5")
 (dim=2|id=620|"S=1/2,Site,n=6")
 (dim=2|id=591|"S=1/2,Site,n=7")
 (dim=2|id=677|"S=1/2,Site,n=8")
 (dim=2|id=440|"S=1/2,Site,n=9")
 (dim=2|id=348|"S=1/2,Site,n=10")

In [5]:
ψ = randomMPS(sites; linkdims=2)

MPS
[1] ((dim=2|id=251|"S=1/2,Site,n=1"), (dim=2|id=306|"Link,l=1"))
[2] ((dim=2|id=306|"Link,l=1"), (dim=2|id=970|"S=1/2,Site,n=2"), (dim=2|id=316|"Link,l=2"))
[3] ((dim=2|id=316|"Link,l=2"), (dim=2|id=974|"S=1/2,Site,n=3"), (dim=2|id=972|"Link,l=3"))
[4] ((dim=2|id=972|"Link,l=3"), (dim=2|id=507|"S=1/2,Site,n=4"), (dim=2|id=987|"Link,l=4"))
[5] ((dim=2|id=987|"Link,l=4"), (dim=2|id=519|"S=1/2,Site,n=5"), (dim=2|id=60|"Link,l=5"))
[6] ((dim=2|id=60|"Link,l=5"), (dim=2|id=620|"S=1/2,Site,n=6"), (dim=2|id=25|"Link,l=6"))
[7] ((dim=2|id=25|"Link,l=6"), (dim=2|id=591|"S=1/2,Site,n=7"), (dim=2|id=206|"Link,l=7"))
[8] ((dim=2|id=206|"Link,l=7"), (dim=2|id=677|"S=1/2,Site,n=8"), (dim=2|id=69|"Link,l=8"))
[9] ((dim=2|id=69|"Link,l=8"), (dim=2|id=440|"S=1/2,Site,n=9"), (dim=2|id=839|"Link,l=9"))
[10] ((dim=2|id=839|"Link,l=9"), (dim=2|id=348|"S=1/2,Site,n=10"))


In [6]:
train_data = rand([0, 1], 2, 10)
train_labels = [0 ; 1];

In [7]:
"""
    encoder(dat, sites)

Return an MPS over `sites` in which tensor `n` is an `ITensor` on `sites[n]`
whose only nonzero entry is a 1 at position `dat[n] + 1`.

`dat` is indexed with the same positions as `sites`; its entries are shifted by
one to obtain the 1-based position that is set.
"""
function encoder(dat, sites)
    product_state = MPS(sites; linkdims=1)
    for (n, site) in enumerate(sites)
        basis_tensor = ITensor(site)
        # Shift the data value by one to get the 1-based entry to switch on.
        basis_tensor[(dat[n] + 1)] = 1
        product_state[n] = basis_tensor
    end
    return product_state
end

encoder (generic function with 1 method)

In [8]:
ϕ1 = encoder(train_data[1, :], sites);
ϕ2 = encoder(train_data[2, :], sites);

Let's contract the product states with our random initial MPS to see what values we get...

In [9]:
inner(ψ, ϕ1)

0.014741835554575066

In [10]:
inner(ψ, ϕ2)

-0.007035887508537679

Convert to probabilities

In [11]:
Z = inner(ψ, ψ)

1.0000000000000002

In [12]:
p1 = (abs(inner(ψ, ϕ1)))^2 / (Z) # label 0

0.00021732171551813351

In [13]:
p2 = (abs(inner(ψ, ϕ2)))^2 / (Z) # label 1

4.950371303279654e-5

# Approach 2

In [14]:
ψ

MPS
[1] ((dim=2|id=251|"S=1/2,Site,n=1"), (dim=2|id=306|"Link,l=1"))
[2] ((dim=2|id=306|"Link,l=1"), (dim=2|id=970|"S=1/2,Site,n=2"), (dim=2|id=316|"Link,l=2"))
[3] ((dim=2|id=316|"Link,l=2"), (dim=2|id=974|"S=1/2,Site,n=3"), (dim=2|id=972|"Link,l=3"))
[4] ((dim=2|id=972|"Link,l=3"), (dim=2|id=507|"S=1/2,Site,n=4"), (dim=2|id=987|"Link,l=4"))
[5] ((dim=2|id=987|"Link,l=4"), (dim=2|id=519|"S=1/2,Site,n=5"), (dim=2|id=60|"Link,l=5"))
[6] ((dim=2|id=60|"Link,l=5"), (dim=2|id=620|"S=1/2,Site,n=6"), (dim=2|id=25|"Link,l=6"))
[7] ((dim=2|id=25|"Link,l=6"), (dim=2|id=591|"S=1/2,Site,n=7"), (dim=2|id=206|"Link,l=7"))
[8] ((dim=2|id=206|"Link,l=7"), (dim=2|id=677|"S=1/2,Site,n=8"), (dim=2|id=69|"Link,l=8"))
[9] ((dim=2|id=69|"Link,l=8"), (dim=2|id=440|"S=1/2,Site,n=9"), (dim=2|id=839|"Link,l=9"))
[10] ((dim=2|id=839|"Link,l=9"), (dim=2|id=348|"S=1/2,Site,n=10"))


In [15]:
decision_index = Index(2, "f(x)")

(dim=2|id=100|"f(x)")

In [16]:
# Indices of the first MPS tensor (read from a deep copy of ψ[1]).
# NOTE(review): the deepcopy looks unnecessary for just reading indices — confirm.
site_1_old_inds = inds(deepcopy(ψ[1]))
# Pair the existing indices with the label index. This builds a nested tuple
# ((site, link), label) rather than a flat tuple of three indices.
site_1_new_inds = site_1_old_inds, decision_index

(((dim=2|id=251|"S=1/2,Site,n=1"), (dim=2|id=306|"Link,l=1")), (dim=2|id=100|"f(x)"))

In [17]:
new_tensor = randomITensor(site_1_new_inds)

ITensor ord=3 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=306|"Link,l=1") (dim=2|id=100|"f(x)")
NDTensors.Dense{Float64, Vector{Float64}}

In [18]:
W = deepcopy(ψ)

MPS
[1] ((dim=2|id=251|"S=1/2,Site,n=1"), (dim=2|id=306|"Link,l=1"))
[2] ((dim=2|id=306|"Link,l=1"), (dim=2|id=970|"S=1/2,Site,n=2"), (dim=2|id=316|"Link,l=2"))
[3] ((dim=2|id=316|"Link,l=2"), (dim=2|id=974|"S=1/2,Site,n=3"), (dim=2|id=972|"Link,l=3"))
[4] ((dim=2|id=972|"Link,l=3"), (dim=2|id=507|"S=1/2,Site,n=4"), (dim=2|id=987|"Link,l=4"))
[5] ((dim=2|id=987|"Link,l=4"), (dim=2|id=519|"S=1/2,Site,n=5"), (dim=2|id=60|"Link,l=5"))
[6] ((dim=2|id=60|"Link,l=5"), (dim=2|id=620|"S=1/2,Site,n=6"), (dim=2|id=25|"Link,l=6"))
[7] ((dim=2|id=25|"Link,l=6"), (dim=2|id=591|"S=1/2,Site,n=7"), (dim=2|id=206|"Link,l=7"))
[8] ((dim=2|id=206|"Link,l=7"), (dim=2|id=677|"S=1/2,Site,n=8"), (dim=2|id=69|"Link,l=8"))
[9] ((dim=2|id=69|"Link,l=8"), (dim=2|id=440|"S=1/2,Site,n=9"), (dim=2|id=839|"Link,l=9"))
[10] ((dim=2|id=839|"Link,l=9"), (dim=2|id=348|"S=1/2,Site,n=10"))


In [19]:
W[1] = new_tensor

ITensor ord=3 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=306|"Link,l=1") (dim=2|id=100|"f(x)")
NDTensors.Dense{Float64, Vector{Float64}}

In [20]:
normalize!(W)

MPS
[1] ((dim=2|id=251|"S=1/2,Site,n=1"), (dim=2|id=306|"Link,l=1"), (dim=2|id=100|"f(x)"))
[2] ((dim=2|id=306|"Link,l=1"), (dim=2|id=970|"S=1/2,Site,n=2"), (dim=2|id=316|"Link,l=2"))
[3] ((dim=2|id=316|"Link,l=2"), (dim=2|id=974|"S=1/2,Site,n=3"), (dim=2|id=972|"Link,l=3"))
[4] ((dim=2|id=972|"Link,l=3"), (dim=2|id=507|"S=1/2,Site,n=4"), (dim=2|id=987|"Link,l=4"))
[5] ((dim=2|id=987|"Link,l=4"), (dim=2|id=519|"S=1/2,Site,n=5"), (dim=2|id=60|"Link,l=5"))
[6] ((dim=2|id=60|"Link,l=5"), (dim=2|id=620|"S=1/2,Site,n=6"), (dim=2|id=25|"Link,l=6"))
[7] ((dim=2|id=25|"Link,l=6"), (dim=2|id=591|"S=1/2,Site,n=7"), (dim=2|id=206|"Link,l=7"))
[8] ((dim=2|id=206|"Link,l=7"), (dim=2|id=677|"S=1/2,Site,n=8"), (dim=2|id=69|"Link,l=8"))
[9] ((dim=2|id=69|"Link,l=8"), (dim=2|id=440|"S=1/2,Site,n=9"), (dim=2|id=839|"Link,l=9"))
[10] ((dim=2|id=839|"Link,l=9"), (dim=2|id=348|"S=1/2,Site,n=10"))


In [21]:
# Contract every tensor of W with the matching tensor of the encoded sample ϕ1,
# accumulating the product from the left. The accumulator starts as the scalar 1
# so the first multiplication simply yields the first contracted pair.
res1 = 1
# Iterate over the actual number of MPS tensors instead of a hard-coded 10, so
# the cell keeps working if the number of sites is changed.
for i=1:length(W)
    res1 *= W[i] * ϕ1[i]
end

In [22]:
inner(res1, res1)

0.000150392604173485

In [23]:
"""
    OutputToProb(yhat::ITensor)

Return a vector whose `k`-th entry is `yhat[k]^2` divided by `inner(yhat, yhat)`,
for `k` running from 1 to `ITensors.dim(yhat)`.
"""
function OutputToProb(yhat::ITensor)
    normalization = inner(yhat, yhat)
    return map(1:ITensors.dim(yhat)) do k
        yhat[k]^2 / normalization
    end
end

OutputToProb (generic function with 1 method)

In [24]:
yhat = OutputToProb(res1)

2-element Vector{Float64}:
 0.16494342215654556
 0.8350565778434544

Now suppose the true class is class 0; the probability assigned to the true class is then the first element of `yhat`.

In [25]:
y = [1; 0] # true prob. distribution
prob = dot(y', yhat)
NLL = -log(prob)

1.8021527599047755

# Make a bond tensor

In [26]:
BT = W[1] * W[2]

ITensor ord=4 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=100|"f(x)") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

Compute phi tilde:

In [27]:
# Accumulate the contraction of the encoded sample ϕ1 with W over sites 3 to
# length(W), i.e. everything to the right of the bond tensor built from sites 1
# and 2. The accumulator starts as the scalar 1.
RE = 1
for i=3:length(W)
    RE *= ϕ1[i] * W[i]
end

In [28]:
RE

ITensor ord=1 (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [29]:
phi_tilde = ϕ1[1] * ϕ1[2] * RE

ITensor ord=3 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [30]:
d_yhat_dW = phi_tilde

ITensor ord=3 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [31]:
yhat = BT * d_yhat_dW

ITensor ord=1 (dim=2|id=100|"f(x)")
NDTensors.Dense{Float64, Vector{Float64}}

In [32]:
y = onehot(decision_index => 1)

ITensor ord=1 (dim=2|id=100|"f(x)")
NDTensors.Dense{Float64, Vector{Float64}}

In [33]:
y_yhat = (y * yhat)[]
yhat_yhat = (yhat * yhat)[]

0.00015039260417348496

In [41]:
# First gradient term: (y·ŷ) (y ⊗ ∂ŷ/∂W) / (ŷ·ŷ).
# NOTE: this rebinds `p1`, which earlier held the label-0 probability.
p1 = y_yhat * (y * d_yhat_dW)
p1 ./= yhat_yhat

ITensor ord=4 (dim=2|id=100|"f(x)") (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [42]:
# Second gradient term: (y·ŷ)² (ŷ ⊗ ∂ŷ/∂W) / (ŷ·ŷ)².
# NOTE: this rebinds `p2`, which earlier held the label-1 probability.
p2 = y_yhat^2 * (yhat * d_yhat_dW)
p2 ./= (yhat_yhat)^2

ITensor ord=4 (dim=2|id=100|"f(x)") (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [44]:
p = y_yhat^2 / yhat_yhat

0.16494342215654562

In [45]:
# Combine the two terms and divide by p = (y·ŷ)² / (ŷ·ŷ). With ŷ = BT * d_yhat_dW
# this equals the derivative of -log(p) with respect to BT.
grad = -2 * (p1 - p2) ./ p

ITensor ord=4 (dim=2|id=100|"f(x)") (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [50]:
# Gradient-descent step with step size 0.8. `grad` is the derivative of the
# negative log-likelihood -log(p) with respect to BT, so the step must go
# against the gradient to reduce the loss (adding it would increase the loss).
BT_new = BT - 0.8 * grad

ITensor ord=4 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=100|"f(x)") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [54]:
new_yhat = BT_new * RE * ϕ1[1] * ϕ1[2]

ITensor ord=1 (dim=2|id=100|"f(x)")
NDTensors.Dense{Float64, Vector{Float64}}

In [57]:
old_yhat = BT *  RE * ϕ1[1] * ϕ1[2]

ITensor ord=1 (dim=2|id=100|"f(x)")
NDTensors.Dense{Float64, Vector{Float64}}

In [67]:
"""
    ComputeGradient(BT_old, lid, rid, ϕ, y, mps=W)

Compute the gradient of the negative log-likelihood `-log(p)` with respect to the
bond tensor `BT_old`, where `p = (y·ŷ)^2 / (ŷ·ŷ)` and `ŷ` is `BT_old` contracted
with the encoded sample and the rest of the MPS.

# Arguments
- `BT_old`: bond tensor; it must carry an index tagged `"f(x)"` (the label index).
- `lid`, `rid`: positions of the left and right sites covered by `BT_old`.
- `ϕ`: encoded sample, indexed by site position like `mps`.
- `y`: position on the label index that is set to one in the one-hot target
  (passed straight to `onehot(label_idx => y)`).
- `mps`: MPS supplying the tensors outside the bond; defaults to the global `W`,
  which is what the function read before this argument existed.

# Returns
An `ITensor` equal to `-2 * (p1 - p2) ./ p`, with
`p1 = (y·ŷ)/(ŷ·ŷ) * (y ⊗ ∂ŷ/∂BT)` and `p2 = (y·ŷ)^2/(ŷ·ŷ)^2 * (ŷ ⊗ ∂ŷ/∂BT)`.

# Throws
- `ArgumentError` if `BT_old` has no index tagged `"f(x)"`.
"""
function ComputeGradient(BT_old, lid, rid, ϕ, y, mps=W)
    label_inds = inds(BT_old, "f(x)")
    isempty(label_inds) &&
        throw(ArgumentError("BT_old has no index tagged \"f(x)\""))
    label_idx = label_inds[1]

    # Environments: sample-contracted MPS tensors left and right of the bond.
    # Each stays the scalar 1 when there are no sites on that side.
    LE = 1
    for i in 1:(lid - 1)
        LE *= mps[i] * ϕ[i]
    end
    RE = 1
    for i in (rid + 1):length(mps)
        RE *= mps[i] * ϕ[i]
    end

    # ŷ is linear in the bond tensor, so ∂ŷ/∂BT is everything ŷ is built from
    # except the bond tensor itself.
    d_yhat_dW = ϕ[lid] * ϕ[rid] * LE * RE
    yhat = BT_old * d_yhat_dW

    # One-hot target on the label index; kept under a separate name so the
    # caller-supplied `y` is not rebound.
    target = onehot(label_idx => y)

    y_yhat = (target * yhat)[]
    yhat_yhat = (yhat * yhat)[]
    p = y_yhat^2 / yhat_yhat

    p1 = (y_yhat / yhat_yhat) * (target * d_yhat_dW)
    p2 = (y_yhat^2 / yhat_yhat^2) * (yhat * d_yhat_dW)

    return -2 * (p1 - p2) ./ p
end

ComputeGradient (generic function with 2 methods)

In [61]:
BT_old = W[1] * W[2]

ITensor ord=4 (dim=2|id=251|"S=1/2,Site,n=1") (dim=2|id=100|"f(x)") (dim=2|id=970|"S=1/2,Site,n=2") (dim=2|id=316|"Link,l=2")
NDTensors.Dense{Float64, Vector{Float64}}

In [66]:
inds(BT_old, "f(x)")[1]

(dim=2|id=100|"f(x)")