In [3]:
using Zygote
using ITensors
using Random

In [4]:
"""
    angle_encoder(x::Real)

Encode a normalised data point `x` as the two-component complex feature vector

    [exp(3πi x / 2) * cos(π x / 2), exp(-3πi x / 2) * sin(π x / 2)]

# Arguments
- `x`: a real number in the closed interval `[0, 1]`.

# Returns
A 2-element complex vector. Its squared norm is `cos² + sin² = 1`.

# Throws
- `DomainError` if `x` is outside `[0, 1]` (this includes `NaN`).
"""
function angle_encoder(x::Real)
    # Validate explicitly: `@assert` is meant for internal invariants and may be
    # disabled, which would let out-of-range data through silently.
    if !(0 <= x <= 1)
        throw(
            DomainError(
                x,
                "Data points must be rescaled between 1 and 0 before encoding using the angle encoder.",
            ),
        )
    end
    phase = (3π / 2) * x
    s1 = exp(1im * phase) * cospi(0.5 * x)
    s2 = exp(-1im * phase) * sinpi(0.5 * x)
    return [s1, s2]
end

angle_encoder (generic function with 1 method)

In [5]:
"""
    normalised_data_to_product_state(sample, site_indices)

Convert a single normalised sample to a product state with local dimension 2, as
specified by the feature map `angle_encoder`.

# Arguments
- `sample`: vector of data points, one per site; each entry is passed to
  `angle_encoder`.
- `site_indices`: vector of site indices; entry `i` labels the tensor built from
  `sample[i]`.

# Returns
An `MPS` whose `i`-th tensor is the encoded `i`-th data point on `site_indices[i]`.

# Throws
- `DimensionMismatch` if `sample` and `site_indices` differ in length.
"""
function normalised_data_to_product_state(
    sample::AbstractVector, site_indices::AbstractVector{<:Index}
)
    # Validate explicitly rather than with `@assert`, which may be disabled.
    if length(sample) != length(site_indices)
        throw(DimensionMismatch("Mismatch between number of sites and sample length."))
    end

    # Pair entries positionally so the two containers need not share an index style.
    site_tensors = [
        ITensor(angle_encoder(x), idx) for (x, idx) in zip(sample, site_indices)
    ]

    return MPS(site_tensors)
end

normalised_data_to_product_state (generic function with 1 method)

In [12]:
# Build a random complex MPS on 10 "S=1/2" sites with a requested link dimension of 5.
s = siteinds("S=1/2", 10);
mps = randomMPS(ComplexF64, s; linkdims=5);
# Dimension-2 index tagged "f(x)", and a random tensor living on it.
label_index = Index(2, "f(x)")
label_tensor = randomITensor(label_index);
# Attach the label tensor to the first MPS site. `label_index` is freshly created, so
# it is not shared with site 1 and the product just adds the "f(x)" index to it.
mps[1] *= label_tensor
# Normalise the MPS in place.
normalize!(mps)

MPS
[1] ((dim=2|id=984|"S=1/2,Site,n=1"), (dim=5|id=186|"Link,l=1"), (dim=2|id=105|"f(x)"))
[2] ((dim=5|id=186|"Link,l=1"), (dim=2|id=242|"S=1/2,Site,n=2"), (dim=5|id=255|"Link,l=2"))
[3] ((dim=5|id=255|"Link,l=2"), (dim=2|id=428|"S=1/2,Site,n=3"), (dim=5|id=502|"Link,l=3"))
[4] ((dim=5|id=502|"Link,l=3"), (dim=2|id=191|"S=1/2,Site,n=4"), (dim=5|id=28|"Link,l=4"))
[5] ((dim=5|id=28|"Link,l=4"), (dim=2|id=965|"S=1/2,Site,n=5"), (dim=5|id=115|"Link,l=5"))
[6] ((dim=5|id=115|"Link,l=5"), (dim=2|id=696|"S=1/2,Site,n=6"), (dim=5|id=477|"Link,l=6"))
[7] ((dim=5|id=477|"Link,l=6"), (dim=2|id=676|"S=1/2,Site,n=7"), (dim=5|id=376|"Link,l=7"))
[8] ((dim=5|id=376|"Link,l=7"), (dim=2|id=334|"S=1/2,Site,n=8"), (dim=4|id=416|"Link,l=8"))
[9] ((dim=4|id=416|"Link,l=8"), (dim=2|id=177|"S=1/2,Site,n=9"), (dim=2|id=594|"Link,l=9"))
[10] ((dim=2|id=594|"Link,l=9"), (dim=2|id=637|"S=1/2,Site,n=10"))


In [13]:
# Draw 10 uniform random values in [0, 1) as a stand-in sample (one per site) and
# encode it as a product state on the same site indices `s` used for `mps`.
sample = rand(10)
ps = normalised_data_to_product_state(sample, s)

MPS
[1] ((dim=2|id=984|"S=1/2,Site,n=1"),)
[2] ((dim=2|id=242|"S=1/2,Site,n=2"),)
[3] ((dim=2|id=428|"S=1/2,Site,n=3"),)
[4] ((dim=2|id=191|"S=1/2,Site,n=4"),)
[5] ((dim=2|id=965|"S=1/2,Site,n=5"),)
[6] ((dim=2|id=696|"S=1/2,Site,n=6"),)
[7] ((dim=2|id=676|"S=1/2,Site,n=7"),)
[8] ((dim=2|id=334|"S=1/2,Site,n=8"),)
[9] ((dim=2|id=177|"S=1/2,Site,n=9"),)
[10] ((dim=2|id=637|"S=1/2,Site,n=10"),)


In [26]:
"""
    overlap(ps, mps)

Contract `conj(ps[i]) * mps[i]` over every site `i` and return the accumulated
product.

Indices present on only one of the two arguments are left on the result; in this
notebook that is the label index attached to the first site of `mps`.

# Arguments
- `ps`: indexable collection of per-site tensors (conjugated before contraction).
- `mps`: indexable collection of per-site tensors with the same number of sites.

# Throws
- `ArgumentError` if there are no sites.
- `DimensionMismatch` if `ps` and `mps` differ in length.
"""
function overlap(ps, mps)
    # Use the actual number of sites instead of a fixed count of 10, which would
    # silently ignore extra sites or fail on shorter inputs.
    nsites = length(mps)
    if length(ps) != nsites
        throw(DimensionMismatch("ps has $(length(ps)) sites but mps has $nsites sites."))
    end
    nsites > 0 || throw(ArgumentError("overlap requires at least one site."))

    # Start from the first site rather than the integer 1 so the accumulator
    # holds the same kind of value on every iteration.
    res = conj(ps[1]) * mps[1]
    for i in 2:nsites
        res *= conj(ps[i]) * mps[i]
    end
    return res
end

overlap (generic function with 1 method)

In [79]:
# Contract the encoded sample with the MPS; only the "f(x)" label index is left over.
out = overlap(ps, mps)
# Inspect the underlying tensor storage of the result.
out.tensor

Dim 1: (dim=2|id=105|"f(x)")
NDTensors.Dense{ComplexF64, Vector{ComplexF64}}
 2-element
 0.009762478276198694 - 0.0027419903200319117im
 -0.04158909984160018 + 0.01168114345130324im

Make a bond tensor by contracting the first two sites of the MPS.

In [28]:
# Merge MPS sites 1 and 2 into a single order-4 bond tensor by contracting their
# shared link index.
bt = mps[1] * mps[2]

ITensor ord=4 (dim=2|id=984|"S=1/2,Site,n=1") (dim=2|id=105|"f(x)") (dim=2|id=242|"S=1/2,Site,n=2") (dim=5|id=255|"Link,l=2")
NDTensors.Dense{ComplexF64, Vector{ComplexF64}}

In [91]:
"""
    loss(bt, mps, ps; ground_truth_label=1)

Negative log-probability loss `-log(|f|^2)` for a single encoded sample, written as
a function of the bond tensor `bt` that replaces sites 1 and 2 of `mps`.

# Arguments
- `bt`: bond tensor, i.e. the contraction of the first two sites of `mps`.
- `mps`: the MPS; only sites `3:length(mps)` are read here.
- `ps`: product state encoding the sample, one tensor per site.

# Keywords
- `ground_truth_label`: label of the sample. Label `k` selects component `k + 1`
  of the label index. Defaults to `1`, the placeholder used in this notebook.

# Returns
The real scalar `-log(abs2(f))`, where `f` is the model output component for the
ground-truth label.
"""
function loss(bt, mps, ps; ground_truth_label=1)
    # Everything except the bond tensor: the sample on sites 1 and 2, times the
    # sample contracted with the MPS on all remaining sites.
    phi_tilde = ps[1] * ps[2]
    for i in 3:length(mps)
        phi_tilde *= ps[i] * mps[i]
    end
    yhat = phi_tilde * bt
    # Pick the label index off the model output.
    # NOTE(review): assumes it is the only index left on `yhat` - confirm.
    label_idx = first(inds(yhat))
    y = onehot(label_idx => (ground_truth_label + 1))
    # Component of the output along the ground-truth label.
    f_ln = first(yhat * y)
    return -log(abs2(f_ln))
end

loss (generic function with 1 method)

Now use Zygote to compute the gradient of the loss with respect to the bond tensor...

In [92]:
# Fix `mps` and `ps` so the loss becomes a function of the bond tensor alone.
l = x -> loss(x, mps, ps);

In [93]:
# Evaluate the loss at the current bond tensor.
l(bt)

5.93171340961374

In [94]:
# Differentiate the loss with respect to the bond tensor. `gradient` returns a tuple
# with one entry per argument, hence the destructuring. This rebinds `out`.
out, = gradient(l, bt)

(ITensor ord=4
Dim 1: (dim=2|id=984|"S=1/2,Site,n=1")
Dim 2: (dim=2|id=242|"S=1/2,Site,n=2")
Dim 3: (dim=5|id=255|"Link,l=2")
Dim 4: (dim=2|id=105|"f(x)")
NDTensors.Dense{ComplexF64, Vector{ComplexF64}}
 2×2×5×2
[:, :, 1, 1] =
 0.0 + 0.0im  0.0 + 0.0im
 0.0 + 0.0im  0.0 + 0.0im

[:, :, 2, 1] =
 0.0 + 0.0im  0.0 + 0.0im
 0.0 + 0.0im  0.0 + 0.0im

[:, :, 3, 1] =
 0.0 + 0.0im  0.0 + 0.0im
 0.0 + 0.0im  0.0 + 0.0im

[:, :, 4, 1] =
 0.0 + 0.0im  0.0 + 0.0im
 0.0 + 0.0im  0.0 + 0.0im

[:, :, 5, 1] =
 0.0 + 0.0im  0.0 + 0.0im
 0.0 + 0.0im  0.0 + 0.0im

[:, :, 1, 2] =
 -0.13699272724542197 - 0.06672814063163042im  …  1.1542484544093154 - 0.35651522276696856im
 -0.13292114193147536 + 0.1135326515332268im      0.1535991666847198 - 1.3773189911079966im

[:, :, 2, 2] =
 -0.029365154596603608 + 0.11071489275460314im  …  -0.4302359914675488 - 0.7996977134048106im
   0.10300361467439788 + 0.08158837609105445im     -1.0381582911763692 + 0.08630729761959328im

[:, :, 3, 2] =
 -0.030178728327816044 + 0.

In [95]:
# The label index is the 4th dimension of the Zygote gradient. Take its value-2
# slice; the value-1 slice printed above is identically zero.
out.tensor[:,:,:,2]

Dim 1: 2
Dim 2: 2
Dim 3: 5
NDTensors.Dense{ComplexF64, Base.ReshapedArray{ComplexF64, 1, SubArray{ComplexF64, 3, Array{ComplexF64, 4}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Tuple{}}}
 2×2×5
[:, :, 1] =
 -0.13699272724542197 - 0.06672814063163042im  …  1.1542484544093154 - 0.35651522276696856im
 -0.13292114193147536 + 0.1135326515332268im      0.1535991666847198 - 1.3773189911079966im

[:, :, 2] =
 -0.029365154596603608 + 0.11071489275460314im  …  -0.4302359914675488 - 0.7996977134048106im
   0.10300361467439788 + 0.08158837609105445im     -1.0381582911763692 + 0.08630729761959328im

[:, :, 3] =
 -0.030178728327816044 + 0.2458503121349326im   …  -1.1580232764359428 - 1.5859146917812041im
   0.24476820913918634 + 0.14432917860762984im     -2.1984005684526564 + 0.4917563503147393im

[:, :, 4] =
 -0.26207616221948443 + 0.21633384157748067im  …  0.34357926020482865 - 2.6721385593710116im
   0.1075253027235096 + 0.37

Now for the analytical gradient, which should match the Zygote result once the factor of 2 applied below is included...

In [96]:
"""
    grad_analytical(bt, mps, ps; ground_truth_label=1)

Closed-form gradient of the loss `-log(|f|^2)` with respect to the bond tensor,
returned as `-y * conj(phi_tilde / f)`.

With `f` linear in `bt`, this expression is the derivative of the loss with respect
to `conj(bt)`. The notebook multiplies it by 2 before comparing with the Zygote
gradient.

# Arguments
- `bt`: bond tensor, i.e. the contraction of the first two sites of `mps`.
- `mps`: the MPS; only sites `3:length(mps)` are read here.
- `ps`: product state encoding the sample, one tensor per site.

# Keywords
- `ground_truth_label`: label of the sample. Label `k` selects component `k + 1`
  of the label index. Defaults to `1`, the placeholder used in this notebook.

# Returns
A tensor over the label index and the indices of `phi_tilde`. It is built as
`y * ...`, so the label index comes first.
"""
function grad_analytical(bt, mps, ps; ground_truth_label=1)
    # Everything except the bond tensor: the sample on sites 1 and 2, times the
    # sample contracted with the MPS on all remaining sites.
    phi_tilde = ps[1] * ps[2]
    for i in 3:length(mps)
        phi_tilde *= ps[i] * mps[i]
    end
    yhat = phi_tilde * bt
    # Pick the label index off the model output.
    # NOTE(review): assumes it is the only index left on `yhat` - confirm.
    label_idx = first(inds(yhat))
    y = onehot(label_idx => (ground_truth_label + 1))
    # Component of the output along the ground-truth label.
    f_ln = first(yhat * y)
    # The one-hot factor confines the gradient to the ground-truth label slice.
    return -y * conj(phi_tilde / f_ln)
end

grad_analytical (generic function with 1 method)

In [97]:
# Evaluate the closed-form gradient at the same bond tensor, MPS and sample.
grad_analyt = grad_analytical(bt, mps, ps);

In [99]:
# Here the label index is the 1st dimension, so take value 2 along it. The factor
# of 2 brings the result in line with the Zygote gradient slice shown above.
2 * grad_analyt.tensor[2, :, :, :]

Dim 1: 2
Dim 2: 2
Dim 3: 5
NDTensors.Dense{ComplexF64, Vector{ComplexF64}}
 2×2×5
[:, :, 1] =
 -0.13699272724542197 - 0.06672814063163042im  …  1.1542484544093154 - 0.35651522276696856im
 -0.13292114193147536 + 0.1135326515332268im      0.1535991666847198 - 1.3773189911079966im

[:, :, 2] =
 -0.029365154596603604 + 0.11071489275460314im  …  -0.4302359914675488 - 0.7996977134048106im
   0.10300361467439788 + 0.08158837609105443im     -1.0381582911763692 + 0.08630729761959328im

[:, :, 3] =
 -0.030178728327816054 + 0.2458503121349326im   …  -1.1580232764359426 - 1.5859146917812041im
   0.24476820913918634 + 0.14432917860762984im      -2.198400568452657 + 0.49175635031473935im

[:, :, 4] =
 -0.26207616221948443 + 0.21633384157748067im  …  0.34357926020482854 - 2.6721385593710116im
  0.10752530272350962 + 0.3747248512526734im       -2.653248011399579 - 1.5850831724031746im

[:, :, 5] =
 -0.12967874704674678 + 0.3163290363273722im  …  -0.9644058952342874 - 2.532993856180047im
  0.2733327426