In [417]:
using ITensors
using Zygote

In [418]:
# Site indices for a 10-site spin-1/2 chain.
s = siteinds("S=1/2", 10)
# Product state: one random complex 2-vector per site.
product_state = MPS(length(s));
for (site, site_index) in enumerate(s)
    product_state[site] = ITensor(rand(ComplexF64, 2), site_index)
end
# Random complex MPS on the same site indices, built with link dimension 4.
mps = randomMPS(ComplexF64, s; linkdims=4);

In [419]:
# Bond tensor: the contraction of the first two site tensors of `mps`.
BT = mps[1] * mps[2]

ITensor ord=3 (dim=2|id=576|"S=1/2,Site,n=1") (dim=2|id=728|"S=1/2,Site,n=2") (dim=4|id=977|"Link,l=2")
NDTensors.Dense{ComplexF64, Vector{ComplexF64}}

In [420]:
# Extra dimension-2 index "C": slot 1 carries the real part of the bond tensor and
# slot 2 the imaginary part, so the complex tensor is stored as one real tensor.
C_index = Index(2, "C")
C_tensor = ITensor(C_index)  # not used by the other cells shown here

# Basis vectors selecting each slot of `C_index`.
C_tensor_real = ITensor([1, 0], C_index)
C_tensor_imag = ITensor([0, 1], C_index)

# Real part of the bond tensor, placed in slot 1.
BT_real = real(BT)
BT_real_with_index = BT_real * C_tensor_real

# Imaginary part of the bond tensor, placed in slot 2.
BT_imag = imag(BT)
BT_imag_with_index = BT_imag * C_tensor_imag

# Real-valued tensor holding both parts; this is what the cost functions take.
BT_re_imag = BT_real_with_index + BT_imag_with_index

ITensor ord=4 (dim=2|id=576|"S=1/2,Site,n=1") (dim=2|id=728|"S=1/2,Site,n=2") (dim=4|id=977|"Link,l=2") (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}

In [421]:
"""
    cost_function(BT_combined::ITensor, product_state::MPS, mps::MPS)

Return the squared-error cost `0.5 * |yhat - y|^2` against the fixed target `y = 1`.

`BT_combined` is a real tensor that stores the real and imaginary parts of the bond
tensor in slots 1 and 2 of the global index `C_index`. The complex bond tensor is
rebuilt from those two slices and contracted with `product_state[1]`,
`product_state[2]` and with `mps[i] * product_state[i]` for every site `i >= 3`,
which yields the scalar prediction `yhat`.

# Arguments
- `BT_combined`: real tensor carrying `C_index`; the quantity being optimised.
- `product_state`: MPS whose site tensors are contracted onto every site.
- `mps`: MPS supplying the site tensors for sites `3:length(mps)`.

# Returns
The real scalar cost.

Relies on the global `C_index` being defined before the call.
"""
function cost_function(BT_combined::ITensor, product_state::MPS, mps::MPS)
    # Contraction does not mutate its operands, so no defensive copy is needed.
    BT_real = BT_combined * onehot(C_index => 1)
    BT_imag = BT_combined * onehot(C_index => 2)
    # reform bond tensor
    BT = BT_real + im * BT_imag
    y = 1
    # Contract every site to the right of the bond tensor with the product state.
    # The range follows the MPS length instead of assuming exactly 10 sites.
    phi_tilde = 1
    for i in 3:length(mps)
        phi_tilde *= mps[i] * product_state[i]
    end
    yhat = phi_tilde * BT * product_state[1] * product_state[2]
    # `yhat[]` extracts the scalar; abs2 gives |z|^2 directly, without a square root.
    return 0.5 * abs2(yhat[] - y)
end

cost_function (generic function with 1 method)

In [422]:
cost_function(BT_re_imag, product_state, mps)

0.46799178944117203

Now add a regularisation term that penalises deviation of the bond-tensor norm from 1:

In [423]:
"""
    cost_function_L2(BT_combined::ITensor, product_state::MPS, mps::MPS; lambda=0.55)

Return the cost of [`cost_function`](@ref) plus a norm penalty on the bond tensor:

    0.5 * |yhat - y|^2 + 0.5 * lambda * (norm(BT)^2 - 1)^2

where `BT` is the complex bond tensor rebuilt from the two `C_index` slices of
`BT_combined`.

# Arguments
- `BT_combined`: real tensor carrying `C_index`; the quantity being optimised.
- `product_state`: MPS whose site tensors are contracted onto every site.
- `mps`: MPS supplying the remaining site tensors.

# Keywords
- `lambda`: weight of the norm penalty (default `0.55`).

# Returns
The real scalar cost.

Relies on the global `C_index` being defined before the call.
"""
function cost_function_L2(
    BT_combined::ITensor, product_state::MPS, mps::MPS; lambda::Real=0.55
)
    # The data-fit term is exactly the unregularised cost; reuse it rather than
    # repeating the contraction code.
    data_term = cost_function(BT_combined, product_state, mps)
    # reform bond tensor (contraction does not mutate, so no copy is needed)
    BT_real = BT_combined * onehot(C_index => 1)
    BT_imag = BT_combined * onehot(C_index => 2)
    BT = BT_real + im * BT_imag
    # penalise large deviations from norm(W)^2 = 1
    # strong constraint (|W|^2-1), weak constraint would be |W|^2
    penalty = 0.5 * lambda * (norm(BT)^2 - 1)^2
    return data_term + penalty
end

cost_function_L2 (generic function with 1 method)

In [424]:
cost_function_L2(BT_re_imag, product_state, mps)

0.46799178944117203

Compute the gradient of both cost functions with Zygote:

In [425]:
# Single-argument closures over the fixed `product_state` and `mps`, so the gradient
# is taken with respect to the combined bond tensor only.
loss = x -> cost_function_L2(x, product_state,mps)  # regularised cost
loss2 = x -> cost_function(x, product_state, mps)  # unregularised cost

#87 (generic function with 1 method)

In [426]:
# `gradient` returns a tuple with one entry per differentiated argument, which is
# why the update cells index the result with `[1]`.
grad_B = gradient(loss, BT_re_imag)  # gradient of the regularised cost
grad_B2 = gradient(loss2, BT_re_imag)  # gradient of the unregularised cost

(ITensor ord=4
Dim 1: (dim=4|id=977|"Link,l=2")
Dim 2: (dim=2|id=728|"S=1/2,Site,n=2")
Dim 3: (dim=2|id=576|"S=1/2,Site,n=1")
Dim 4: (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}
 4×2×2×2
[:, :, 1, 1] =
 -0.04703866224678673   -0.055011988900830755
  0.015270117910640492   0.009119871551106912
 -0.037906472904550195  -0.041117294730222874
 -0.018999135433893574  -0.03645493001204367

[:, :, 2, 1] =
 -0.008464931086763557    -0.002658626140741937
  0.00025949842115075987  -0.005222701729900415
 -0.005906136511201041     9.620636793720182e-5
 -0.007472767505983238    -0.01098763194818186

[:, :, 1, 2] =
 -0.022921136367355186  -0.0652788795933827
  0.02066214582178875    0.04091196504502683
 -0.023334690710183514  -0.059859795598737775
  0.012279687626009719   0.005758586110878443

[:, :, 2, 2] =
 -0.015080480892835191   -0.028088590730350863
  0.008487408954701259    0.012831131735684294
 -0.013474001990996893   -0.024001166859529164
 -0.0002399104435554611  -0.0052969795

In [427]:
grad_B

(ITensor ord=4
Dim 1: (dim=2|id=576|"S=1/2,Site,n=1")
Dim 2: (dim=2|id=728|"S=1/2,Site,n=2")
Dim 3: (dim=4|id=977|"Link,l=2")
Dim 4: (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}
 2×2×4×2
[:, :, 1, 1] =
 -0.04703866224678673   -0.055011988900830755
 -0.008464931086763557  -0.002658626140741937

[:, :, 2, 1] =
 0.015270117910640492     0.009119871551106912
 0.00025949842115075987  -0.005222701729900415

[:, :, 3, 1] =
 -0.037906472904550195  -0.041117294730222874
 -0.005906136511201041   9.620636793720182e-5

[:, :, 4, 1] =
 -0.018999135433893574  -0.03645493001204367
 -0.007472767505983238  -0.01098763194818186

[:, :, 1, 2] =
 -0.022921136367355186  -0.0652788795933827
 -0.015080480892835191  -0.028088590730350863

[:, :, 2, 2] =
 0.02066214582178875   0.04091196504502683
 0.008487408954701259  0.012831131735684294

[:, :, 3, 2] =
 -0.023334690710183514  -0.059859795598737775
 -0.013474001990996893  -0.024001166859529164

[:, :, 4, 2] =
  0.012279687626009719    0.00575

In [428]:
# One gradient-descent step on the regularised cost, step size 0.4.
BT_new = BT_re_imag - 0.4 * grad_B[1]

ITensor ord=4 (dim=2|id=576|"S=1/2,Site,n=1") (dim=2|id=728|"S=1/2,Site,n=2") (dim=4|id=977|"Link,l=2") (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}

In [429]:
norm(BT_new)

1.0143701946288761

In [430]:
# One gradient-descent step on the unregularised cost, step size 0.4.
BT_new2 = BT_re_imag - 0.4 * grad_B2[1]

ITensor ord=4 (dim=2|id=576|"S=1/2,Site,n=1") (dim=2|id=728|"S=1/2,Site,n=2") (dim=4|id=977|"Link,l=2") (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}

In [431]:
norm(BT_new2)

1.0143701946288761

In [432]:
# Re-evaluate both gradients at the updated tensors (overwrites the earlier values).
grad_B = gradient(loss, BT_new)  # regularised cost at BT_new
grad_B2 = gradient(loss2, BT_new2)  # unregularised cost at BT_new2

(ITensor ord=4
Dim 1: (dim=4|id=977|"Link,l=2")
Dim 2: (dim=2|id=728|"S=1/2,Site,n=2")
Dim 3: (dim=2|id=576|"S=1/2,Site,n=1")
Dim 4: (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}
 4×2×2×2
[:, :, 1, 1] =
 -0.04655765017480755   -0.05444944248686058
  0.015113967401575678   0.009026612769894835
 -0.0375188455741304    -0.040696833896779125
 -0.018804852410857678  -0.03608214599605803

[:, :, 2, 1] =
 -0.00837836965311047    -0.002631439328713422
  0.0002568448194692703  -0.005169294969154359
 -0.00584574102321416     9.522257243433796e-5
 -0.007396351825566706   -0.0108752736591257

[:, :, 1, 2] =
 -0.0226867474036909    -0.06461134510940962
  0.020450857041484343   0.040493603889860424
 -0.023096072786301866  -0.05924767605847245
  0.012154116920846912   0.005699699456697428

[:, :, 2, 2] =
 -0.014926269590595181   -0.027801359959303495
  0.00840061766489764     0.012699922025050124
 -0.013336218361404332   -0.02375573362547054
 -0.0002374571463307531  -0.0052428132050791

In [433]:
grad_B

(ITensor ord=4
Dim 1: (dim=2|id=576|"S=1/2,Site,n=1")
Dim 2: (dim=2|id=728|"S=1/2,Site,n=2")
Dim 3: (dim=4|id=977|"Link,l=2")
Dim 4: (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}
 2×2×4×2
[:, :, 1, 1] =
 -0.047036225972030485  -0.055579044494272915
 -0.009225609095664453  -0.01133933088413079

[:, :, 2, 1] =
  0.013004594446715477  0.009711433915436642
 -0.004521327367074225  0.0022044031215030548

[:, :, 3, 1] =
 -0.034896831107142354   -0.04011511898628366
 -0.0033611598039384057   1.1137050722715807e-5

[:, :, 4, 1] =
 -0.027627716389857032  -0.03521665554294252
 -0.001669463194938564  -0.009873622067656898

[:, :, 1, 2] =
 -0.03190242606946411   -0.05572588565908074
 -0.021669643379216653  -0.017531420509385415

[:, :, 2, 2] =
 0.022199293743812618   0.04379721234886462
 0.0031307320507996308  0.01673526497944185

[:, :, 3, 2] =
 -0.0182337389872561     -0.05172263225259304
 -0.0036095595528816034  -0.013541228303095723

[:, :, 4, 2] =
  0.003458511172992503   0.0072

In [434]:
# Second gradient-descent step for each variant, this time with step size 0.9.
BT_new_new = BT_new - 0.9 * grad_B[1]  # regularised
BT_new2_new = BT_new2 - 0.9 * grad_B2[1]  # unregularised

ITensor ord=4 (dim=2|id=576|"S=1/2,Site,n=1") (dim=2|id=728|"S=1/2,Site,n=2") (dim=4|id=977|"Link,l=2") (dim=2|id=888|"C")
NDTensors.Dense{Float64, Vector{Float64}}

In [435]:
norm(BT_new_new)

1.0283326717377383

In [436]:
norm(BT_new2_new)

1.0587373951908416

Now run many gradient-descent updates for both cost functions and compare how the norm of the bond tensor behaves with and without regularisation, using the same learning rate.

In [437]:
# Run 1000 plain gradient-descent steps (step size 0.8) on the regularised and the
# unregularised cost, both starting from the same initial tensor.
BT_opt_reg = BT_re_imag
BT_opt_non_reg = BT_re_imag
for _ in 1:1000
    # `global` makes the assignments target the top-level variables explicitly, so
    # the loop behaves the same in a notebook, the REPL and a script file.
    global grad_B = gradient(loss, BT_opt_reg)
    global grad_B2 = gradient(loss2, BT_opt_non_reg)
    # `-=` rebinds the variable to a new tensor; `BT_re_imag` itself is not modified.
    global BT_opt_reg -= 0.8 * grad_B[1]
    global BT_opt_non_reg -= 0.8 * grad_B2[1]
end

In [438]:
norm(BT_opt_reg)

0.856838170406682

In [439]:
norm(BT_opt_non_reg)

6.329981755890319