In [0]:
import torch

# Disable scientific notation when tensors are printed.
torch.set_printoptions(sci_mode=False)

In [0]:
def mse(y_hat, y_true):
  """Return the mean of the element-wise squared differences of y_hat and y_true."""
  squared_error = (y_hat - y_true) ** 2
  return squared_error.mean()

def update(approximate_inverse, M, learning_rate, momentum, losses):
  """Perform one in-place gradient step (with momentum) on approximate_inverse.

  The loss is the mean squared error between `approximate_inverse @ M` and
  the identity matrix, so decreasing it pushes `approximate_inverse` towards
  a (left) inverse of `M`.

  Momentum is realised through the `.grad` buffer: after the step the buffer
  is scaled by `momentum` instead of being zeroed, and the next `backward()`
  call adds the fresh gradient on top of what is left. The buffer therefore
  holds `momentum * previous_buffer + current_gradient`.

  Args:
    approximate_inverse: leaf tensor with requires_grad=True; updated in place.
    M: the matrix whose inverse is approximated.
    learning_rate: step size applied to the gradient buffer.
    momentum: factor the gradient buffer is scaled by after the step
      (0 empties the buffer, i.e. plain gradient descent).
    losses: list to which the loss computed before this step is appended
      (as a 0-d NumPy array).

  Returns:
    None. The results are the in-place changes to `approximate_inverse`,
    its `.grad` buffer and `losses`.
  """
  # estimate of identity matrix
  # here we have various choices a) b) c)
  #   a) approximate_inverse@M                                  (used here)
  #   b) M@approximate_inverse
  #   c) (approximate_inverse@M + M@approximate_inverse)/2
  y_hat = approximate_inverse@M

  # identity matrix, created on the same device as the estimate so that
  # the step also works for tensors that do not live on the default device
  y_true = torch.eye(n=len(M), device=y_hat.device)

  # loss = "degree of being lost"
  loss = mse(y_hat, y_true)
  # .cpu() is a no-op for CPU tensors and makes .numpy() valid otherwise
  losses.append(loss.detach().cpu().numpy())

  # calculate gradients: backward() ADDS d(loss)/d(approximate_inverse) to
  # whatever is still stored in approximate_inverse.grad
  loss.backward()

  with torch.no_grad():
    # displace by learning_rate * (accumulated) derivatives
    # (equivalent, in-place spelling: approximate_inverse.sub_(...))
    approximate_inverse -= learning_rate * approximate_inverse.grad

    # momentum: keep a fraction of the buffer for the next step
    # ("reduce speed due to friction"); for momentum == 0 this is the same
    # as approximate_inverse.grad.zero_()
    approximate_inverse.grad *= momentum

def calculate_inverse(M, learning_rate=0.9, momentum=0.9, n_iterations=10000):
  """Approximate the inverse of a square matrix by gradient descent.

  Starts from a random matrix (fixed seed 314, so results are reproducible)
  and calls `update` `n_iterations` times.

  Note: this resets torch's global random seed as a side effect.

  Args:
    M: square 2-D tensor to invert. The initial guess takes its size, dtype
      and device from M.
    learning_rate: step size passed on to `update`.
    momentum: momentum factor passed on to `update`.
    n_iterations: number of update steps to perform.

  Returns:
    Tuple `(approximate_inverse, losses)`: the tensor produced by the
    updates (still has requires_grad=True) and the list of losses that
    `update` recorded, one entry per iteration.

  Raises:
    ValueError: if M is not a square 2-D tensor.
  """
  if M.dim() != 2 or M.shape[0] != M.shape[1]:
    raise ValueError(
        f"M must be a square 2-D tensor, got shape {tuple(M.shape)}")

  torch.manual_seed(314)

  # initial guess shaped like M instead of a hard-coded 4x4 matrix
  n = len(M)
  approximate_inverse = torch.rand(
      size=(n, n), dtype=M.dtype, device=M.device, requires_grad=True)
  losses = []

  for _ in range(n_iterations):
    update(approximate_inverse, M, learning_rate, momentum, losses)

  return approximate_inverse, losses


# Calculate inverse of random matrix

In [0]:
# Seed first so that the random 4x4 matrix is the same on every run.
torch.manual_seed(314)
M1 = torch.rand(4, 4)

approximate_inverse_1, losses_1 = calculate_inverse(M1)

# Calculate inverse of diagonal matrix

In [0]:
# M2 is deterministic; the seed call is kept only to mirror the random-matrix cell.
torch.manual_seed(314)

M2 = torch.diag(torch.tensor([1.,2.,3.,4.]))

approximate_inverse_2, losses_2 = calculate_inverse(M2)

# Inspect results

Let's inspect whether `approximate_inverse_1` is a good approximation of the inverse of `M1`: if it is, their product is close to the identity matrix.

In [85]:
# Matrix product of the approximate inverse with M1, shown as the cell output.
torch.matmul(approximate_inverse_1, M1)

tensor([[     1.0000,      0.0000,     -0.0000,      0.0000],
        [     0.0000,      1.0000,      0.0000,     -0.0000],
        [     0.0000,      0.0000,      1.0000,      0.0000],
        [     0.0000,     -0.0000,      0.0000,      1.0000]],
       grad_fn=<MmBackward>)

Let's inspect whether `approximate_inverse_2` is a good approximation of the inverse of `M2`: if it is, their product is close to the identity matrix.

In [86]:
# Check the diagonal-matrix run: this product should be close to the identity.
approximate_inverse_2 @ M2

tensor([[     1.0000,      0.0000,     -0.0000,      0.0000],
        [     0.0000,      1.0000,      0.0000,     -0.0000],
        [     0.0000,      0.0000,      1.0000,      0.0000],
        [     0.0000,     -0.0000,      0.0000,      1.0000]],
       grad_fn=<MmBackward>)

Looking good!

# Compare convergence speed for simple diagonal matrix and general random matrix

In [87]:
import numpy as np
import pandas as pd
import plotly.express as px

# One column of per-iteration losses for each of the two runs.
df = pd.DataFrame({
    'losses_1': np.array(losses_1).flatten(),
    'losses_2': np.array(losses_2).flatten(),
})

# First trace: random-matrix run on a log-scaled y axis;
# second trace: diagonal-matrix run added to the same figure.
fig = px.line(df, y='losses_1', log_y=True)
fig.update_traces(name='Random Matrix, Momentum=0.9', showlegend=True)
fig.add_scatter(y=df['losses_2'], name='Simple Diagonal Matrix,  Momentum=0.9')

fig.update_layout(
    title=(
        '<b>How fast does the gradient algorithm converge?</b>'
        '<br>Answer:It depends, for the Simple Diagonal Matrix it'
        ' converges faster'
    ),
    xaxis_title='Iterations',
    yaxis_title='Losses',
)
fig