In [0]:
import torch
from torch import tensor, manual_seed, rand
import math
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import plotly.graph_objects as go
from plotly import express as px

# Matrix inverse

In [20]:
# Diagonal matrix with 1..4 on the diagonal: easy to invert, so results are easy to check by eye.
M1234 = tensor([1., 2., 3., 4.]).diag()
M1234

tensor([[1., 0., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 3., 0.],
        [0., 0., 0., 4.]])

## Let's use the builtin method first

In [21]:
# Reference inverse computed by PyTorch's built-in routine.
inverse = M1234.inverse()
inverse

tensor([[1.0000, 0.0000, 0.0000, -0.0000],
        [0.0000, 0.5000, 0.0000, -0.0000],
        [0.0000, 0.0000, 0.3333, -0.0000],
        [0.0000, 0.0000, 0.0000, 0.2500]])

In [22]:
# Sanity check: multiplying the built-in inverse by M1234 should give the identity.
torch.matmul(inverse, M1234)

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

# Now let's use gradient descent to do the same

So what we want is a matrix that, when multiplied by the original matrix M1234, yields the identity matrix.

In [23]:
# Optimisation target: the 4x4 identity matrix, created directly as a float tensor.
identity = torch.eye(4)
Y_true = identity
Y_true

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [0]:
# Seed the generator so the random starting guess is reproducible.
manual_seed(314)
# Initial guess for the inverse; requires_grad=True lets autograd track it.
random_inverse = torch.rand(4, 4, requires_grad=True)

In [25]:
# Product of the current guess with M1234; compared against Y_true by the loss.
Y_hat = torch.matmul(random_inverse, M1234)
Y_hat

tensor([[0.7196, 1.2590, 2.0002, 1.3540],
        [0.8522, 0.6251, 1.5017, 1.8571],
        [0.0083, 0.8938, 2.4086, 2.2047],
        [0.3556, 0.7420, 2.0490, 1.2297]], grad_fn=<MmBackward>)

Of course, random_inverse is not yet the inverse, so Y_hat is not the identity matrix.


---

A useful distance is the element-wise mean squared error (MSE).

In [0]:
def mse(y, y_hat):
  """Return the mean of the element-wise squared differences between ``y`` and ``y_hat``."""
  squared_error = (y - y_hat) ** 2
  return squared_error.mean()

In [0]:
# Scalar distance between the target and the current product.
loss = mse(y=Y_true, y_hat=Y_hat)

In [0]:
# Backpropagate the loss; this populates random_inverse.grad.
loss.backward()

In [29]:
# Gradient of the loss with respect to each entry of random_inverse.
random_inverse.grad

tensor([[-3.5046e-02,  3.1475e-01,  7.5007e-01,  6.7699e-01],
        [ 1.0653e-01, -9.3718e-02,  5.6315e-01,  9.2856e-01],
        [ 1.0338e-03,  2.2346e-01,  5.2824e-01,  1.1024e+00],
        [ 4.4448e-02,  1.8549e-01,  7.6836e-01,  1.1486e-01]])

In [0]:
# Snapshot of the gradient as a NumPy array.
# clone() first: Tensor.numpy() shares memory with the tensor, so without a copy
# an in-place random_inverse.grad.zero_() would silently zero G as well.
G = random_inverse.grad.detach().clone().numpy()

Let us run gradient descent on random_inverse, with MSE as the loss function, to approximate the inverse matrix.

In [0]:
# Constant learning-rate schedule: one entry per gradient step.
# 0.2 * ln(10) == ln(10 ** 0.2), roughly 0.46.
N_STEPS = 100
LEARNING_RATE = 0.2 * np.log(10)
lrs = np.full(N_STEPS, LEARNING_RATE)
# Loss recorded at every step; update() appends to this list.
losses = []

def _report_progress(step, loss, Y_hat):
  """Print the loss, then show the residual ``Y_hat - Y_true`` as a heatmap and as numbers."""
  print(step, ':', loss)

  Diff = Y_hat.detach().numpy() - Y_true.detach().numpy()

  fig = px.imshow(Diff)
  fig.update_yaxes(showticklabels=False)
  fig.show()
  print(Diff)


def update(lr, step=None, report_every=20):
  """Run one gradient-descent step on the global ``random_inverse``.

  Computes ``random_inverse @ M1234``, measures its MSE against ``Y_true``,
  appends the loss to the global ``losses`` list and moves ``random_inverse``
  against its gradient, scaled by ``lr``.

  Args:
    lr: learning rate for this step.
    step: index of this step, used only to decide when to report. Defaults to
      the number of losses recorded so far, so the function does not depend
      on a loop counter defined outside it.
    report_every: print and plot the residual whenever ``step`` is a multiple
      of this value.
  """
  if step is None:
    step = len(losses)

  Y_hat = random_inverse @ M1234
  loss = mse(Y_true, Y_hat)
  loss.backward()
  losses.append(loss.item())

  if step % report_every == 0:
    # Reuse the Y_hat computed above: random_inverse has not changed yet.
    _report_progress(step, loss, Y_hat)

  # The parameter update itself must not be tracked by autograd.
  with torch.no_grad():
    random_inverse.sub_(float(lr) * random_inverse.grad)
    # Gradients accumulate across backward() calls, so reset them for the next step.
    random_inverse.grad.zero_()


In [36]:
# Print residual matrices with two decimals so convergence is easy to read.
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})

# Start every run from the same state: same seed, fresh parameter, empty loss history.
manual_seed(314)
random_inverse = torch.nn.Parameter(torch.rand(size=[4,4]))
# Cleared in place so that re-running this cell does not append to an earlier run's history.
losses.clear()

# The loop variable stays a module-level `t`: update() may read it to decide when to report.
for t, lr in enumerate(lrs):
  update(lr)


0 : tensor(1.6650, grad_fn=<MeanBackward0>)


[[-0.28 1.26 2.00 1.35]
 [0.85 -0.37 1.50 1.86]
 [0.01 0.89 1.41 2.20]
 [0.36 0.74 2.05 0.23]]
20 : tensor(0.0054, grad_fn=<MeanBackward0>)


[[-0.09 0.01 0.00 0.00]
 [0.26 -0.00 0.00 0.00]
 [0.00 0.00 0.00 0.00]
 [0.11 0.00 0.00 0.00]]
40 : tensor(0.0005, grad_fn=<MeanBackward0>)


[[-0.03 0.00 0.00 0.00]
 [0.08 -0.00 0.00 0.00]
 [0.00 0.00 0.00 0.00]
 [0.03 0.00 0.00 0.00]]
60 : tensor(4.7337e-05, grad_fn=<MeanBackward0>)


[[-0.01 0.00 0.00 0.00]
 [0.02 -0.00 0.00 -0.00]
 [0.00 0.00 0.00 0.00]
 [0.01 0.00 0.00 0.00]]
80 : tensor(4.4184e-06, grad_fn=<MeanBackward0>)


[[-0.00 0.00 0.00 0.00]
 [0.01 -0.00 0.00 -0.00]
 [0.00 0.00 0.00 0.00]
 [0.00 0.00 0.00 0.00]]


In [0]:
# Loss per gradient step on a log axis.
# pandas (pd) and plotly express (px) are already imported in the first cell.
df = pd.DataFrame({'losses': np.array(losses).flatten()})
px.line(df, y='losses', log_y=True, title='MSE loss per gradient-descent step')

## Random Indicator Matrix

In [0]:
# A particular use case might be an indicator matrix of, for example,
# which words are in which documents.

# Let's generate a random matrix for such a use case.
number_of_documents = 17
number_of_words = 3

# Probability of a particular word being in a particular document.
p = 0.2

manual_seed(123)
# Each uniform draw in [0, 1) falls below p with probability p, giving a 0/1 entry.
indicator = (rand(size=[number_of_documents, number_of_words]) < p).int()
indicator

tensor([[0, 0, 0],
        [0, 1, 0],
        [1, 1, 1],
        [0, 0, 0],
        [1, 1, 0],
        [0, 1, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 1],
        [0, 0, 0],
        [0, 0, 0],
        [1, 0, 1],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]], dtype=torch.int32)