In [0]:
import torch
from torch import tensor, manual_seed, rand
import math
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import plotly.graph_objects as go
from plotly import express as px

# Matrix inverse

In [0]:
# The 4x4 matrix we are going to invert; entries are drawn uniformly from [0, 1).
# No seed is set before this draw in the cells shown, so the values differ
# between runs. For a case that is easy to verify by hand, use
# torch.diag(tensor([1., 2., 3., 4.])) instead.
M1234 = torch.rand(4, 4)
M1234

tensor([[0.4114, 0.6123, 0.7537, 0.4234],
        [0.1923, 0.2576, 0.9520, 0.4453],
        [0.3066, 0.8974, 0.8174, 0.7198],
        [0.0608, 0.1571, 0.2139, 0.0310]])

In [0]:
# A floating-point determinant is almost never exactly 0.0, so a plain
# truthiness check lets nearly singular matrices through. Compare the
# magnitude against a small tolerance instead.
assert torch.det(M1234).abs() > 1e-6, 'Should not be singular'

## Let's use the builtin method first

In [0]:
# Reference result from PyTorch's built-in matrix inversion.
inverse = M1234.inverse()
inverse

tensor([[ 5.6399, -0.9163, -2.4785, -6.3223],
        [-1.1052, -1.5061,  1.3471,  5.4487],
        [-0.7697,  1.2419, -0.4736,  3.6695],
        [-0.1502,  0.8577,  1.3031, -8.2674]])

In [0]:
# Sanity check: the built-in inverse times the original matrix should be the
# identity, up to floating-point round-off in the off-diagonal entries.
torch.matmul(inverse, M1234)

tensor([[ 1.0000e+00, -3.4967e-07, -3.7993e-07, -2.2433e-07],
        [ 2.9787e-08,  1.0000e+00, -5.7286e-08,  4.5206e-08],
        [ 3.0543e-09,  5.4313e-09,  1.0000e+00,  1.0119e-08],
        [-5.7571e-08, -5.5442e-08, -3.6310e-08,  1.0000e+00]])

# Now let's use gradient descent to do the same

So what we want is a matrix that, multiplied by the original matrix M1234, yields the identity matrix.

In [0]:
# Target of the optimisation: the 4x4 float identity matrix.
identity = torch.eye(4)
# Y_true is just another name for the same tensor object.
Y_true = identity
Y_true

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [0]:
# Fixed seed so the starting guess is the same on every run.
manual_seed(314)
# Random 4x4 starting guess for the inverse. requires_grad=True makes autograd
# track it, so loss.backward() can fill in random_inverse.grad.
# (Wrapping a plain tensor in torch.nn.Parameter is an alternative.)
random_inverse = torch.rand(4, 4, requires_grad=True)

In [0]:
# Product of the current guess with the original matrix; this would be the
# identity if random_inverse were already the true inverse.
Y_hat = torch.matmul(random_inverse, M1234)
Y_hat

tensor([[0.6421, 1.2543, 1.7590, 1.0755],
        [0.5924, 1.1245, 1.4484, 0.8748],
        [0.3690, 0.9273, 1.2059, 0.7975],
        [0.4457, 0.9745, 1.2452, 0.8169]], grad_fn=<MmBackward>)

Of course, random_inverse is not yet the inverse, so Y_hat is not the identity matrix.


---

A useful distance is the element-wise mean squared error (MSE).

In [0]:
def mse(y, y_hat):
  """Return the mean of the element-wise squared differences between y and y_hat."""
  residual = y - y_hat
  return (residual ** 2).mean()

In [0]:
# Scalar mean squared error between the identity target and the current product.
loss = mse(y=Y_true, y_hat=Y_hat)

# Back-propagate through the product to fill random_inverse.grad.
loss.backward()

In [0]:
# random_inverse.grad holds the partial derivative of the loss with respect to
# each entry of random_inverse, so it has the same 4x4 layout as the matrix.
# It was filled in by the loss.backward() call above; display it to get a
# feel for the values.
random_inverse.grad

tensor([[0.3002, 0.3010, 0.4035, 0.0731],
        [0.2227, 0.2393, 0.2634, 0.0491],
        [0.1516, 0.1076, 0.2110, 0.0296],
        [0.2051, 0.1801, 0.2372, 0.0551]])

Let us use gradient descent with respect to the random_inverse and mse as the loss function to approximate the inverse matrix.

In [0]:
# Learning-rate schedule with one entry per gradient step.
# logspace(0.2, 0.2) is 10**0.2 at every position, so after taking the log
# each of the 10000 entries equals 0.2 * ln(10) (about 0.46): a constant rate.
lrs = np.log(np.logspace(start=2e-1, stop=2e-1, num=10000))
# Loss recorded at every step by update(); plotted after training.
losses = list()

def update(lr, step=None, report_every=2000):
  """Run one gradient-descent step on the global `random_inverse`.

  Computes the MSE between `random_inverse @ M1234` and `Y_true`, appends it
  to the global `losses` list, and moves `random_inverse` against its
  gradient by `lr`. Every `report_every` steps it also prints the loss and
  shows (as a heatmap and as numbers) how far the product is from `Y_true`.

  Args:
    lr: step size for this update.
    step: index of this step, used only to decide when to report. When
      omitted it is taken as the number of losses recorded before this call,
      which equals the loop index when `losses` started out empty.
    report_every: report on every step whose index is a multiple of this.
  """
  Y_hat = random_inverse@M1234
  loss = mse(Y_true, Y_hat)
  loss.backward()
  # Store a plain Python float rather than a 0-d array.
  losses.append(loss.item())

  # The step index used to come from a global `t` set by the calling loop,
  # which raised a NameError whenever update() was called any other way.
  if step is None:
    step = len(losses) - 1

  if step % report_every == 0:
    print(step, ':', loss)

    # random_inverse has not been changed yet, so the Y_hat computed above is
    # still current and no second forward pass is needed.
    G_true = Y_true.detach().numpy()
    G_hat = Y_hat.detach().numpy()
    Diff = G_hat - G_true

    # plot the intermediate results
    fig = px.imshow(Diff)
    fig.update_yaxes(showticklabels=False)
    fig.show()
    print(Diff)

  # The parameter update itself must not be recorded by autograd.
  with torch.no_grad():
    # random_inverse.grad holds the gradient of the loss for each entry of
    # the matrix; step against it.
    random_inverse.sub_(lr * random_inverse.grad)

    # backward() accumulates into .grad, so reset it for the next step.
    random_inverse.grad.zero_()


In [0]:
# Print the difference matrices with two decimals.
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})

# Start from the same random guess on every run.
manual_seed(314)
random_inverse = torch.rand(size=[4,4])
random_inverse = torch.nn.Parameter(random_inverse)

# Drop losses left over from an earlier run, so that re-executing this cell
# does not append a second training curve onto the first.
losses.clear()

# One gradient step per learning rate; `t` is the step index.
for t, lr in enumerate(lrs): update(lr)


0 : tensor(0.8493, grad_fn=<MeanBackward0>)


[[-0.36 1.25 1.76 1.08]
 [0.59 0.12 1.45 0.87]
 [0.37 0.93 0.21 0.80]
 [0.45 0.97 1.25 -0.18]]
2000 : tensor(0.0157, grad_fn=<MeanBackward0>)


[[-0.20 0.14 0.09 -0.18]
 [0.13 -0.09 -0.06 0.13]
 [0.07 -0.05 -0.03 0.07]
 [-0.18 0.13 0.09 -0.19]]
4000 : tensor(0.0040, grad_fn=<MeanBackward0>)


[[-0.10 0.07 0.04 -0.09]
 [0.07 -0.05 -0.03 0.06]
 [0.04 -0.03 -0.02 0.04]
 [-0.09 0.07 0.04 -0.09]]
6000 : tensor(0.0010, grad_fn=<MeanBackward0>)


[[-0.05 0.03 0.02 -0.05]
 [0.03 -0.02 -0.01 0.03]
 [0.02 -0.01 -0.01 0.02]
 [-0.05 0.03 0.02 -0.05]]
8000 : tensor(0.0003, grad_fn=<MeanBackward0>)


[[-0.02 0.02 0.01 -0.02]
 [0.02 -0.01 -0.01 0.02]
 [0.01 -0.01 -0.00 0.01]
 [-0.02 0.02 0.01 -0.02]]


In [0]:
# pandas (pd) and plotly express (px) are already imported in the first cell,
# so they are not imported again here.

# Loss recorded at every gradient step, plotted on a logarithmic y-axis.
df = pd.DataFrame({'losses':np.array(losses).flatten()})
px.line(
    df,
    y='losses',
    log_y=True,
    title='MSE loss per gradient-descent step',
    labels={'index': 'gradient step', 'losses': 'MSE loss'},
)

## Random Indicator Matrix

In [0]:
# A particular use case might be an indicator matrix of, for example,
# which words are in which documents.

# Let's generate a random matrix for such a use case
number_of_documents = 17
number_of_words = 3

# probability of a particular word being in a particular document
p = 0.2

manual_seed(123)
# A uniform draw from [0, 1) is below p with probability p, so each entry is
# 1 with probability p. Use p itself rather than repeating the literal 0.2,
# so that changing p above actually changes the matrix.
indicator = (rand(size=[number_of_documents, number_of_words]) < p).int()
indicator

tensor([[0, 0, 0],
        [0, 1, 0],
        [1, 1, 1],
        [0, 0, 0],
        [1, 1, 0],
        [0, 1, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 1],
        [0, 0, 0],
        [0, 0, 0],
        [1, 0, 1],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]], dtype=torch.int32)