In [1]:
import random
from pathlib import Path

import cloudpickle
import gymnasium
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import torch
import torch.distributions as dist
import torch.nn.functional as F
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from sklearn import preprocessing
from torch import nn
from tqdm import tqdm
torch.set_grad_enabled(True) 

<torch.autograd.grad_mode.set_grad_enabled at 0x14f56bbd0>

In [311]:
class Network(torch.nn.Module):
    """Fully connected ReLU network mapping ``num_inputs`` features to ``num_outputs`` values.

    With the default arguments the architecture is five hidden layers of
    width 256, each followed by a ReLU, and a final linear read-out with no
    activation.

    Args:
        num_inputs: Size of the last dimension of the input tensor.
        num_outputs: Size of the last dimension of the output tensor.
        hidden_size: Width of every hidden layer.
        num_hidden_layers: Number of hidden (ReLU-activated) layers; at least 1.

    Raises:
        ValueError: If ``num_hidden_layers`` is smaller than 1.
    """

    def __init__(self, num_inputs, num_outputs, hidden_size=256, num_hidden_layers=5):
        super().__init__()
        if num_hidden_layers < 1:
            raise ValueError("num_hidden_layers must be at least 1")
        # Layers are created strictly in input-to-output order, so for a given
        # seed the initial weights match a hand-written nn.Sequential with the
        # same layers, and the state-dict keys are "network.0", "network.2", ...
        layers = [nn.Linear(num_inputs, hidden_size), nn.ReLU()]
        for _ in range(num_hidden_layers - 1):
            layers += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
        layers.append(nn.Linear(hidden_size, num_outputs))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.network(x)

In [312]:
def seed(s:int):
    """Seed the Python, NumPy and PyTorch global random number generators.

    Args:
        s: Integer seed applied to all three generators.
    """
    random.seed(s)
    # NumPy's legacy global generator only accepts seeds in [0, 2**32 - 1];
    # wrap the value so a seed accepted by the other two generators (e.g. a
    # negative one) does not raise here.
    np.random.seed(s % (2**32))
    torch.manual_seed(s)

In [313]:
def train_one_step(network: torch.nn.Module,optim:torch.optim.Optimizer, x:torch.Tensor,y:torch.Tensor):
    """Perform a single gradient step on the mean-squared error between ``network(x)`` and ``y``.

    Args:
        network: Module to train; it is switched to training mode.
        optim: Optimizer whose gradients are cleared and which is stepped once.
        x: Input passed to ``network``.
        y: Regression target; it is detached, so no gradient flows into it.

    Returns:
        The loss, computed before the parameter update, as a Python float.
    """
    network.train()
    optim.zero_grad()
    out = network(x)
    # F.mse_loss takes (input, target): prediction first, detached target second.
    loss = F.mse_loss(out, y.detach())
    loss.backward()
    optim.step()
    return loss.item()

In [314]:
def eval_one_step(network:torch.nn.Module,x:torch.Tensor,y:torch.Tensor):
    """Compute the mean-squared error between ``network(x)`` and ``y`` without tracking gradients.

    Args:
        network: Module to evaluate; it is switched to (and left in) eval mode.
        x: Input passed to ``network``.
        y: Regression target.

    Returns:
        The loss as a Python float.
    """
    network.eval()
    with torch.no_grad():
        out = network(x)
        # F.mse_loss takes (input, target): prediction first, target second.
        loss = F.mse_loss(out, y)
    return loss.item()

<h1> MLE of linear regression </h1>

In [315]:
NUM_STEPS = 40000
BATCH_SIZE = 32
EVAL_STEPS = 50
X_SIZE = 1000
OBS_SIZE = 10
RIDGE = 1e-5  # small diagonal term added to x.T @ x before solving
MODEL_PATH = Path("./models/optim.pkl")


def sample_regression_task():
    """Draw one random regression problem and its regularised least-squares solution.

    Returns:
        A pair ``(obs, theta)``: ``obs`` is the flattened ``[x | y]`` matrix
        with ``X_SIZE * (OBS_SIZE + 1)`` entries, and ``theta`` is the
        coefficient vector with ``OBS_SIZE`` entries.
    """
    x = torch.randn(X_SIZE, OBS_SIZE)
    y = torch.randn(X_SIZE)
    obs = torch.cat([x, y.unsqueeze(1)], dim=1)
    gram = x.T @ x + RIDGE * torch.eye(OBS_SIZE)
    # Solve the normal equations directly instead of forming the inverse.
    theta = torch.linalg.solve(gram, x.T @ y)
    return obs.flatten(), theta


seed(22)
network = Network(X_SIZE*(OBS_SIZE+1),OBS_SIZE)
bst_loss = 1000.
optim = torch.optim.SGD(network.parameters(),lr=0.1)
loss_lst = []
# Make sure the checkpoint directory exists before the first save.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
for i in range(NUM_STEPS):
    obs_batch = torch.zeros(BATCH_SIZE,X_SIZE*(OBS_SIZE+1))
    theta_batch = torch.zeros(BATCH_SIZE,OBS_SIZE)
    for j in range(BATCH_SIZE):
        obs_batch[j], theta_batch[j] = sample_regression_task()
    loss = train_one_step(network,optim,obs_batch,theta_batch)
    loss_lst.append(loss)
    if i%100 == 0:
        # Periodic evaluation on freshly sampled problems.
        eval_losses = []
        for _ in range(EVAL_STEPS):
            obs, theta = sample_regression_task()
            eval_losses.append(eval_one_step(network,obs,theta))
        mean_eval_loss = float(np.mean(eval_losses))
        print("\r| Loss at epoch {:5d} is {:8.5f} Best Loss {:7.5f} |".format(i,mean_eval_loss,bst_loss))
        if mean_eval_loss < bst_loss:
            # Keep only the checkpoint with the best evaluation loss so far.
            with open(MODEL_PATH,"wb") as f:
                cloudpickle.dump(network,f)
            bst_loss = mean_eval_loss
print("Mean loss is",np.mean(loss_lst[-10:]))

| Loss at epoch     0 is  0.00293 Best Loss 1000.00000 |
| Loss at epoch   100 is  0.00095 Best Loss 0.00293 |
| Loss at epoch   200 is  0.00104 Best Loss 0.00095 |
| Loss at epoch   300 is  0.00107 Best Loss 0.00095 |
| Loss at epoch   400 is  0.00095 Best Loss 0.00095 |
| Loss at epoch   500 is  0.00114 Best Loss 0.00095 |
| Loss at epoch   600 is  0.00109 Best Loss 0.00095 |
| Loss at epoch   700 is  0.00107 Best Loss 0.00095 |
| Loss at epoch   800 is  0.00094 Best Loss 0.00095 |
| Loss at epoch   900 is  0.00096 Best Loss 0.00094 |
| Loss at epoch  1000 is  0.00096 Best Loss 0.00094 |
| Loss at epoch  1100 is  0.00094 Best Loss 0.00094 |
| Loss at epoch  1200 is  0.00103 Best Loss 0.00094 |
| Loss at epoch  1300 is  0.00104 Best Loss 0.00094 |
| Loss at epoch  1400 is  0.00106 Best Loss 0.00094 |
| Loss at epoch  1500 is  0.00103 Best Loss 0.00094 |
| Loss at epoch  1600 is  0.00111 Best Loss 0.00094 |
| Loss at epoch  1700 is  0.00101 Best Loss 0.00094 |
| Loss at epoch  1800 is 

[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument
[E thread_pool.cpp:109] Exception in thread pool task: mutex lock failed: Invalid argument


KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(12,5))
# Index by the number of recorded losses rather than NUM_STEPS so the plot
# still works when training was interrupted or NUM_STEPS has since changed.
plt.plot(range(len(loss_lst)),loss_lst)
plt.xlabel("Step")
plt.ylabel("Loss")
plt.grid()
plt.show()

In [318]:
# NOTE: unpickling executes arbitrary code; only load checkpoints that this
# notebook wrote itself.
with open("./models/optim.pkl","rb") as f:
    network = cloudpickle.load(f)
network.eval()
with torch.no_grad():
    x = torch.randn(X_SIZE,OBS_SIZE)
    y = torch.randn(X_SIZE)
    obs = torch.cat([x,y.unsqueeze(1)],dim=1)
    # Closed-form coefficients via a linear solve rather than an explicit inverse.
    theta = torch.linalg.solve(x.T@x + (1e-5)*torch.eye(OBS_SIZE), x.T@y)
    theta2 = network(obs.flatten())
    # Compare the two coefficient vectors through their prediction on one random test point.
    tst = 10*torch.randn(OBS_SIZE,1)+100
    print(tst.T@theta,"\n",tst.T@theta2)

KeyboardInterrupt: 

<h1> MLE of a Gaussian </h1>

In [None]:
NUM_STEPS = 20000
BATCH_SIZE = 32
MULTIPLIER = 10
OBS_SIZE = 3
seed(22)
# The network input width and the sample shape both follow OBS_SIZE, so the
# constant can be changed in one place.
network = Network(OBS_SIZE,1)
optim = torch.optim.SGD(network.parameters(),lr=5e-4)
loss_lst = []
for i in tqdm(range(NUM_STEPS)):
    # One location and one scale per batch row, broadcast across the OBS_SIZE draws.
    # The scale may be negative; it only multiplies zero-mean symmetric noise.
    loc = MULTIPLIER*torch.randn(BATCH_SIZE,1)
    scale = MULTIPLIER*torch.randn(BATCH_SIZE,1)
    obs = scale*torch.randn((BATCH_SIZE,OBS_SIZE)) + loc
    # Target: the sample mean of each row.
    y = torch.mean(obs,dim=-1)
    loss = train_one_step(network,optim,obs,y.unsqueeze(1))
    loss_lst.append(loss)
print("Mean loss is",np.mean(loss_lst[-10:]))

In [None]:
plt.figure(figsize=(12,5))
# Index by the number of recorded losses rather than NUM_STEPS so the plot
# still works when training was stopped early.
plt.plot(range(len(loss_lst)),loss_lst)
plt.xlabel("Step")
plt.ylim(0,2)
plt.ylabel("Loss")
plt.grid()
plt.show()

In [None]:
# Evaluate the network trained in the Gaussian cell, which is still in memory.
# "./models/optim.pkl" is only ever written by the linear-regression cell and
# holds a model with X_SIZE*(OBS_SIZE+1) inputs, so reloading it here would
# fail on a 3-element sample.
network.eval()
sample = torch.tensor([200,300,150],dtype=torch.float)
x = torch.mean(sample)
with torch.no_grad():
    prediction = network(sample)
torch.allclose(prediction,x,atol=1)