In [1]:
import torch 
import torch.nn as nn
import numpy as np


In [2]:
def bernoulli(p):
    """Draw a Bernoulli sample with success probability *p*.

    Implemented as a binomial draw with a single trial, using NumPy's
    global random state.
    """
    n_trials = 1
    outcome = np.random.binomial(n_trials, p)
    return outcome

In [5]:
# One fair-coin draw: bernoulli is called with success probability 0.5.
s1 = bernoulli(0.5)



In [None]:
class NN(nn.Module):
    """Single linear unit followed by an activation.

    The model maps inputs with ``input_size`` features to one output through
    ``nn.Linear(input_size, 1)`` and then applies ``activation``.

    Args:
        input_size: Number of input features of the linear layer.
        activation: Callable applied to the linear output. When ``None``
            (the default), a new ``nn.Sigmoid()`` is created for this
            instance.
    """

    def __init__(self, input_size, activation=None):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 1)
        # torch.nn has no ``sigmoid`` attribute; the module class is
        # ``nn.Sigmoid``. It is built here rather than in the signature so
        # that instances do not share one default module object.
        self.activation = nn.Sigmoid() if activation is None else activation

    def forward(self, x):
        """Return ``activation(fc1(x))``."""
        x = self.fc1(x)
        x = self.activation(x)
        return x

        


In [None]:

class MLP(nn.Module):
    """
    Multi-layer perceptron built from a stack of nn.Linear layers.

    The class derives from nn.Module, the generic parent class of PyTorch
    models, which requires a forward method. The layers are kept in an
    nn.ModuleList so that PyTorch can recursively find their parameters
    (attributes must themselves be nn.Module or nn.ModuleList objects for
    that to work).

    layerdims lists the layer widths: consecutive entries give the input and
    output size of each linear layer. activation is applied after every
    layer except the last; out_activation, when not None, is applied to the
    output of the last layer.

    The initialization scale of the weights is set explicitly: unless
    init_scale is None, each weight matrix is drawn from a normal
    distribution with std init_scale / sqrt(input size of the layer) and
    each bias is set to zero. With init_scale=None the layers keep the
    initialization nn.Linear gives them.
    """

    def __init__(self, layerdims, activation=torch.relu, out_activation=None, init_scale=1):
        super().__init__()

        self.layerdims = layerdims
        self.activation = activation
        self.out_activation = out_activation

        # Build every layer first, then re-initialise them in order.
        stack = [
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(layerdims[:-1], layerdims[1:])
        ]

        if init_scale is not None:
            for fan_in, linear in zip(layerdims, stack):
                weight_std = init_scale / np.sqrt(fan_in)
                torch.nn.init.normal_(linear.weight, std=weight_std)
                torch.nn.init.zeros_(linear.bias)

        self.linears = nn.ModuleList(stack)

    def forward(self, x):
        """Run x through all layers and return the final output."""
        hidden = x
        for linear in self.linears[:-1]:
            hidden = self.activation(linear(hidden))

        # The last layer is not followed by the hidden activation.
        output = self.linears[-1](hidden)
        if self.out_activation is None:
            return output
        return self.out_activation(output)


def train(model, n_iter=100, lr=1e-2):
    """Fit *model* with Adam and return the loss recorded at every iteration.

    Each iteration evaluates loss_fn(model(x_train), y_train) on the
    module-level x_train / y_train, backpropagates, and takes one optimizer
    step. Whenever the epoch is a multiple of n_iter/10, the current loss is
    printed and the model is plotted against target on x_grid in one panel
    of a 2x5 figure.

    Relies on names defined outside this function: plt, loss_fn, x_train,
    y_train, x_grid, target and grab.
    """
    plt.figure(figsize=(20, 8))
    axs = [plt.subplot(2, 5, panel + 1) for panel in range(10)]

    # What is Adam? Which other simpler optimizers can you use?
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    checkpoint_every = n_iter / 10
    losses = []

    for epoch in range(n_iter):
        # What is this step? IMPORTANT LINE
        optimizer.zero_grad()
        loss = loss_fn(model(x_train), y_train)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        if epoch % checkpoint_every != 0:
            continue

        print(f'Epoch {epoch}: {loss.item()}')

        # plot progress in the panel that corresponds to this checkpoint
        plt.sca(axs[int(epoch // checkpoint_every)])
        plt.plot(x_grid, target(x_grid), label='target')
        # grab is defined elsewhere; presumably it turns the model output
        # into something plottable -- confirm against its definition.
        prediction = grab(model(x_grid)).squeeze()
        plt.plot(x_grid, prediction, label='model init')

    return losses




def loss_fn(y_pred, y_true):
    """Return the mean squared error between *y_pred* and *y_true*."""
    squared_error = torch.square(y_pred - y_true)
    return squared_error.mean()