In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
class NeuralNetwork(nn.Module):
    """
    Feed-forward network with a single hidden layer.

    The forward pipeline is:
    Linear -> BatchNorm1d -> Sigmoid -> Dropout(p=0.2) -> Linear.
    The weights of both linear layers are initialised with Xavier-uniform.

    Args:
        input_size: number of input features.
        hidden_size: number of units in the hidden layer.
        output_size: number of output units.
        l2_lambda: stored on the instance as ``self.l2_lambda``; it is not
            used anywhere inside this class.
    """

    def __init__(self, input_size, hidden_size, output_size, l2_lambda = 0.01):
        super().__init__()

        # Sub-modules are registered in the same order forward() applies them.
        self.hidden = self._xavier_linear(input_size, hidden_size)
        self.bn = nn.BatchNorm1d(hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.dropout = nn.Dropout(p=0.2)
        self.output = self._xavier_linear(hidden_size, output_size)

        self.l2_lambda = l2_lambda

    @staticmethod
    def _xavier_linear(in_features, out_features):
        """Build an ``nn.Linear`` whose weight matrix is Xavier-uniform initialised."""
        layer = nn.Linear(in_features, out_features)
        nn.init.xavier_uniform_(layer.weight)
        return layer

    def forward(self, x):
        """
        Run ``x`` through every stage of the network and return the result.
        """
        pipeline = (self.hidden, self.bn, self.sigmoid, self.dropout, self.output)
        for stage in pipeline:
            x = stage(x)
        return x


In [None]:
def _load_boston_arrays():
    """Return the Boston housing ``(data, target)`` arrays.

    Uses ``datasets.load_boston()`` when the installed scikit-learn still
    ships it. The loader was removed in scikit-learn 1.2, so on newer
    versions the arrays are rebuilt from the original data file, following
    the alternative given in scikit-learn's removal notice (needs network
    access).
    """
    try:
        # Load the bunch once instead of once per field.
        boston = datasets.load_boston()
    except (ImportError, AttributeError):
        from urllib.request import urlopen

        with urlopen("http://lib.stat.cmu.edu/datasets/boston") as response:
            # The first 22 lines are a free-text header.
            raw_lines = response.read().decode("latin-1").splitlines()[22:]
        # Each record is split over two physical lines (11 + 3 values), so
        # flattening all tokens and regrouping by 14 restores one row per record:
        # 13 feature columns followed by the target.
        records = np.array(" ".join(raw_lines).split(), dtype=float).reshape(-1, 14)
        return records[:, :13], records[:, 13]
    return boston['data'], boston['target']


X, y = _load_boston_arrays()

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the test split so no test statistics leak into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Seed PyTorch's global RNG so the weight initialisation (and the dropout masks
# drawn later during training) are reproducible on Restart & Run All.
torch.manual_seed(42)

# NOTE: `self` is an unusual name for a module-level model, but the later cells
# reference it, so it is kept here.
self = NeuralNetwork(input_size = X_train.shape[1], hidden_size = 50, output_size = 1)

In [None]:
# SGD with momentum; weight_decay supplies the L2 regularisation term.
optimizer = optim.SGD(self.parameters(), lr=0.01, momentum=0.9, weight_decay=0.01)

# Mean squared error between predictions and targets.
criterion = nn.MSELoss()

In [None]:
self.train()  # turn on dropout

# The training arrays do not change between iterations, so convert them to
# tensors once instead of on every pass through the loop.
train_inputs = torch.Tensor(X_train)
train_targets = torch.Tensor(y_train).view(-1, 1)  # column vector, matching the model's output shape

# Ten full-batch updates.
for _ in range(10):
    optimizer.zero_grad()
    output = self(train_inputs)
    loss = criterion(output, train_targets)

    # Calculates gradient
    loss.backward()
    # Show the first row of the hidden-layer weights before this update is applied.
    print(self.hidden.weight[0])

    # Updates weights
    optimizer.step()

In [None]:
self.eval()  # turn off dropout
test_targets = torch.Tensor(y_test).view(-1, 1)  # column vector, matching the model's output shape
with torch.no_grad():  # turn off autograd for faster computation and to save memory
    predicted_output = self(torch.Tensor(X_test))

# Sum of squared errors over the test rows. Tensor.sum(dim=0) does the reduction
# in one vectorised call and keeps the shape-(1,) result that the Python
# built-in sum() produced by adding the rows one at a time.
((predicted_output - test_targets) ** 2).sum(dim=0)

In [None]:
# Dump every entry of the batch-norm layer's state dict as "name: value".
for entry_name, entry_value in self.bn.state_dict().items():
    print(f"{entry_name}: {entry_value}")

## QuickProp

In [None]:
import torch
from torch.optim import Optimizer

class QuickProp(Optimizer):
    """Quickprop-style optimizer.

    Each step rescales the previous update element-wise by
    ``grad / (prev_grad - grad)`` and applies it as
    ``p <- p - lr * update``. Elements that have no usable history take a
    plain gradient step (``update = grad``) instead. That covers the very
    first step, elements whose previous update was exactly zero, and
    elements where ``|prev_grad - grad| <= eps``. Without this fallback the
    update would stay zero forever, because the state starts at zero and the
    rule is multiplicative in the previous update.

    No limit is placed on the growth factor, so steps can become large when
    consecutive gradients are nearly equal.

    Per-parameter state:
        ``prev_delta``: gradient seen on the previous step.
        ``prev_update``: update applied on the previous step, before
        scaling by ``-lr``.

    Args:
        params: iterable of parameters or dicts defining parameter groups.
        lr: learning rate applied to every update (default ``1e-3``).
        eps: smallest ``|prev_grad - grad|`` for which the quickprop ratio is
            used (default ``1e-10``).

    Raises:
        ValueError: if ``lr`` or ``eps`` is negative.
    """

    def __init__(self, params, lr=1e-3, eps=1e-10):
        if lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if eps < 0.0:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        defaults = dict(lr=lr, eps=eps)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and
                returns the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or ``None`` if no closure was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            eps = group['eps']
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['prev_delta'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    state['prev_update'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                prev_grad = state['prev_delta']
                prev_update = state['prev_update']

                denom = prev_grad - grad
                # The quickprop ratio is only meaningful where there is a
                # non-zero previous update to rescale and the denominator is
                # safely away from zero.
                usable = (prev_update != 0) & (denom.abs() > eps)
                # Put a harmless 1 in the masked-out slots so the division
                # never produces inf/nan there.
                safe_denom = torch.where(usable, denom, torch.ones_like(denom))
                quick_update = grad * prev_update / safe_denom
                # Everywhere else fall back to a plain gradient step.
                update = torch.where(usable, quick_update, grad)
                p.add_(update, alpha=-lr)

                # Update state (clone so later changes to p.grad do not alias the history)
                state['prev_delta'] = grad.clone()
                state['prev_update'] = update.clone()

        return loss


In [None]:
# Switch the optimizer to the QuickProp implementation.
optimizer = QuickProp(self.parameters(), lr=0.01)

# Mean squared error between predictions and targets.
criterion = nn.MSELoss()

In [None]:
self.train()  # turn on dropout

# The training arrays do not change between iterations, so convert them to
# tensors once instead of on every pass through the loop.
train_inputs = torch.Tensor(X_train)
train_targets = torch.Tensor(y_train).view(-1, 1)  # column vector, matching the model's output shape

# Ten full-batch updates.
for _ in range(10):
    optimizer.zero_grad()
    output = self(train_inputs)
    loss = criterion(output, train_targets)

    # Calculates gradient
    loss.backward()
    # Show the first row of the hidden-layer weights before this update is applied.
    print(self.hidden.weight[0])

    # Updates weights
    optimizer.step()

In [None]:
# Print the state the optimizer keeps for each parameter.
for param_state in optimizer.state.values():
    print(param_state)