In [2]:
import torch, numpy

In [7]:
import torch, math
from torch.autograd import Function
class MyMul(Function):
    """Custom autograd function for the elementwise product ``x * y``."""

    @staticmethod
    def forward(ctx, x, y):
        """Return ``x * y`` and stash both operands for the backward pass."""
        product = torch.mul(x, y)
        ctx.save_for_backward(x, y)
        return product

    @staticmethod
    def backward(ctx, grad_output):
        """Product rule: each operand's gradient is the upstream gradient
        scaled by the other operand. Returns ``(grad_x, grad_y)``."""
        lhs, rhs = ctx.saved_tensors
        grad_lhs = grad_output * rhs
        grad_rhs = grad_output * lhs
        return grad_lhs, grad_rhs

class MyMax(Function):
    """Custom autograd function for the elementwise maximum of two tensors."""

    @staticmethod
    def forward(ctx, x, y):
        """Return ``torch.maximum(x, y)`` and save both inputs for backward."""
        result = torch.maximum(x, y)
        ctx.save_for_backward(x, y)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        """Route the upstream gradient to the larger input.

        Where ``x > y`` the gradient goes to ``x``; where ``x < y`` it goes to
        ``y``; where the inputs are equal each receives half of it. Every
        other position gets zero. Returns ``(grad_x, grad_y)``.
        """
        x, y = ctx.saved_tensors

        # Contribution shared by both inputs: half the gradient on ties,
        # zero everywhere else.
        no_grad = torch.zeros_like(grad_output)
        tie_share = torch.where(x == y, grad_output * 0.5, no_grad)

        x_wins = x > y
        y_wins = x < y
        return (
            torch.where(x_wins, grad_output, tie_share),
            torch.where(y_wins, grad_output, tie_share),
        )

class MyCos(Function):
    """Custom autograd function for the elementwise cosine."""

    # forward/backward are declared static, matching MyMul and MyMax and the
    # torch.autograd.Function convention of invoking them through `.apply`
    # with `ctx` passed explicitly.
    @staticmethod
    def forward(ctx, x):
        """Return ``cos(x)`` and save ``x`` for the backward pass."""
        ctx.save_for_backward(x)
        return torch.cos(x)

    @staticmethod
    def backward(ctx, gradient_output):
        """Chain rule with d/dx cos(x) = -sin(x); returns the gradient w.r.t. ``x``."""
        x, = ctx.saved_tensors
        grad_input = gradient_output * -torch.sin(x)
        return grad_input

In [5]:
from torch.autograd import gradcheck

# Two random double-precision leaf tensors that require gradients; they are
# the shared inputs for the checks below.
x, y = (torch.randn(3, dtype=torch.double, requires_grad=True) for _ in range(2))

# gradcheck takes the function inputs as a tuple of tensors.
inputs = (x, y)

# Check each custom two-input Function in turn and print the verdict.
for _checked_fn in (MyMul.apply, MyMax.apply):
    print(gradcheck(_checked_fn, inputs))

True
True


In [10]:
import torch, math 
import torch.nn.functional 

class CosLinear(torch.nn.Module):
    """Linear layer whose effective weight matrix is ``cos(weight)``.

    The raw ``weight`` parameter is passed elementwise through ``MyCos``
    before being used in an ordinary affine map.

    Args:
        in_features: size of each input sample (columns of ``weight``).
        out_features: size of each output sample (rows of ``weight``).
        bias: if true, learn an additive bias of shape ``(out_features,)``;
            otherwise ``self.bias`` is registered as ``None``.
    """

    # The base class is spelled `torch.nn.Module` because this cell imports
    # `torch` and `torch.nn.functional` but not the `nn` alias.
    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = torch.nn.Parameter(torch.empty(out_features, in_features))
        if bias:
            self.bias = torch.nn.Parameter(torch.empty(out_features))
        else:
            self.register_parameter("bias", None)
        # Must run only after `bias` has been registered: reset_parameters()
        # reads self.bias, and the parameters above are allocated uninitialized.
        self.reset_parameters()

    def reset_parameters(self):
        """(Re)initialize ``weight`` and, when present, ``bias`` in place."""
        # Kaiming-uniform initialization of the raw (pre-cosine) weight.
        torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))

        if self.bias is not None:
            fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight)
            # The bound shrinks as the number of inputs grows; guard against a
            # zero-width layer to avoid dividing by zero.
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            # Bias is drawn uniformly from [-bound, bound].
            torch.nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, input):
        """Apply ``input @ cos(weight).T + bias``."""
        weight_with_cos = MyCos.apply(self.weight)
        return torch.nn.functional.linear(input, weight_with_cos, self.bias)
    
    


# Dynamic Network Implementation

In [None]:
import torch, math, torchvision
import torch.nn as nn
import torch.nn.functional as F 

class Net(nn.Module):
    """Dynamic-depth MLP: 64 -> 32 -> (shared 32 -> 32 layer, repeated) -> 1.

    In training mode the shared hidden layer is applied a random number of
    times (uniform in ``1..max_layers``) on every forward pass. In eval mode
    the output is the mean of the predictions obtained at every depth
    ``1..max_layers``.

    Args:
        max_layers: largest number of times the shared hidden layer may be
            applied. Defaults to 4.

    Raises:
        ValueError: if ``max_layers`` is smaller than 1.
    """

    def __init__(self, max_layers=4):
        super(Net, self).__init__()
        if max_layers < 1:
            raise ValueError("max_layers must be at least 1")
        self.max_layers = max_layers

        self.linear = nn.Linear(64, 32)
        self.hidden_layer = nn.Linear(32, 32)
        self.output_layer = nn.Linear(32, 1)

    def forward(self, input):
        """Run the network on ``input`` (last dimension 64)."""
        # The first layer is shared by every depth, so compute it once.
        x = F.relu(self.linear(input))

        if self.training:
            # Sample the depth for this pass: an integer in [1, max_layers].
            num_layers = torch.randint(1, self.max_layers + 1, (1,)).item()
            for _ in range(num_layers):  # re-apply the same hidden layer
                x = F.relu(self.hidden_layer(x))
            return self.output_layer(x)

        # Eval: the activation at depth n is the hidden layer applied to the
        # activation at depth n - 1, so build the depths incrementally and
        # read out a prediction after each application.
        outputs = []
        for _ in range(self.max_layers):
            x = F.relu(self.hidden_layer(x))
            outputs.append(self.output_layer(x))

        mean_outputs = torch.stack(outputs).mean(dim=0)
        return mean_outputs
    
    

In [10]:
import numpy as np

# Synthetic linear-regression data: y = X @ true_weights + Gaussian noise.
# A dedicated, seeded generator makes the dataset identical on every run.
DATA_SEED = 0
NOISE_STD = 0.1
data_rng = np.random.default_rng(DATA_SEED)

n = 2**14          # number of samples
dim_input = 64     # features per sample
dim_output = 1     # regression targets per sample

X = data_rng.standard_normal((n, dim_input)).astype(np.float32)

true_weights = data_rng.standard_normal((dim_input, dim_output))
noise = NOISE_STD * data_rng.standard_normal((n, dim_output))
# Cast the targets to float32 so they share X's dtype; without the cast the
# float64 weights and noise would promote y to float64.
y = (X @ true_weights + noise).astype(np.float32)

In [12]:
# Shuffle the sample indices with a seeded generator so the train/test
# partition is the same on every run, then cut at TRAIN_FRACTION.
SPLIT_SEED = 0
TRAIN_FRACTION = 0.75

samples = np.random.default_rng(SPLIT_SEED).permutation(n)
split = int(n * TRAIN_FRACTION)

train = samples[:split]
test = samples[split:]
# Every index lands in exactly one of the two subsets.
assert len(train) + len(test) == n

X_train, y_train = X[train], y[train]
X_test, y_test = X[test], y[test]