In [29]:
import torch

# Define Dense Network from scratch

In [64]:
class LinearLayer:
    """Factory for the parameters of one fully connected layer."""

    def init_layer(self, in_dim, out_dim):
        """Draw standard-normal weights and biases for a layer.

        Args:
            in_dim:  number of inputs feeding the layer.
            out_dim: number of units in the layer.

        Returns:
            A ``(weights, bias)`` tuple where ``weights`` has shape
            ``(in_dim, out_dim)`` and ``bias`` has shape ``(1, out_dim)``.

        Example:
            w1, b1 = LinearLayer().init_layer(n_input, n_hidden)
            w2, b2 = LinearLayer().init_layer(n_hidden, n_output)
        """
        # Weights are sampled before the bias so the RNG stream is consumed
        # in a fixed order.
        weights = torch.randn(in_dim, out_dim)
        bias = torch.randn(1, out_dim)
        return weights, bias
        
class DenseNetwork(object):
    """Two-layer fully connected network (input -> hidden -> output) on raw tensors.

    The hidden layer uses a sigmoid activation. The output activation is a
    sigmoid when ``n_output`` is 1 or 2 (both cases use a single output unit)
    and a row-wise softmax otherwise.
    """

    def __init__(self, n_input, n_hidden, n_output):
        """Create randomly initialized weights and biases for both layers.

        Args:
            n_input:  number of input features per example.
            n_hidden: number of hidden units.
            n_output: number of target classes; 1 or 2 gives one sigmoid
                output unit, larger values give one softmax unit per class.

        Raises:
            ValueError: if ``n_output`` is smaller than 1.
        """
        self.num_classes, self.output_activation_fn = self.get_output_activation(n_output)
        self.w1, self.b1 = LinearLayer().init_layer(n_input, n_hidden)
        self.w2, self.b2 = LinearLayer().init_layer(n_hidden, self.num_classes)

    def forward(self, X):
        """Run a forward pass.

        Args:
            X: 2-D feature tensor whose second dimension matches ``n_input``
               (required by the matrix product with ``w1``).

        Returns:
            ``(logits, proba)``: the raw output-layer scores and the result of
            applying the output activation to them.
        """
        hidden = self.sigmoid(torch.mm(X, self.w1) + self.b1)
        logits = torch.mm(hidden, self.w2) + self.b2
        proba = self.output_activation_fn(logits)
        return logits, proba

    def get_output_activation(self, num_classes):
        """Choose the number of output units and the output activation.

        Args:
            num_classes: requested number of classes.

        Returns:
            ``(1, self.sigmoid)`` for 1 or 2 classes, otherwise
            ``(num_classes, self.softmax)``.

        Raises:
            ValueError: if ``num_classes`` is smaller than 1.
        """
        # Reject nonsensical sizes here instead of failing later with an
        # opaque tensor-shape error when the output weights are allocated.
        if num_classes < 1:
            raise ValueError("num_classes must be at least 1, got {}".format(num_classes))
        if num_classes <= 2:
            # Binary problems need only one unit: P(class 1) = sigmoid(logit).
            return 1, self.sigmoid
        return num_classes, self.softmax

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + torch.exp(-x))

    def softmax(self, x):
        """Row-wise softmax for a 2-D tensor.

        Args:
            x: tensor of shape ``[batch_size, num_classes]``.

        Returns:
            Tensor of the same shape whose rows each sum to 1.
        """
        # Subtracting the per-row maximum leaves the result mathematically
        # unchanged but keeps exp() from overflowing to inf (and the ratio
        # from becoming NaN) when logits are large.
        shifted = x - x.max(dim=1, keepdim=True)[0]
        exps = torch.exp(shifted)
        # keepdim=True keeps the sums as a [batch_size, 1] column so the
        # division broadcasts across each row.
        return exps / exps.sum(dim=1, keepdim=True)

## Random Input

In [65]:
# Push one random example through a tiny network built from scratch.
n_hidden, n_output = 2, 1            # hidden units, output units
features = torch.randn(1, 3)         # one row holding 3 standard-normal features
n_input = features.size(1)           # input width has to equal the feature count
net = DenseNetwork(n_input=n_input, n_hidden=n_hidden, n_output=n_output)
logits, proba = net.forward(features)
(logits, logits.numpy())

(tensor([[-0.7842]]), array([[-0.78421736]], dtype=float32))

## MNIST

In [52]:
import torch
from   torchvision import datasets, transforms

# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])
# MNIST training split, stored under the given root directory.
trainset = datasets.MNIST(
    root='~/.pytorch/MNIST_data/',
    train=True,
    download=True,
    transform=transform,
)
# Serve the training set in shuffled mini-batches of 64 for batch training.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
dataiter = iter(trainloader)

In [53]:
# Pull a single mini-batch from the loader iterator. The built-in next() is
# used instead of the iterator's .next() method, a Python-2-style alias that
# recent PyTorch releases no longer provide; next() works on every version.
images, labels = next(dataiter)
print(type(images), images.shape, labels.shape)
# Flatten each image into one row so the batch becomes a 2-D matrix
# of shape [batch_size, pixels_per_image].
inputs = images.view(images.shape[0], -1)
print(type(inputs), inputs.shape)

<class 'torch.Tensor'> torch.Size([64, 1, 28, 28]) torch.Size([64])
<class 'torch.Tensor'> torch.Size([64, 784])


In [68]:
# Run the flattened MNIST batch through a 784-256-10 network.
net = DenseNetwork(784, 256, 10)
logits, proba = net.forward(inputs)
print(logits.shape, proba.shape)
# Each row of class probabilities should add up to 1.
print(torch.sum(proba, dim=1))

torch.Size([64, 10]) torch.Size([64, 10])
tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000])
