# Neural Network Model Without Training
Import in libraries

In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

Check that pytorch is running on gpu

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


Define neural net model class. We will have an input layer, one hidden layer, and an output layer. nn.Relu simply changes all negative numbers into zeros. nn.Linear performs linear transformations of the inputs to obtain the outputs for each of the nodes. nn.Flatten will flatten input tensor, into less dimensions (ex [[1,2],[3,4]] becomes [1,2,3,4])

> **Questions:**
>- Why do we use the ReLU function in this case instead of other activation functions?
>- Why do we use 512 as the hidden layer size? Is this arbitrary, or is there some way to make a good decision for this?

In [4]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        # ...idk about this line...
        super(NeuralNetwork, self).__init__()
        # I think flattens an array/tensor??
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            # arguments of nn.Linear are (# of input nodes, # of output nodes) for the given layer
            nn.Linear(28*28, 512),
            # Q: Why do we use ReLU here instead of sigmoid or other function??
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        # Passes input data throught the model and returns the output
        logits = self.linear_relu_stack(x)
        return logits

Create an instance of the model, put onto gpu.

In [5]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


Create random test input and pass it through the (untrained) model. Let's check to see what weights/biases were automatically assigned to the model at creation.

In [6]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)

# Softmax: converts numbers from the output layer from infinite range into a probability in [0,1]
pred_probab = nn.Softmax(dim=1)(logits)

# Which output node has the highest probability? That is the predicted class!
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

print(f"First Linear weights: {model.linear_relu_stack[0].weight} \n")

print(f"First Linear weights: {model.linear_relu_stack[0].bias} \n")

Predicted class: tensor([3], device='cuda:0')
First Linear weights: Parameter containing:
tensor([[ 0.0293, -0.0008,  0.0235,  ..., -0.0003, -0.0275,  0.0145],
        [ 0.0185, -0.0049,  0.0257,  ...,  0.0131,  0.0317, -0.0196],
        [-0.0239, -0.0062, -0.0054,  ...,  0.0312,  0.0029, -0.0082],
        ...,
        [-0.0259,  0.0107, -0.0184,  ...,  0.0054,  0.0082, -0.0226],
        [-0.0276,  0.0050, -0.0124,  ...,  0.0068, -0.0216, -0.0284],
        [-0.0026, -0.0097,  0.0314,  ...,  0.0069, -0.0100,  0.0121]],
       device='cuda:0', requires_grad=True) 

First Linear weights: Parameter containing:
tensor([-1.7845e-02,  2.9038e-02, -2.8424e-02,  3.0901e-02,  1.2755e-02,
        -1.8704e-02,  2.2038e-03,  1.4839e-02,  3.1548e-02, -9.1306e-03,
        -7.4122e-03, -3.0813e-03, -1.7229e-02, -2.4003e-02, -3.5428e-02,
         3.3135e-02, -2.2036e-02, -3.8221e-03, -3.5489e-02,  2.0112e-02,
         1.9395e-02, -1.5363e-02,  6.0703e-03,  7.5113e-03, -2.9885e-02,
         3.2538e-04, 