<a href="https://colab.research.google.com/github/ftk1000/torch_demos/blob/master/pytorch_CUDAnn_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# pytorch_CUDAnn_demo.ipynb

 https://pytorch.org/tutorials/beginner/pytorch_with_examples.html

# PyTorch:nn module


In [1]:
import torch
import time
assert torch.cuda.is_available()
cuda_device = torch.device("cuda")  # device object representing GPU

# batch_size = 16; input_features = 32; state_size = 128
# # Note the device=cuda_device arguments here
# X = torch.randn(batch_size, input_features, device=cuda_device)
# h = torch.randn(batch_size, state_size, device=cuda_device)
# C = torch.randn(batch_size, state_size, device=cuda_device)

In [2]:
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

print(device)

cuda


In [3]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in, device=cuda_device)
y = torch.randn(N, D_out, device=cuda_device)

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
).to(cuda_device)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')

fwd_time = bkwd_time = 0

TIMELENGTH = 5000

PRINT_INTERVAL = TIMELENGTH/10

learning_rate = 1e-4
for t in range(TIMELENGTH):
    start_time = time.time()
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)
    torch.cuda.synchronize()

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss.
    loss = loss_fn(y_pred, y)
    if t % PRINT_INTERVAL == PRINT_INTERVAL-1:
        print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()
    fwd_time += time.time() - start_time

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    start_time = time.time()
    loss.backward()
    torch.cuda.synchronize()

    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

    bkwd_time += time.time() - start_time    

print('Forward: {:.3f} us | Backward {:.3f} us'.format(fwd_time * 1e6/1e5, bkwd_time * 1e6/1e5))

499 3.0344322112796362e-06
999 6.336886571034483e-10
1499 1.339501148889255e-10
1999 6.136480906748787e-11
2499 3.741347162633524e-11
2999 2.4342446461322886e-11
3499 1.864106612126104e-11
3999 1.4826671140832914e-11
4499 1.1493074721091734e-11
4999 9.779886102345703e-12
Forward: 16.387 us | Backward 28.330 us
