In [1]:
import torch
import numpy as np

In [2]:
#tensor initialization <directly from data>
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)

#tensor initialization <from np array>
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

print(x_data)
print(x_np)

print("\n" + str(x_np.shape))
print(f"type is {x_data.dtype} vs {x_np.dtype}")
print(f"tensor is stored on {x_np.device}")

tensor([[1, 2],
        [3, 4]])
tensor([[1, 2],
        [3, 4]])

torch.Size([2, 2])
type is torch.int64 vs torch.int64
tensor is stored on cpu


In [3]:
x_ones = torch.ones_like(x_data)
print(f"ones tensor: \n {x_ones}")

x_rand = torch.rand_like(x_data, dtype=torch.float)
print(f"random tensor: \n {x_ones}")

ones tensor: 
 tensor([[1, 1],
        [1, 1]])
random tensor: 
 tensor([[1, 1],
        [1, 1]])


In [4]:
shape = (3,5)
rand_tensor = torch.rand(shape)
print(f"random tensor: \n {rand_tensor}")

random tensor: 
 tensor([[0.0994, 0.8147, 0.4840, 0.0280, 0.2625],
        [0.7591, 0.1866, 0.0308, 0.2977, 0.3998],
        [0.5183, 0.3532, 0.1795, 0.4991, 0.7184]])


In [5]:
tensor = torch.ones(3,4)
tensor2 = torch.rand(3,4)

if torch.cuda.is_available():
  tensor = tensor.to('cuda')

print(tensor)
print(f"Device tensor is stored on: {tensor.device}")

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
Device tensor is stored on: cpu


In [6]:
tensor[:,1]=0
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [7]:
t1 = torch.cat([tensor, tensor], dim = 1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1.]])


In [8]:
print(tensor.mul(tensor))

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [9]:
print(tensor.matmul(tensor.T))

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]])


In [10]:
tensor.add_(3)
print(tensor)

tensor([[4., 3., 4., 4.],
        [4., 3., 4., 4.],
        [4., 3., 4., 4.]])


In [11]:
#Torch AUTOGRAD
import torchvision

In [12]:
model = torchvision.models.resnet18()
data = torch.rand(1,3,64,64)
labels = torch.rand(1,1000)

print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
predictions = model(data)
print(predictions.size())

torch.Size([1, 1000])


In [14]:
loss = (predictions - labels).sum()
print(loss.detach().numpy())

-514.50684


In [15]:
loss.backward()

In [32]:
optim = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [33]:
optim.zero_grad()

In [34]:
optim.step()

In [18]:
#now onto a simple neural network

import torch.nn as nn
import torch.nn.functional as F

In [19]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.Conv1 = nn.Conv2d(1, 6, 5)
        self.Conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120) #5x5 image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # first, max pool over 2x2 window
        x = F.max_pool2d(F.relu(self.Conv1(x)), (2,2))
        # same thing here, short hand operation for 2x2 square
        x = F.max_pool2d(F.relu(self.Conv2(x)), 2)
        x = torch.flatten(x,1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [20]:
net = Net()
print(net)

Net(
  (Conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (Conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [21]:
params = list(net.parameters())
#print(params)
print(len(params))
#print(params[0].size())  # conv1's .weight
for i in range(len(params)):
    print(params[i].size())

10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [22]:
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0547, -0.1229,  0.1024, -0.0640,  0.0339, -0.0360, -0.0209,  0.0441,
         -0.1223, -0.0352]], grad_fn=<AddmmBackward0>)


In [23]:
# net.zero_grad()
# out.backward(torch.randn(1, 10))

In [24]:
target = torch.randn(10)
target = target.view(1, -1)
criteron = nn.MSELoss()

loss = criteron(out, target)
print(loss)

tensor(0.9835, grad_fn=<MseLossBackward0>)


In [25]:
print(loss.grad_fn)
print(loss.grad_fn.next_functions) #linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) #relu

<MseLossBackward0 object at 0x7fdcc6f391d0>
((<AddmmBackward0 object at 0x7fdcc6f39358>, 0), (None, 0))
<AccumulateGrad object at 0x7fdcc6f391d0>


In [26]:
net.zero_grad()

In [27]:
print('conv1.bias.grad before backward')
print(net.Conv1.bias.grad)

conv1.bias.grad before backward
None


In [28]:
loss.backward()

In [29]:
print('conv1.bias.grad after backward')
print(net.Conv1.bias.grad)

conv1.bias.grad after backward
tensor([0.0020, 0.0008, 0.0107, 0.0185, 0.0079, 0.0112])


In [37]:
learning_rate = 0.01
# weight = weight - learning rate * gradient
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)