In [3]:
# Creating a graph in PyTorch
import torch
def compute_z(a,b,c):
    r1 = torch.sub(a,b)
    r2 = torch.mul(r1,2)
    z = torch.add(r2, c)
    return z

print(f"Scalar inputs: {compute_z(torch.tensor(1), torch.tensor(2), torch.tensor(3))}")
print(f"Scalar inputs: {compute_z(torch.tensor([1]), torch.tensor([2]), torch.tensor([3]))}")
print(f"Scalar inputs: {compute_z(torch.tensor([[1]]), torch.tensor([[2]]), torch.tensor([[3]]))}")

Scalar inputs: 1
Scalar inputs: tensor([1])
Scalar inputs: tensor([[1]])


In [15]:
a = torch.tensor(3.14, requires_grad=True) # I guess it corresponds to tensorflow Variable. 
print(a)
b = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
print(b)

c = torch.tensor([1.0, 2.0, 3.0], requires_grad=False)
print(c)
print(c.requires_grad)
c.requires_grad_() # in-place function
print(c)

tensor(3.1400, requires_grad=True)
tensor([1., 2., 3.], requires_grad=True)
tensor([1., 2., 3.])
False
tensor([1., 2., 3.], requires_grad=True)


In [23]:
import torch.nn as nn
torch.manual_seed(1)
w = torch.empty(2,3)
print(w)
nn.init.xavier_normal_(w)
print(w)

tensor([[7.7071e-44, 8.1725e+20, 5.3177e-08],
        [2.6371e-09, 2.1511e+23, 1.2794e+22]])
tensor([[ 0.4183,  0.1688,  0.0390],
        [ 0.3930, -0.2858, -0.1051]])


In [27]:
class MyModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.w1 = torch.empty(2,3,requires_grad=True)
        nn.init.xavier_normal_(self.w1)
        self.w2 = torch.empty(1,2,requires_grad=True)
        nn.init.xavier_uniform_(self.w2)

mod = MyModel()
print(mod.w1,"\n",mod.w2)

tensor([[-0.6018,  0.2566, -0.9591],
        [ 0.4631,  1.4432, -0.7640]], requires_grad=True) 
 tensor([[ 0.7693, -0.3316]], requires_grad=True)


In [41]:
# Computing the gradients of the loss with respect to trainable variables
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.5, requires_grad=True)

x = torch.tensor([1.4])
y = torch.tensor([2.1])

z = torch.add(torch.mul(w, x), b)
print(z)
# z.backward()
print(w.grad, b.grad)

loss = (y-z).pow(2).sum()
print(loss)

loss.backward()
print(f"dL/dw: {w.grad:.4f}, and analitical derivative: {2 * (y - (w*x + b)) * x}")
print(f"dL/db: {b.grad:.4f}, and analitical derivative: {2 * (y - (w*x + b))}")

tensor([1.9000], grad_fn=<AddBackward0>)
None None
tensor(0.0400, grad_fn=<SumBackward0>)
dL/dw: -0.5600, and analitical derivative: tensor([0.5600], grad_fn=<MulBackward0>)
dL/db: -0.4000, and analitical derivative: tensor([0.4000], grad_fn=<MulBackward0>)


In [70]:
def weights_init(m):
    # print(m)
    classname = m.__class__.__name__
    print(classname)
    if classname.find('Linear') != -1:
        torch.nn.init.zeros_(m.weight)
        # torch.nn.init.normal_(m.weight, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.zeros_(m.bias)


model = nn.Sequential(
    nn.Linear(4, 16),
    # nn.init.xavier_normal(),
    nn.ReLU(),
    nn.ReLU(),
    nn.Linear(16, 32),
    # nn.init.xavier_normal(),
    nn.ReLU()
)
model.apply(weights_init)
# model.state_dict()
model[0].weight

nn.init.xavier_normal_(model[0].weight)
model[0].weight

Linear
ReLU
ReLU
Linear
ReLU
Sequential


Parameter containing:
tensor([[-0.9815,  0.0155, -0.0966,  0.2447],
        [-0.1593,  0.6428,  0.2464,  0.4166],
        [-0.0223, -0.0953, -0.0662,  0.4552],
        [-0.4342,  0.1690, -0.1215,  0.7060],
        [ 0.0596, -0.0632, -0.3166,  0.2364],
        [ 0.0452, -0.3048,  0.1651,  0.2656],
        [ 0.5015, -0.2668,  0.0858,  0.2185],
        [ 0.5222,  0.5635,  0.3242, -0.1645],
        [ 0.0913,  0.2883,  0.3237, -0.0338],
        [-0.2596, -0.0228,  0.4489, -0.1574],
        [ 0.1405,  0.3110, -0.0946,  0.0627],
        [ 0.4120, -0.3310, -0.2239,  0.3932],
        [-0.1227,  0.1710,  0.2576,  0.1151],
        [ 0.4222, -0.1967,  0.2515,  0.6993],
        [-0.0163,  0.3057, -0.5568, -0.0113],
        [-0.5595,  0.0929, -0.4644,  0.4916]], requires_grad=True)

In [None]:
# Choosing a loss function