In [1]:
import torch
import numpy as np

In [2]:
a = torch.Tensor(3)
print(a)

tensor([0., 0., 0.])


In [3]:
b = torch.Tensor([1,2])
print(b)

tensor([1., 2.])


In [4]:
c = torch.Tensor([2,2])
print(c)

tensor([2., 2.])


In [5]:
d = torch.rand([2,2,2])
print(d)

tensor([[[0.3192, 0.2745],
         [0.1739, 0.2762]],

        [[0.2070, 0.9111],
         [0.1745, 0.7736]]])


In [6]:
x = torch.randn([3,5])
y = torch.randn([5,4])
z = x @ y
print(z.shape)

torch.Size([3, 4])


In [7]:
z = z+z 

In [8]:
print(z.numpy())
print(type(z)) # torch tensor
print(type(z.numpy())) # numpy array)

[[ 2.6261988   0.09623829 -3.0123212  -2.8543663 ]
 [ 3.399937   -5.7573743   0.8399302  -3.4104428 ]
 [ 2.2365975   0.0135173   0.82174206 -2.0824642 ]]
<class 'torch.Tensor'>
<class 'numpy.ndarray'>


In [9]:
x = torch.Tensor(np.random.normal([3,5]))
print(type(x)) # torch tensor

<class 'torch.Tensor'>


# Autodiff

In [10]:
x = torch.tensor(1.0, requires_grad=True)



def u(x):
    return x*x
def g(u):
    return -u

In [11]:
dgdx = torch.autograd.grad(g(u(x)), x)[0]

print(dgdx)

tensor(-2.)


In [12]:
x = torch.tensor(([1,2,3],[4,5,6]))
print(x)



tensor([[1, 2, 3],
        [4, 5, 6]])


In [13]:
x_stacked_0 = torch.stack([x,x],0)
print(x_stacked_0)
print(x_stacked_0.size())

tensor([[[1, 2, 3],
         [4, 5, 6]],

        [[1, 2, 3],
         [4, 5, 6]]])
torch.Size([2, 2, 3])


In [14]:

x_stacked_1 = torch.stack([x,x],1)
print(x_stacked_1)
print(x_stacked_1.size())

tensor([[[1, 2, 3],
         [1, 2, 3]],

        [[4, 5, 6],
         [4, 5, 6]]])
torch.Size([2, 2, 3])


In [15]:

x_stacked_2 = torch.stack([x,x],2)
print(x_stacked_2)
print(x_stacked_2.size())

tensor([[[1, 1],
         [2, 2],
         [3, 3]],

        [[4, 4],
         [5, 5],
         [6, 6]]])
torch.Size([2, 3, 2])


In [16]:
print(x*x)
print(x)
print(torch.ones_like(x))

tensor([[ 1,  4,  9],
        [16, 25, 36]])
tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1, 1, 1],
        [1, 1, 1]])


In [17]:

x_stacked_square = torch.stack([x*x,x, torch.ones_like(x) ],-1)

print(x_stacked_square)
print(x_stacked_square.size())


tensor([[[ 1,  1,  1],
         [ 4,  2,  1],
         [ 9,  3,  1]],

        [[16,  4,  1],
         [25,  5,  1],
         [36,  6,  1]]])
torch.Size([2, 3, 3])


In [18]:
w = torch.tensor(torch.randn([3,1]), requires_grad=True)

opt = torch.optim.Adam([w],lr=0.1)


def model(x):
    f = torch.stack([x * x, x, torch.ones_like(x)], 1)
    yhat = torch.squeeze(f @ w, 1)
    return yhat


def compute_loss(y, yhat):
    loss = torch.nn.functional.mse_loss(yhat,y)
    return loss


def generate_data():
    x = torch.rand(100)*20 -10
    y = 5*x*x + 3
    return x,y

def train_step():
    x, y = generate_data()
    
    yhat = model(x)
    loss = compute_loss(y, yhat)
    opt.zero_grad()
    loss.backward()
    opt.step()
    
for _ in range(1000):
    train_step()
        
print(w.detach().numpy())

  w = torch.tensor(torch.randn([3,1]), requires_grad=True)


[[4.976842e+00]
 [1.006924e-03]
 [4.356219e+00]]


# Encapsulate the model with Modules



In [19]:
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Parameter(torch.rand(1))
        self.b = nn.Parameter(torch.rand(1))
        
    def forward(self,x):
        yhat = self.a*x + self.b
        return yhat

In [20]:
x = torch.arange(100,dtype=torch.float32)
net = Net()
y = net(x)


In [21]:
for p in net.parameters():
    print(p)

Parameter containing:
tensor([0.9216], requires_grad=True)
Parameter containing:
tensor([0.2227], requires_grad=True)


In [22]:
x = torch.arange(100,dtype=torch.float32) /100
y = 5*x +3 + torch.rand(100) * 0.3

In [23]:
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)


for i in range(10000):
    net.zero_grad()
    yhat = net(x)
    loss = criterion(yhat,y)
    loss.backward()
    optimizer.step()
    
print(net.a,net.b)

Parameter containing:
tensor([5.0185], requires_grad=True) Parameter containing:
tensor([3.1362], requires_grad=True)


torch.nn.Linear is a predifined module following the one that we set above

In [24]:
class Net(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        yhat = self.linear(x.unsqueeze(1)).squeeze(1)
        return yhat

In [25]:
net = Net()
for p in net.parameters():
    print(p)

Parameter containing:
tensor([[-0.4532]], requires_grad=True)
Parameter containing:
tensor([0.6816], requires_grad=True)


In [26]:
model = nn.Sequential(
    nn.Linear(64,32),
    nn.ReLU(),
    nn.Linear(32,10),
)

# Broadcasting

In [27]:
a = torch.tensor([[1.,2.], [3.,4.]])
print(a[0][0].item())

b = torch.tensor([[1.],[2.]])

c = a+b
print(c)

1.0
tensor([[2., 3.],
        [5., 6.]])


In [28]:
d = b.repeat([1,2])
print(d)

tensor([[1., 1.],
        [2., 2.]])


In [29]:
a = torch.rand([5,3,5])
b = torch.rand([5,1,6])

linear = nn.Linear(11,10)

tiled_b = b.repeat([1,3,1])
print(tiled_b.size())

c = torch.cat([a,tiled_b],2)
print(a.size())
print(c.size())

d = nn.functional.relu(linear(c))
print(d.shape)

torch.Size([5, 3, 6])
torch.Size([5, 3, 5])
torch.Size([5, 3, 11])
torch.Size([5, 3, 10])


In [30]:
print(a[0][0])
print(tiled_b[0][0])
print(c[0][0])

tensor([0.1019, 0.8310, 0.5774, 0.6921, 0.6067])
tensor([0.4334, 0.4603, 0.3950, 0.1296, 0.4351, 0.4713])
tensor([0.1019, 0.8310, 0.5774, 0.6921, 0.6067, 0.4334, 0.4603, 0.3950, 0.1296,
        0.4351, 0.4713])


But this can be done more efficiently with broadcasting. We use the fact that f(m(x + y)) is equal to f(mx + my). So we can do the linear operations separately and use broadcasting to do implicit concatenation:

In [32]:
a = torch.rand([5,3,5])
b = torch.rand([5,1,6])

linear1= nn.Linear(5,10)
linear2 = nn.Linear(6,10)

pa = linear1(a)
pb = linear2(b)
d = nn.functional.relu(pa+pb)

print(d.shape)


torch.Size([5, 3, 10])


In [33]:
class Merge(torch.nn.Module):
    def __init__(self, in_features1, in_features2, out_features, activation=None):
        super().__init__()
        self.linear1 = nn.Linear(in_features1, out_features)
        self.linear2= nn.Linear(in_features2, out_features)
        self.activation = activation
    
    def forward(self,a,b):
        pa = self.linear1(a)
        pb = self.linear2(b)
        c = pa + pb
        if self.activation is not None:
            c = self.activation(c)
        return c

In [34]:
a = torch.tensor([[1.],[2.]])
b = torch.tensor([1.,2.])
c= torch.sum(a+b,0)
print(c)

tensor([5., 7.])


# overloaded operators

In [37]:
import time

x = torch.rand([500,10])
z = torch.zeros([10])

start = time.time()
for i in range(500):
    z += x[i]
    
print("Took %f seconds." % (time.time() - start))

Took 0.002921 seconds.


In [40]:
z = torch.zeros([10])
for x_i in torch.unbind(x):
    z += x_i

tensor([0.7614, 0.5941, 0.9495, 0.7423, 0.7682, 0.1433, 0.0231, 0.0969, 0.0549,
        0.8405])

In [48]:
z = torch.sum(x,dim=0)
z[0]

tensor(244.8913)

In [59]:
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(a)
b = torch.sum(a,0)
print(b)
print(a[0],a[1])
c = torch.sum(a,1)
print(c)

tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([5, 7, 9])
tensor([1, 2, 3]) tensor([4, 5, 6])
tensor([ 6, 15])
