In [13]:
import torch

In [14]:
x = torch.randint(23,78,(6,))
x

tensor([43, 61, 26, 67, 43, 46])

In [15]:
x = torch.tensor([[0.1,1.5],[5.6,7.8],[4.9,7.8]])
x

tensor([[0.1000, 1.5000],
        [5.6000, 7.8000],
        [4.9000, 7.8000]])

In [16]:
zeros = torch.zeros(5,6)
zeros

tensor([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]])

In [17]:
arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

In [18]:
torch.linspace(3 , 10 , steps=5)

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

In [19]:
torch.logspace(start=20 , end=30 , steps=6)

tensor([1.0000e+20, 1.0000e+22, 1.0000e+24, 1.0000e+26, 1.0000e+28, 1.0000e+30])

In [20]:
torch.eye(5)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [21]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [22]:
import time

In [23]:
%%time

start_time = time.time()
# matrix operations here
zeros = torch.zeros(1, 1)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.00000000
CPU times: total: 0 ns
Wall time: 1e+03 µs


In [29]:
import numpy as np
np.random.seed(42)

torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()

rand = (torch_rand1 @ torch_rand2)

end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")


start_time = time.time()

rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.01199794
0.08800268


In [30]:
# embeddings, torch.stack, torch.multinomial, torch.tril, torch.triu, input.T / input.transpose, nn.Linear, torch.cat, F.softmax (show all the examples of functions/methods with pytorch docs)


# Define a probability tensor
probabilities = torch.tensor([0.1, 0.9])
# 10% or 0.1 => 0, 90% or 0.9 => 1. each probability points to the index of the probability in the tensor
# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)

tensor([1, 1, 1, 1, 1, 1, 1, 0, 1, 1])


In [32]:
tensor = torch.tensor([1,2,3,4,5])
torch.cat((tensor , torch.tensor([6])),dim=0)

tensor([1, 2, 3, 4, 5, 6])

In [35]:
torch.tril(torch.ones(5, 5)) #triangle-lower

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [34]:
torch.triu(torch.ones(5, 5))

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [40]:
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [41]:
torch.exp(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [42]:
input = torch.zeros(2, 3, 4)
out1 = input.transpose(0, 1)
out2 = input.transpose(-2,-1)
print(out1.shape)
print(out2.shape)

torch.Size([3, 2, 4])
torch.Size([2, 4, 3])


In [43]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [45]:
import torch.nn as nn
sample = torch.tensor([10.,10.,10.])
linear = nn.Linear(3, 3, bias=False)
print(linear(sample))

tensor([8.2435, 2.6590, 2.2763], grad_fn=<SqueezeBackward4>)


In [46]:
import torch.nn.functional as F

# Create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Apply softmax using torch.nn.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [47]:
# Initialize an embedding layer
vocab_size = 80
embedding_dim = 6
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedded_output = embedding(input_indices)

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the dimensionality of the embedding vectors
print(embedded_output.shape)
print(embedded_output)

torch.Size([4, 6])
tensor([[ 1.2633,  0.0175,  0.9524,  1.7029, -0.1702, -0.2833],
        [ 0.4588,  1.7319,  0.4328, -1.5019,  0.1440, -0.4390],
        [ 1.9953,  0.3270, -0.5081,  0.4061, -0.3439,  0.2553],
        [ 1.0329,  0.2055, -0.7425, -0.0399, -0.4591, -0.8632]],
       grad_fn=<EmbeddingBackward0>)


In [49]:
%%time

a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])
# print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
CPU times: total: 0 ns
Wall time: 993 µs


In [51]:
int_64 = torch.randint(1, (3, 2)).float()
#type int64
float_32 = torch.rand(2,3)
#type float32
# print(int_64.dtype, float_32.dtype)
result = torch.matmul(int_64, float_32)
print(result)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [52]:
a = torch.rand(2, 3, 5)
print(a.shape)
x, y, z = a.shape
a = a.view(x,y,z)
# print(x, y, z)
print(a.shape)

torch.Size([2, 3, 5])
torch.Size([2, 3, 5])


In [54]:
input = torch.rand((4, 8, 10))
B, T, C = input.shape
output = input.view(B*T, C)
print(output)

tensor([[0.3203, 0.1770, 0.2191, 0.1457, 0.7302, 0.6855, 0.9351, 0.4915, 0.5075,
         0.7452],
        [0.6385, 0.9338, 0.7547, 0.8211, 0.6127, 0.1113, 0.1850, 0.7191, 0.0564,
         0.6010],
        [0.1653, 0.7021, 0.8590, 0.8530, 0.2933, 0.4051, 0.1676, 0.8776, 0.9784,
         0.5452],
        [0.9815, 0.0280, 0.8275, 0.2695, 0.9323, 0.3622, 0.0494, 0.8260, 0.7863,
         0.8898],
        [0.4220, 0.5591, 0.0079, 0.7785, 0.8300, 0.4012, 0.8604, 0.9181, 0.6686,
         0.5274],
        [0.7333, 0.9536, 0.8503, 0.6678, 0.1316, 0.8655, 0.0392, 0.0702, 0.6466,
         0.6001],
        [0.8402, 0.1678, 0.1420, 0.0104, 0.1300, 0.3053, 0.5032, 0.1470, 0.3103,
         0.0241],
        [0.7868, 0.9778, 0.5128, 0.6442, 0.4088, 0.9406, 0.4600, 0.9584, 0.0821,
         0.8926],
        [0.7893, 0.3681, 0.6955, 0.7904, 0.0038, 0.6792, 0.8603, 0.5468, 0.4804,
         0.5234],
        [0.4337, 0.6267, 0.8455, 0.6524, 0.7060, 0.8280, 0.2760, 0.1401, 0.1065,
         0.6237],
        [0