In [2]:
import torch
import numpy as np
import time
# Pick the GPU when PyTorch reports CUDA as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [2]:
# Six integers drawn from the half-open range [-100, 100): the upper bound
# is exclusive. No seed is set, so the values change on every run.
randint = torch.randint(low=-100, high=100, size=(6,))
randint

tensor([ 75,  43, -65,  61,  34,  60])

In [5]:
# Build a 3x2 floating-point tensor from nested Python lists, one list per row.
tensor = torch.tensor([
    [0.1, 1.2],
    [2.2, 3.1],
    [4.9, 5.2],
])
tensor

tensor([[0.1000, 1.2000],
        [2.2000, 3.1000],
        [4.9000, 5.2000]])

In [9]:
# 2 rows x 3 columns, every entry 0.0 (shape passed as a single tuple).
zeros = torch.zeros((2, 3))
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [10]:
# 3 rows x 4 columns, every entry 1.0 (shape passed as a single tuple).
ones = torch.ones((3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [11]:
# torch.empty only allocates storage; it does not initialise it. The values
# shown are whatever happened to be in that memory, so the zeros in the
# output are incidental and must not be relied on.
#
# The result is named `empty` rather than `input`: assigning to `input`
# would shadow Python's builtin input() for the rest of the kernel session.
empty = torch.empty(2, 3)
empty

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [13]:
# Integers 0, 1, ..., 4: the start is spelled out and the end (5) is exclusive.
arange = torch.arange(0, 5)
arange

tensor([0, 1, 2, 3, 4])

In [15]:
# Five evenly spaced points from 3 to 10, both endpoints included.
linspace = torch.linspace(start=3, end=10, steps=5)
linspace

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

In [16]:
# Five points spaced evenly on a log scale: 10**-10, 10**-5, 10**0, 10**5, 10**10.
# base=10.0 is the default and is written out here only for clarity.
logspace = torch.logspace(-10, 10, steps=5, base=10.0)
logspace

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

In [17]:
# 5x5 identity matrix: ones on the main diagonal, zeros everywhere else.
eye = torch.eye(n=5)
eye

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [18]:
# `a` is an uninitialised 2x3 int64 buffer used only as a shape/dtype template.
a = torch.empty(2, 3, dtype=torch.int64)
# empty_like allocates a new tensor with the template's shape and dtype;
# its contents are uninitialised as well.
empty_like = torch.empty_like(a)
empty_like

tensor([[0, 0, 0],
        [0, 0, 0]])

In [22]:
# %%time is a cell magic command that measures the execution time of the entire cell

%%time

start_time = time.time()
zeros = torch.zeros(1,1)
end_time = time.time()

elapsed_time = end_time - start_time
#print(f'{elapsed_time:.8f}')

CPU times: total: 0 ns
Wall time: 0 ns


In [12]:
# Sampling indices with torch.multinomial.
# Index 0 carries weight 0.1 and index 1 carries weight 0.9, so roughly
# one draw in ten should come out as 0.
probabilities = torch.tensor([0.1, 0.9])

# Ten draws with replacement (needed here: there are only two categories).
samples = torch.multinomial(probabilities, 10, replacement=True)
print(samples)

tensor([1, 1, 0, 1, 1, 1, 1, 1, 1, 0])


In [14]:
# Join two 1-D tensors end to end. torch.cat concatenates along dim 0 by
# default, which is the only axis these tensors have.
tensor = torch.tensor([1, 2, 3, 4])
extra = torch.tensor([5])
out = torch.cat([tensor, extra])
print(out)

tensor([1, 2, 3, 4, 5])


In [15]:
# Lower triangle: keep entries on and below the main diagonal of a 5x5
# matrix of ones, zero out everything above it.
out = torch.ones(5, 5).tril()
print(out)

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])


In [16]:
# triangle - upper
# torch.triu keeps entries on and above the main diagonal of the 5x5 matrix
# of ones and zeroes out everything below it.

out = torch.triu(torch.ones(5,5))
print(out)

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])


In [18]:
# Masked fill, to be used for self-attention later.
# Start from a 5x5 matrix of zeros and write -inf wherever the lower-triangular
# ones matrix is 0, i.e. everywhere strictly above the main diagonal.
lower = torch.tril(torch.ones(5, 5))
out = torch.zeros(5, 5).masked_fill(lower == 0, float('-inf'))
print(out)

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])


In [19]:
# Element-wise exponential of `out` as left by the masked-fill cell:
# exp(0) = 1 and exp(-inf) = 0, which gives back a lower-triangular matrix of ones.
out.exp()

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [24]:
# Swap axis 0 and axis 2 of a (2, 3, 4) tensor; the result has shape (4, 3, 2).
# The source tensor is named `zeros_3d` rather than `input`: assigning to
# `input` would shadow Python's builtin input() for the rest of the session.
zeros_3d = torch.zeros([2,3,4])
out = zeros_3d.transpose(0,2)
out.shape

torch.Size([4, 3, 2])

In [25]:
# Three length-3 vectors, built from plain Python rows in one pass.
tensor1, tensor2, tensor3 = (
    torch.tensor(row) for row in ([1, 2, 3], [4, 5, 6], [7, 8, 9])
)

# torch.stack adds a new leading axis (dim 0 by default), so each input
# vector becomes one row of the 3x3 result.
stacked_tensor = torch.stack((tensor1, tensor2, tensor3), dim=0)
print(stacked_tensor)

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


In [42]:
# Linear transformation (matrix multiplication)
#
# nn.Linear performs a matrix multiply.
# It's the core of every transformer layer (Q, K, V projections are all nn.Linear).
# It transforms your input vector into a new space with learnable weights.
#
# With bias=False the layer computes sample @ weight.T and nothing else.
# The weights are randomly initialised and no seed is set here, so the
# printed numbers differ from run to run.

import torch.nn as nn
sample = torch.tensor([10.,10.,10.])
linear = nn.Linear(3,3,bias=False)
print(linear(sample))

tensor([ 2.2653, -6.3965,  1.1472], grad_fn=<SqueezeBackward4>)


In [26]:
# Softmax function
# TODO: research softmax further, and read up on the exponential function (exp).

import torch.nn.functional as F

# Input scores to be turned into a probability distribution.
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# torch.nn.functional.softmax() along dim 0, the only axis of this tensor;
# the resulting entries are positive and sum to 1.
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [43]:
import torch
import torch.nn as nn

# Initialize the embedding layer: a lookup table with one learnable
# embedding_dim_size-dimensional vector per vocabulary entry.
vocab_size = 10000
embedding_dim_size = 100
embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim_size)

# Create some input indices (e.g. token IDs). nn.Embedding needs integer
# indices; torch.tensor with an explicit dtype replaces the legacy
# torch.LongTensor constructor.
input_indices = torch.tensor([1,5,3,2], dtype=torch.long)

# Look up one embedding vector per index.
vectors = embedding(input_indices)

print(vectors.shape)  # torch.Size([4, 100]): 4 tokens x 100 embedding dimensions

torch.Size([4, 100])


In [46]:
# Matrix multiplication: (3x2) @ (2x3) -> (3x3).
# The `@` operator and torch.matmul are two spellings of the same product,
# so both prints show the same matrix.
a = torch.tensor([
    [1, 2],
    [3, 4],
    [5, 6],
])
b = torch.tensor([
    [7, 8, 9],
    [10, 11, 12],
])
via_operator = a @ b
via_function = torch.matmul(a, b)
print(via_operator)
print(via_function)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
