In [32]:
import os
import time
import torch
import numpy as np

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

path = os.getcwd() + '/Documents/GitHub/fcc-intro-to-llms/'
print(path)

cpu
/Users/haowa/Documents/GitHub/fcc-intro-to-llms/Documents/GitHub/fcc-intro-to-llms/


In [7]:
%%time

start_time = time.time()

# matrix operations here
zeros = torch.zeros(1, 1)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")


0.00770569
CPU times: user 2.57 ms, sys: 3.41 ms, total: 5.98 ms
Wall time: 8.07 ms


In [11]:
torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()
rand = (torch_rand1 @ torch_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")


start_time = time.time()
rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time:.8f}")

0.19950104
0.19246984


In [12]:
# embeddings: torch.stack, torch.multinomial, torch.tril, torch.triu,
#             input.T / input.transpose, nn.Linear, torch.cat, 
#             F.softmax (show all the examples of functions/methods with pytorch docs)


# Define a probability tensor
probabilities = torch.tensor([0.1, 0.9])
# 10% or 0.1 chance of 0, 90% or 0.9 for getting 1. 
# each probability points to the index of the probability in the tensor

# Draw 5 samples from the multinomial distribution
samples = torch.multinomial(probabilities, num_samples=10, replacement=True)
print(samples)


tensor([1, 1, 0, 1, 1, 0, 1, 1, 1, 1])


In [14]:
tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
out

tensor([1, 2, 3, 4, 5])

In [13]:
# triangle lower, top left -> bottom right
out = torch.tril(torch.ones(5, 5))
out

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [15]:
out = torch.triu(torch.ones(5, 5))
out

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [16]:
out = torch.zeros(5, 5).masked_fill(torch.tril(torch.ones(5, 5)) == 0, float('-inf'))
out

tensor([[0., -inf, -inf, -inf, -inf],
        [0., 0., -inf, -inf, -inf],
        [0., 0., 0., -inf, -inf],
        [0., 0., 0., 0., -inf],
        [0., 0., 0., 0., 0.]])

In [17]:
torch.exp(out)  # expoential 

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])

In [19]:
# transposing

input = torch.zeros(2, 3, 4)
out1 = input.transpose(0, 1)
out2 = input.transpose(-2,-1)
print(out1.shape)
print(out1)
print(out2.shape)
print(out2)
# torch.permute works the same but you provide the new order of dimensions instead of the dimensions you'd like to swap.

torch.Size([3, 2, 4])
tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
torch.Size([2, 4, 3])
tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])


In [20]:
tensor1 = torch.tensor([1, 2, 3])
tensor2 = torch.tensor([4, 5, 6])
tensor3 = torch.tensor([7, 8, 9])

# Stack the tensors along a new dimension
stacked_tensor = torch.stack([tensor1, tensor2, tensor3])
stacked_tensor


tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [22]:
import torch.nn as nn

sample = torch.tensor([10.,10.,10.])

# linear transformation
linear = nn.Linear(3, 3, bias=False)  # important function with learnable parameters
print(linear(sample))

## nn module is for building all kinds of layers

tensor([-11.0786,   0.5570,  -8.5548], grad_fn=<SqueezeBackward3>)


In [24]:
import torch.nn.functional as F

# Create a tensor
tensor1 = torch.tensor([1.0, 2.0, 3.0])

# Apply softmax using torch.nn.functional.softmax()
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


In [25]:
# Initialize an embedding layer
vocab_size = 1000
embedding_dim = 100
embedding = nn.Embedding(vocab_size, embedding_dim)

# Create some input indices
input_indices = torch.LongTensor([1, 5, 3, 2])

# Apply the embedding layer
embedded_output = embedding(input_indices)

# The output will be a tensor of shape (4, 100), where 4 is the number of inputs
# and 100 is the dimensionality of the embedding vectors
print(embedded_output.shape)
#print(embedded_output)


torch.Size([4, 100])
tensor([[-0.2188,  0.7841,  2.6403,  0.3121,  0.3632, -0.6621,  2.1412,  0.1229,
         -1.0969,  0.5484,  0.3657, -0.4733, -0.3592,  1.0823,  0.7958,  0.3494,
         -0.0730,  0.1379, -0.5833,  0.1108, -1.6972, -1.2965,  0.8129, -1.1002,
         -0.4932, -0.2337,  0.3880,  1.8393,  2.2960, -2.0821, -0.3419, -1.3160,
         -0.0182, -0.6696, -1.7673, -0.0542,  1.1316, -0.0771, -1.0135,  1.2891,
         -0.0359, -1.4133,  2.9169, -1.0854,  0.5800, -0.6694, -1.9071, -0.2804,
          0.8121,  0.2078,  0.4932, -2.7595, -0.3539,  0.8492,  0.9628, -0.3923,
          0.1586,  1.4770,  1.3950,  1.6958, -0.8203, -0.0628,  2.5659, -0.6018,
          1.1829, -1.8072, -0.5042,  0.6044, -0.4598,  0.4194,  1.9349,  0.8478,
         -0.7287,  0.7869,  1.2730, -0.1650,  0.9479, -0.4012,  1.3671, -0.9694,
          0.4269, -0.1384, -0.2320,  0.6373, -0.8759, -1.0238,  0.9546,  0.4444,
          0.6822,  0.6657, -0.2852, -0.2930,  0.7320, -2.5093,  0.1909, -0.3346,
       

In [26]:
# dot production: @

a = torch.tensor([[1,2],[3,4],[5,6]])
b = torch.tensor([[7,8,9],[10,11,12]])
# print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


In [31]:
int_64 = torch.randint(1, (3, 2)).float()  # int64 type

float_32 = torch.rand(2, 3)  # float32 type

result = torch.matmul(int_64, float_32) # dot production 
print(result)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
