In [2]:
with open("verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()
print("Total number of character:", len(raw_text))
print(raw_text[:99])

Total number of character: 20479
I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no 


In [3]:
import re

preprocessed = re.split(r'([,.?_!"()\']|--|\s)', raw_text)
preprocessed = [item.strip() for item in preprocessed if item.strip()]
print(len(preprocessed))

4649


In [4]:
print(preprocessed[:30])

['I', 'HAD', 'always', 'thought', 'Jack', 'Gisburn', 'rather', 'a', 'cheap', 'genius', '--', 'though', 'a', 'good', 'fellow', 'enough', '--', 'so', 'it', 'was', 'no', 'great', 'surprise', 'to', 'me', 'to', 'hear', 'that', ',', 'in']


In [5]:
#Converting tokens into token IDs
all_words = sorted(list(set(preprocessed)))
vocab_size = len(all_words)
print(vocab_size)


1159


In [6]:
vocab = {token:integer for integer, token in enumerate(all_words)}
# for i, item in enumerate(vocab.items()):
#     print(item)
#     if i > 50:
#         break

In [7]:
class SimpleTokeniserV1:
    def __init__(self, vocab):
        self.str_to_int = vocab
        self.int_to_str = {i:s for s,i in vocab.items()}
    
    def encode(self, text):
        # Split text on special characters and whitespace
        preprocessed = re.split(r'([,.?_!"()\']|--|\s)', text)
        preprocessed = [item.strip() for item in preprocessed if item.strip()]
        ids = [self.str_to_int[s] for s in preprocessed]
        return ids
    
    def decode(self, ids):
        text = " ".join([self.int_to_str[i] for i in ids])
        text = re.sub(r'([,.?_!"()\']|--|\s)', r'\1',text)
        return text
        
# If text is: "Hello, world!"
# After re.split():
# preprocessed = ['Hello', ',', '', 'world', '!']

# After the cleaning loop:
# preprocessed = ['Hello', ',', 'world', '!']

In [8]:
tokeniser = SimpleTokeniserV1(vocab)
text = """It's the last he painted"""
ids = tokeniser.encode(text)
print(ids)

[58, 2, 872, 1013, 615, 541, 763]


In [9]:
print(tokeniser.decode(ids))

It ' s the last he painted


In [10]:
text = "Hello, do you like tea?"
x = tokeniser.encode(text)
print(x)

KeyError: 'Hello'

In [11]:
all_tokens = sorted(list(set(preprocessed)))
all_tokens.extend(["<|unk|>", "<|endoftext|>"])
vocab = {token:integer for integer, token in enumerate(all_tokens)}
print(len(vocab.items()))

1161


In [12]:
for i, item in enumerate(list(vocab.items())[-5:]):
    print(item)

('younger', 1156)
('your', 1157)
('yourself', 1158)
('<|unk|>', 1159)
('<|endoftext|>', 1160)


In [13]:
class SimpleTokeniserV2:
    def __init__(self, vocab):
        self.str_to_int = vocab
        self.int_to_str = {i:s for s,i in vocab.items()}
        
    def encode(self, text):
#         def handle_token(token):
#             if not token:
#                 return None
#             token = token.strip()
#             return token if token in self.str_to_int else "<|unk|>"
        
        preprocessed = re.split(r'([,.?_!"()\']|--|\s)', text)
#         item.strip() for item in preprocessed if item.strip()
#         preprocessed = [tk for tk in (handle_token(item) for item in preprocessed) if tk]
        preprocessed  = [item.strip() if item.strip() in self.str_to_int else "<|unk|>" 
                           for item in preprocessed if item.strip()]
        ids = [self.str_to_int[s] for s in preprocessed]
        return ids

    def decode(self, ids):
        text = " ".join([self.int_to_str[i] for i in ids])
        text = re.sub(r'\s+([,.?!"()\'])', r'\1', text) 
        return text

In [14]:
text1 = "Hello, do you like tea?"
text2 = "In the sunlit terraces of the palace."
text = " <|endoftext|> ".join((text1, text2))
print(text)

Hello, do you like tea? <|endoftext|> In the sunlit terraces of the palace.


In [15]:
tokeniser = SimpleTokeniserV2(vocab)
print(tokeniser.encode(text))

[1159, 5, 362, 1155, 642, 1000, 10, 1160, 57, 1013, 981, 1009, 738, 1013, 1159, 7]


In [16]:
print(tokeniser.decode(tokeniser.encode(text)))

<|unk|>, do you like tea? <|endoftext|> In the sunlit terraces of the <|unk|>.


In [17]:
import tiktoken
tokeniser = tiktoken.get_encoding("cl100k_base") #gpt2
text = "Hello, do you like tea? <|endoftext|> In the sunlit terra"
integers = tokeniser.encode(text, allowed_special={"<|endoftext|>"})
print(integers)

[9906, 11, 656, 499, 1093, 15600, 30, 220, 100257, 763, 279, 7160, 32735, 60661]


In [18]:
# BPE tokenizers break down unknown words into subwords and individual
# characters. This way, a BPE tokenizer can parse any word and doesn't need to replace unknown
# words with special tokens, such as <|unk|>
strings = tokeniser.decode(integers)
print(strings)

Hello, do you like tea? <|endoftext|> In the sunlit terra


In [19]:
#creating input-target pairs
#first will tokenise the whole testing set

with open("verdict.txt", "r", encoding="utf-8") as f:
    raw_text  = f.read()

enc_text = tokeniser.encode(raw_text)
print(len(enc_text))

4943


In [20]:
#remove first 50 tokens for visual demonstration purposes
enc_sample = enc_text[50:]

In [21]:
# let x = input tokens, y = target tokens, where y=x[pos+1]
context_size = 4
x = enc_sample[:context_size]
y = enc_sample[1:context_size+1]
print(f"x : {x}")
print(f"y :\t {y}")

x : [323, 9749, 5678, 304]
y :	 [9749, 5678, 304, 264]


In [22]:
for i in range(1, context_size+1):
    context = enc_sample[:i]
    desired = enc_sample[i]
    print(context, "--->", desired)

[323] ---> 9749
[323, 9749] ---> 5678
[323, 9749, 5678] ---> 304
[323, 9749, 5678, 304] ---> 264


In [23]:
for i in range(1, context_size+1):
    context = enc_sample[:i]
    desired = enc_sample[i]
    print(tokeniser.decode(context) , "--->" ,tokeniser.decode([desired]))

 and --->  established
 and established --->  himself
 and established himself --->  in
 and established himself in --->  a


In [24]:
import torch.nn.functional as F

y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2])
b = torch.tensor([0.0])
z = x1* w1 + b
a = torch.sigmoid(z)

a
loss = F.binary_cross_entropy(a, y)
loss
#p 216

tensor(0.0852)

In [62]:
import torch
from torch.utils.data import Dataset, DataLoader

class GPTDatasetV1(Dataset):
    def __init__(self, txt, tokeniser, max_length, stride):
        self.tokeniser = tokeniser
        self.input_ids=  []
        self.target_ids =[]
        
        token_ids = tokeniser.encode(txt)
        
        for i in range(0, len(token_ids) - max_length, stride):
            input_chunk = token_ids[i: i + max_length]
            target_chunk= token_ids[i+1: i + max_length+ 1]
            self.input_ids.append(torch.tensor(input_chunk))
            self.target_ids.append(torch.tensor(target_chunk))
            
    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.target_ids[idx]

In [63]:
def create_dataloader_v1(txt, batch_size=4 ,max_length=256, stride=128, shuffle=True, drop_last=True):
    tokeniser = tiktoken.get_encoding("cl100k_base")
    dataset = GPTDatasetV1(txt, tokeniser, max_length, stride)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    return dataloader

In [134]:
with open("verdict.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

dataloader = create_dataloader_v1(raw_text, batch_size=3, max_length=4, stride=2, shuffle=False)
# print(dataloader)
data_iter = iter(dataloader)
first_batch = next(data_iter)

for i in range(0, len(first_batch)+1):
    print(f"{first_batch[0][i]}\t{first_batch[1][i]}\n")



tensor([  40,  473, 1846, 2744])	tensor([ 473, 1846, 2744, 3463])

tensor([1846, 2744, 3463, 7762])	tensor([2744, 3463, 7762,  480])

tensor([3463, 7762,  480,  285])	tensor([ 7762,   480,   285, 22464])



In [135]:
second = next(data_iter)
print(second)

[tensor([[  480,   285, 22464,  4856],
        [22464,  4856,   264, 12136],
        [  264, 12136, 35201,   313]]), tensor([[  285, 22464,  4856,   264],
        [ 4856,   264, 12136, 35201],
        [12136, 35201,   313,  4636]])]


In [94]:
import torch.nn.functional as F
from torch.autograd import grad

y = torch.tensor([0.1])
x1 = torch.tensor([1.1]) #input value
w1 = torch.tensor([2.2], requires_grad=True) #input weight
b = torch.tensor([0.0], requires_grad=True) #bias

z = x1 * w1 + b #net j
a = torch.sigmoid(z) #activation function 

# a = sigmoid((input value * input weight) + bias)

loss = F.binary_cross_entropy(a,y)


grad_L_w1 = grad(loss, w1 , retain_graph=True)
grad_L_b = grad(loss, b , retain_graph=True)

print(grad_L_w1)
print(grad_L_b)


(tensor([0.9002]),)
(tensor([0.8183]),)


In [96]:
loss.backward()
print(w1.grad)
print(b.grad)

tensor([0.9002])
tensor([0.8183])


In [110]:
class NeuralNetwork(torch.nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        
        self.layers = torch.nn.Sequential(
            
            torch.nn.Linear(num_inputs, 30),
            torch.nn.ReLU(),
            
            torch.nn.Linear(30, 20),
            torch.nn.ReLU(),
            
            torch.nn.Linear(20, num_outputs),
                
        )
        
    def forward(self, x):
        logits = self.layers(x)
        return logits

In [116]:
model = NeuralNetwork(50,3)
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [119]:
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model parameters:", num_params)

Total number of trainable model parameters: 2213


In [125]:
s = 0
for p in model.parameters():
    print(p.numel())
    if p.requires_grad:
        s += p.numel()

print("trainable params:", s)

1500
30
600
20
60
3
trainable params: 2213


In [129]:
print(model.layers[0])
print(model.layers[0].weight)
print(model.layers[0].weight.shape)

Linear(in_features=50, out_features=30, bias=True)
Parameter containing:
tensor([[-0.0375,  0.0539, -0.1082,  ...,  0.1038, -0.0810,  0.0478],
        [-0.0741,  0.0248, -0.0768,  ..., -0.0359, -0.0406,  0.0554],
        [-0.0500, -0.0994, -0.0284,  ..., -0.1246, -0.0889,  0.0897],
        ...,
        [ 0.0224, -0.0522,  0.0206,  ..., -0.0381,  0.0340,  0.0025],
        [ 0.1126, -0.0107, -0.0752,  ..., -0.0159,  0.1299,  0.1366],
        [ 0.1150,  0.1264, -0.0367,  ...,  0.1217,  0.1297, -0.1413]],
       requires_grad=True)
torch.Size([30, 50])


In [130]:
torch.manual_seed(123)
model = NeuralNetwork(50, 3)
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [140]:
torch.manual_seed(123)
X= torch.rand((1,50))
out = model.forward(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)


In [142]:
with torch.no_grad():
    out = model(X)
print(out)

tensor([[-0.1262,  0.1080, -0.1792]])


In [146]:
with torch.no_grad():
    out = torch.softmax(model(X), dim=1)
print(out)

tensor([[0.3113, 0.3934, 0.2952]])


In [147]:
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

y_train = torch.tensor([0,0,0,1,1])

X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])
y_test = torch.tensor([0, 1])


In [194]:
from torch.utils.data import Dataset
class ToyDataset(Dataset):
    def __init__(self, X, y):
        self.features = X
        self.labels= y
    
    def __getitem__(self, index):
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y

    def __len__(self):
        return self.labels.shape[0]


train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [195]:
print(len(train_ds))

5


In [196]:
from torch.utils.data import DataLoader

In [197]:
torch.manual_seed(123)

train_loader = DataLoader(
    dataset= train_ds, 
    batch_size= 2,
    shuffle= True,
    num_workers=  0
)

test_loader = DataLoader(
    dataset = test_ds,
    batch_size = 2,
    shuffle=True,
    num_workers=0
)

In [198]:
for idx, (x,y) in enumerate(train_loader):
    print(f"Batch {idx+1}:" , x, y)

Batch 1: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])
Batch 2: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])


In [199]:
train_loader = DataLoader(
    dataset= train_ds, 
    batch_size= 2,
    shuffle= True,
    num_workers=  0,
    drop_last = True
)

In [200]:
for idx, (x,y) in enumerate(train_loader):
    print(f"Batch {idx+1}:" , x, y)

Batch 1: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])
Batch 2: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])


In [203]:
import torch.nn.functional as F

torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
optimiser= torch.optim.SGD(model.parameters(), lr=0.375)
num_epochs = 3

for epoch in range(num_epochs):
    
    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):
        
        logits = model(features)
        
        loss = F.cross_entropy(logits, labels)
        
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        
        ### LOGGING
        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
        f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
        f" | Train Loss: {loss:.2f}")
        
    model.eval()
            

Epoch: 001/003 | Batch 000/002 | Train Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train Loss: 0.56
Epoch: 002/003 | Batch 000/002 | Train Loss: 0.49
Epoch: 002/003 | Batch 001/002 | Train Loss: 0.14
Epoch: 003/003 | Batch 000/002 | Train Loss: 0.07
Epoch: 003/003 | Batch 001/002 | Train Loss: 0.01


In [206]:
print(f"{X_train} \n")

model.eval()
with torch.no_grad():
    outputs = model(X_train)
print(outputs)

tensor([[-1.2000,  3.1000],
        [-0.9000,  2.9000],
        [-0.5000,  2.6000],
        [ 2.3000, -1.1000],
        [ 2.7000, -1.5000]]) 

tensor([[ 2.2694, -3.3964],
        [ 2.0256, -3.0810],
        [ 1.6876, -2.6358],
        [-1.1288,  1.1294],
        [-1.2999,  1.3172]])


In [213]:
torch.set_printoptions(sci_mode= False)
probas = torch.softmax(outputs, dim=1)
print(probas)

tensor([[0.9965, 0.0035],
        [0.9940, 0.0060],
        [0.9869, 0.0131],
        [0.0946, 0.9054],
        [0.0680, 0.9320]])


In [214]:
predicted_labels = torch.argmax(outputs, dim=1)
print(predicted_labels)


tensor([0, 0, 0, 1, 1])


In [219]:
print(predicted_labels == y_train)

tensor([True, True, True, True, True])


In [220]:
print(torch.sum(predicted_labels == y_train))

tensor(5)


In [227]:
def compute_accuracy(model, dataloader):
    
    model = model.eval()
    correct = 0.0
    total_examples = 0
    
    for idx, (features, labels) in enumerate(dataloader):
        
        with torch.no_grad():
            logits = model(features)
            
        predictions = torch.argmax(logits, dim=1)
        compare = labels == predictions
        correct += torch.sum(compare)
        total_examples += len(compare)
        
    return (correct/  total_examples).item()*100
            

In [228]:
print(compute_accuracy(model, train_loader))

100.0


In [229]:
print(compute_accuracy(model, test_loader))

100.0


In [230]:
torch.save(model.state_dict(), "model.pth")

In [231]:
model = NeuralNetwork(2, 2)

In [232]:
model.load_state_dict(torch.load("model.pth"))

  model.load_state_dict(torch.load("model.pth"))


<All keys matched successfully>

In [233]:
print(torch.cuda.is_available())

True


In [234]:
tensor_1 = torch.tensor([1., 2., 3.])
tensor_2 = torch.tensor([4., 5., 6.])
print(tensor_1 + tensor_2)

tensor([5., 7., 9.])


In [241]:
tensor_1 = tensor_1.to("cuda")
tensor_2 = tensor_2.to("cuda")
print(tensor_1 + tensor_2)

tensor([5., 7., 9.], device='cuda:0')


In [242]:
tensor_1 = tensor_1.to("cpu")
print(tensor_1 + tensor_2)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

In [244]:
torch.manual_seed(123)
model = NeuralNetwork(num_inputs=2, num_outputs=2)
device = torch.device("cuda") #A
model = model.to(device) #B
optimizer = torch.optim.SGD(model.parameters(), lr=0.375)
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, labels) in enumerate(train_loader):
        features, labels = features.to(device), labels.to(device) #C
        logits = model(features)
        loss = F.cross_entropy(logits, labels) # Loss function
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ### LOGGING
        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
        f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
        f" | Train/Val Loss: {loss:.2f}")
        model.eval()
    # Optional model evaluation

Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.56
Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.49
Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.14
Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.07
Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.01


In [248]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda
