In [1]:
import torch
import torch.nn as nn
# Seed PyTorch's RNG so weight init, batch sampling and text generation give
# the same results on every Restart Kernel -> Run All.
seed = 1337
torch.manual_seed(seed)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Hyperparameters shared by the cells below.
block_size = 8         # tokens in each training context window
batch_size = 4         # context windows sampled per batch
max_iters = 10000      # optimizer steps taken by the training loop
learning_rate = 3e-4   # learning rate handed to the optimizer

cpu


In [2]:
# Read the whole training corpus into memory as one string.
with open('wizard_of_oz.txt', 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

# The vocabulary is every distinct character of the corpus, in sorted order.
unique_chars = set(text)
chars = sorted(unique_chars)
vocab_size = len(chars)

print(chars)
print(vocab_size)

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
80


In [3]:
# Lookup tables between characters and integer ids; a character's id is its
# position in `chars`.
string_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_string = dict(enumerate(chars))


def encode(s):
    """Return the list of integer ids for the characters of string `s`.

    Raises KeyError if `s` contains a character that is not in `chars`.
    """
    return [string_to_int[c] for c in s]


def decode(l):
    """Return the string spelled by the iterable of integer ids `l`.

    Raises KeyError if an id has no entry in `int_to_string`.
    """
    return ''.join(int_to_string[i] for i in l)


# Round-trip check: decoding an encoded string gives the original back.
text1 = encode('hello')
print(text1)
print(decode(text1))

[61, 58, 65, 65, 68]
hello


In [4]:
# Encode the full corpus and hold it as a 1-D tensor of integer token ids.
token_ids = encode(text)
data = torch.tensor(token_ids, dtype=torch.long)

# Show only the first 100 ids rather than dumping the whole tensor.
print(data[:100])

tensor([ 1,  1, 28, 39, 42, 39, 44, 32, 49,  1, 25, 38, 28,  1, 44, 32, 29,  1,
        47, 33, 50, 25, 42, 28,  1, 33, 38,  1, 39, 50,  0,  0,  1,  1, 26, 49,
         0,  0,  1,  1, 36, 11,  1, 30, 42, 25, 38, 35,  1, 26, 25, 45, 37,  0,
         0,  1,  1, 25, 45, 44, 32, 39, 42,  1, 39, 30,  1, 44, 32, 29,  1, 47,
        33, 50, 25, 42, 28,  1, 39, 30,  1, 39, 50,  9,  1, 44, 32, 29,  1, 36,
        25, 38, 28,  1, 39, 30,  1, 39, 50,  9])



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\HP\AppData\Local\Programs\Python\Python312\Lib\runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "D:\fcc-gpt-project\cuda\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "D:\fcc-gpt-project\cuda\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.s

In [5]:
# The first 80% of the token stream is used for training, the rest for validation.
n = int(0.8 * len(data))
train_data, val_data = data[:n], data[n:]


In [6]:
# Inputs are the first block_size tokens; targets are the same window shifted
# one position to the right.
X, y = train_data[:block_size], train_data[1: block_size + 1]

# Every prefix of X is a context whose target is the token that follows it.
for t in range(block_size):
    context, target = X[:t+1], y[t]
    print("When input is", context, "target is", target)

When input is tensor([1]) target is tensor(1)
When input is tensor([1, 1]) target is tensor(28)
When input is tensor([ 1,  1, 28]) target is tensor(39)
When input is tensor([ 1,  1, 28, 39]) target is tensor(42)
When input is tensor([ 1,  1, 28, 39, 42]) target is tensor(39)
When input is tensor([ 1,  1, 28, 39, 42, 39]) target is tensor(44)
When input is tensor([ 1,  1, 28, 39, 42, 39, 44]) target is tensor(32)
When input is tensor([ 1,  1, 28, 39, 42, 39, 44, 32]) target is tensor(49)


In [7]:
def get_batch(split):
    """Sample a random batch of (input, target) windows from one data split.

    Args:
        split: 'train' draws from `train_data`; any other value draws from
            `val_data`.

    Returns:
        A pair `(x, y)` of tensors of shape (batch_size, block_size), both
        placed on `device`. `y` is `x` shifted one position to the right, so
        `y[b, t]` is the token that follows `x[b, t]` in the source data.
    """
    source = train_data if split == 'train' else val_data
    # randint's upper bound is exclusive, so the largest start offset is
    # len(source) - block_size - 1; the target window, which ends one token
    # later than the input window, therefore always stays in range.
    ix = torch.randint(len(source) - block_size, (batch_size,))
    x = torch.stack([source[i: i + block_size] for i in ix])
    y = torch.stack([source[i + 1: i + block_size + 1] for i in ix])
    # The model is moved to `device`, so the batch has to live there as well;
    # otherwise a CUDA run fails with a device-mismatch error.
    return x.to(device), y.to(device)

# Draw one training batch and show the inputs followed by their shifted targets.
x, y = get_batch('train')
print('inputs:', x, sep='\n')

print('target:', y, sep='\n')
    

inputs:
tensor([[62, 72, 61, 10, 65, 62, 67, 58],
        [54, 72, 64, 58, 57,  1, 28, 68],
        [73, 61, 58, 66,  9,  0, 55, 74],
        [54, 67, 57,  1, 54, 73,  1, 68]])
target:
tensor([[72, 61, 10, 65, 62, 67, 58, 11],
        [72, 64, 58, 57,  1, 28, 68, 71],
        [61, 58, 66,  9,  0, 55, 74, 73],
        [67, 57,  1, 54, 73,  1, 68, 67]])


In [8]:
from torch.nn import functional as F

In [9]:
class BigramLanguageModel(nn.Module):
    """Bigram language model: next-token logits depend only on the current token.

    The whole model is one (vocab_size, vocab_size) embedding table; the row
    looked up for token id `i` is used directly as the logits over the token
    that follows `i`.
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, index, targets=None):
        """Compute next-token logits and, when targets are given, the loss.

        Args:
            index: (B, T) tensor of token ids.
            targets: optional (B, T) tensor of next-token ids.

        Returns:
            A pair `(logits, loss)`. Without targets, `logits` has shape
            (B, T, C) and `loss` is None. With targets, `logits` is returned
            flattened to (B*T, C) and `loss` is the cross-entropy between the
            flattened logits and targets.
        """
        logits = self.token_embedding_table(index)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape   # batch, time, channels
            # F.cross_entropy wants (N, C) logits against (N,) class ids, so
            # the batch and time dimensions are merged first.
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)
        return logits, loss

    def generate(self, index, max_new_tokens):
        """Extend each sequence in `index` by `max_new_tokens` sampled tokens.

        Args:
            index: (B, T) tensor of token ids forming the current context. It
                is moved to the device of the model weights if necessary.
            max_new_tokens: number of tokens to sample and append.

        Returns:
            A (B, T + max_new_tokens) tensor of token ids on the model's
            device: the given context followed by the sampled tokens.
        """
        # Accept a context built on another device (e.g. a CPU tensor while
        # the model sits on CUDA) instead of failing inside the embedding.
        index = index.to(self.token_embedding_table.weight.device)
        # Sampling needs no gradients, so skip building the autograd graph.
        with torch.no_grad():
            for _ in range(max_new_tokens):
                # A bigram prediction depends only on the last token, so only
                # that column is fed through the model. Calling the module
                # (not .forward) keeps nn.Module hooks working.
                logits, _ = self(index[:, -1:])
                # focus only on the last time step
                logits = logits[:, -1, :]  # becomes (B, C)
                # apply softmax to get probabilities
                probs = F.softmax(logits, dim=-1)  # (B, C)
                # sample from the distribution
                index_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
                # append sample index to the running sequence
                index = torch.cat((index, index_next), dim=1)  # (B, T+1)
        return index

model = BigramLanguageModel(vocab_size)
m = model.to(device)

# Sample 500 characters from the model before any training, starting from a
# single token with id 0. The context is created on `device` so it lives on
# the same device as the model; a CPU context breaks a CUDA run.
context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)



Gj8,kDbM
7dT-SXkVF[wZr?63apEmwM
a] YlS":QE5Q dv]iB;-*Z
8.r_F(dvqdd(MzcFT*kO:Va2jF9aT"NFlXTgA:.
D3a] caphV!";_qjw4X13oBa'WI,zDo0P0:I[CHjk:QDqJo1IU7"k4?D,IgOmHn6,Z[gV8f?
70(j,
]]-g*AJWHHW?LdIfXo026vRye2Z(60CV!*quclb;[wI4o_hVzKgRqe6_NFx!":OKxaBaT jE])ZD3Un?U7"ecSGxE96)yb5gTx&jmjKv3aUi3i26ZhUm-S-'D)n;T[S,&Mc70M;Hf
EhH,
7UJD d'bIUKY LFi?&4omhFx&&j17V;X;0!L,Qf;] jQxSNiqS7AJNnQB5qdAgiUQn!.mx*1f1hKHwU
K(K-4L0DU7VB?yPbq?p!j.zJjpp8dsiUHni
VI(5tXIjLH9FEgUp33A9!J-&n4X(?AvYCUbSD;X,nWqdh8.5D98Yd(RIvYIj3IPM8iX


In [10]:
# create a pytorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# `step` rather than `iter`: a module-level `iter` would shadow the builtin
# for every cell that runs afterwards.
for step in range(max_iters):
    # sample a batch of data (inputs xb, next-token targets yb)
    xb, yb = get_batch('train')
    # make sure the batch is on the same device as the model
    xb, yb = xb.to(device), yb.to(device)

    # evaluate the loss; calling the module (not .forward) keeps hooks working
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
# loss of the final batch only, not an average over the run
print(loss.item())

3.0913259983062744


In [11]:
# Sample 500 characters again after training, starting from a single token
# with id 0. The context is created on `device` so it lives on the same
# device as the model; a CPU context breaks a CUDA run.
context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_chars = decode(m.generate(context, max_new_tokens=500)[0].tolist())
print(generated_chars)


7mathecth t:VKd dB-]tyZQxXn-eiH
nshal
G.2nsto;3(KitchVke,
aag.J-1;Q1P_0W3]236o illtRBE Lys flf mL2gONX7UN-Kto I.UZRS7;)69)A oipid ;g. p7mPV!9[S(ON3QOf t4TTAgofQ!"Y(PEE;v2)z5bd f vY9x"haulcGJ!"ombT5o?ppaHubzAG9, f df as4YoI5;be huewalbtavQ7?joyPpa8'P5Q09qWGGx[XklmI!
mh _[96,Ys4.D37"_T0ARxVONONOyFd-2cad1P0
7dZoFjZ: dBgG0&:c*JvxSe
D tng0KDGx
X0r*Uc5ch Debeb:e RKiHd edgse!:4y dn5;URmARy b G2vMD0dey *?B]vk'0E&&:3"Sx3lov6j!BU16Xmyes'RWpZr4.3lHamY&yvYRqykWYO([Ife fLfP,LBw&ZHwat, dlw helizj!J&[we,L.t
nv
