In [81]:
import torch  # PyTorch for building and training the neural network
import torch.nn.functional as F  # Provides functions for different neural network operations
import matplotlib.pyplot as plt  # For plotting and visualizing data
%matplotlib inline  # Ensures that plots appear inline within the Jupyter notebook

In [82]:
# Reading in the names dataset
# The file 'names.txt' is assumed to contain a list of names, one per line.
# The context manager closes the file handle as soon as the contents are read,
# instead of leaving it open until garbage collection.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [83]:
# Character vocabulary for the dataset.
# `chars`: every distinct character appearing in `words`, in sorted order.
# `stoi`:  character -> integer id; dataset characters are numbered from 1.
# `itos`:  the inverse lookup, integer id -> character.
chars = sorted(set(''.join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0  # id 0 is reserved for the '.' end-of-sequence marker
itos = {idx: ch for ch, idx in stoi.items()}

In [84]:
# Build the training examples.
# Every character of every word (plus a closing '.') becomes one example:
#   input  = ids of the `block_size` characters that precede it
#   target = id of the character itself
# At the start of a word the missing predecessors are filled with id 0.
block_size = 3
inputs, targets = [], []
for name in words:
    window = [0] * block_size  # all-padding context before the first character
    for target in (stoi[c] for c in name + '.'):
        inputs.append(window)
        targets.append(target)
        # Slide the window: drop the oldest id, append the one just seen.
        # This creates a new list, so rows already stored are not modified.
        window = window[1:] + [target]

# Tensors used for training: X holds the contexts, Y the target ids
X = torch.tensor(inputs)
Y = torch.tensor(targets)

In [166]:
# Random embedding table C: one 10-dimensional row per vocabulary entry.
# The row count is read from the vocabulary (`stoi`, which includes '.')
# instead of being hard-coded as 27, so it follows the characters actually
# present in the dataset.
C = torch.randn(len(stoi), 10)

In [167]:
# Hidden layer: maps the block_size concatenated character embeddings to 200 units.
# The input width is derived from block_size and C's embedding width rather than
# hard-coded as 30, so the shapes stay consistent if either one changes.
w1 = torch.randn(block_size * C.shape[1], 200)
b1 = torch.randn(w1.shape[1])

In [168]:
# Output layer: maps the hidden units to one logit per vocabulary entry.
# Sizes are derived from w1 and the vocabulary rather than hard-coded as 200 and 27.
w2 = torch.randn(w1.shape[1], len(stoi))
b2 = torch.randn(w2.shape[1])

In [169]:
# Every trainable tensor of the model, gathered so later cells can loop over them.
parameters = [
    C,       # embedding table
    w1, b1,  # hidden layer
    w2, b2,  # output layer
]

In [170]:
# Turn on gradient tracking for each parameter so loss.backward() fills in .grad.
for param in parameters:
    param.requires_grad_(True)

In [173]:
#Training of the network
num_steps = 200       # number of minibatch gradient steps run by this cell
batch_size = 32       # examples drawn (with replacement) per step
learning_rate = 0.01  # step size of the plain gradient-descent update

lossi=[]  # log10 of the minibatch loss at each step
step=[]   # step index matching each entry of lossi
for i in range(num_steps):
    # Draw a random minibatch of example indices
    ix = torch.randint(0, X.shape[0], (batch_size,))

    #Forward pass:
    emb = C[X[ix]]
    # Flatten each example's context embeddings into one row. The row width is
    # inferred (-1) instead of hard-coded as 30, so it follows block_size and
    # the embedding width.
    h1 = torch.tanh(emb.view(emb.shape[0], -1) @ w1 + b1)
    logits = h1 @ w2 + b2
    loss = F.cross_entropy(logits,Y[ix])

    #Back_Propagation:
    for p in parameters:
        p.grad = None  # clear gradients left over from the previous step
    loss.backward()

    #update:
    # The in-place update runs under no_grad so autograd does not record it;
    # this replaces writing through `.data`.
    with torch.no_grad():
        for p in parameters:
            if p.grad is not None:
                p -= learning_rate * p.grad
    step.append(i)
    lossi.append(loss.log10().item())
# Loss of the last minibatch only (a noisy estimate, not the full-dataset loss)
print(loss)

tensor(2.0215, grad_fn=<NllLossBackward0>)


In [177]:
# Look up the embedding of every context id in X at once (one row of C per id).
# NOTE(review): emb1 is not used by any other cell visible here — confirm it is still needed.
emb1 = C[X]

In [221]:
#Generate function
def generate(number_of_names):
    """Sample names from the model and print them, one per line.

    Each name starts from an all-padding context (`block_size` zeros).
    Characters are drawn one at a time from the model's predicted distribution
    until id 0 (the '.' end marker) is drawn; the printed name includes that
    trailing '.'.

    Args:
        number_of_names: how many names to sample and print.

    Returns:
        None. The names are written to stdout.

    Reads the notebook-level C, w1, b1, w2, b2, itos and block_size.
    """
    # Sampling needs no gradients. Disabling autograd avoids building a graph
    # through the requires_grad parameters on every generated character.
    with torch.no_grad():
        for _ in range(number_of_names):
            context = [0] * block_size
            out = []
            while True:
                emb = C[torch.tensor([context])]
                h1 = torch.tanh(emb.view(1, -1) @ w1 + b1)
                logits = h1 @ w2 + b2
                probs = F.softmax(logits, dim=1)  # next-character distribution
                ix = torch.multinomial(probs, num_samples=1).item()
                context = context[1:] + [ix]  # slide the context window
                out.append(itos[ix])
                if ix == 0:
                    break
            print(''.join(out))

# Sample and print 20 names
generate(number_of_names=20)

zam.
azzeni.
kolaw.
sarrie.
nav.
kagtermanel.
deonzy.
conianrick.
kaizana.
elett.
yazinett.
ghy.
ausefa.
jayvon.
nick.
itsabetza.
jackhammilethayevayana.
jarvin.
natangeloannahreen.
ansrid.
