In [1]:
import torch
from torch import tensor, cat
from torch.nn import functional as F

In [12]:
# Bigram counting approach functions.

def get_name_list_with_st(): #st - special token
    """Read ``names.txt`` and wrap every name in the '.' special token.

    The file is read from the current working directory, one name per line.

    Returns:
        list[str]: one entry per line of the file, each as ``"." + name + "."``
        so that '.' marks both the start and the end of a name.

    Raises:
        OSError: if ``names.txt`` cannot be opened.
    """
    # The context manager closes the file handle deterministically
    # (a bare open(...).read() leaves it to the garbage collector).
    with open('names.txt', 'r') as names_file:
        words = names_file.read().splitlines()
    return ["." + w + "." for w in words]


def get_indices(word_st):
    """Build character <-> index lookup tables from a list of names.

    Args:
        word_st: iterable of strings (names already wrapped in the special
            token). Only the set of characters they contain matters.

    Returns:
        tuple[dict, dict]: ``(stoi, itos)`` where ``stoi`` maps each distinct
        character, in sorted order, to ``0, 1, 2, ...`` and ``itos`` is the
        inverse mapping.
    """
    # Use the argument itself: the previous body read a notebook-level global
    # named `words_st`, so the parameter was ignored and the function failed
    # with NameError unless that global happened to exist.
    chars = sorted(set(''.join(word_st)))
    # zip() stops at the shorter input, so at most 27 characters get an index
    # (matching the 27x27 tables used by the rest of the notebook).
    stoi = dict(zip(chars, range(27)))
    itos = {idx: ch for ch, idx in stoi.items()}

    return stoi, itos

def get_bigram_porb_tensor(words_st, stoi, itos):
    """Turn bigram counts over ``words_st`` into a 27x27 row-normalised table.

    Args:
        words_st: iterable of strings; every adjacent character pair in each
            string is counted as one bigram.
        stoi: mapping from character to row/column index (must be < 27).
        itos: unused here; accepted so the call signature stays uniform.

    Returns:
        torch.Tensor: float tensor of shape (27, 27) where entry ``[i, j]`` is
        ``count(i followed by j) / count(i followed by anything)``. A row with
        no counts at all is 0/0 and therefore NaN.
    """
    size = 27
    # Tally in a plain nested list first, then convert to a tensor once.
    tallies = [[0] * size for _ in range(size)]
    for name in words_st:
        for left, right in zip(name, name[1:]):
            tallies[stoi[left]][stoi[right]] += 1

    counts = torch.tensor(tallies, dtype=torch.int32)
    as_float = counts.float()
    row_totals = as_float.sum(1, keepdim=True)

    return as_float / row_totals

def generate_bigram_names(no_of_names_to_generate, bigram_prob_tensor, stoi, itos):
    """Sample names from a bigram probability table and print one per line.

    Sampling starts from index 0 and repeatedly draws the next index from the
    row of the current index until index 0 is drawn again, which ends the name.
    The generator is re-seeded with a fixed seed on every call, so repeated
    calls with the same table print the same names.

    Args:
        no_of_names_to_generate: how many names to print.
        bigram_prob_tensor: 2-D tensor whose row ``i`` holds the sampling
            weights for the index that follows ``i``.
        stoi: unused here; accepted so the call signature stays uniform.
        itos: mapping from sampled index to the character to emit.

    Returns:
        None. The names are written to stdout.
    """
    boundary = 0
    rng = torch.Generator().manual_seed(2147483647)

    def draw_after(row_idx):
        # One categorical draw from the row belonging to `row_idx`.
        picked = torch.multinomial(
            bigram_prob_tensor[row_idx],
            num_samples=1,
            replacement=True,
            generator=rng,
        )
        return picked.item()

    for _ in range(no_of_names_to_generate):
        letters = []
        current = draw_after(boundary)
        while current != boundary:
            letters.append(itos[current])
            current = draw_after(current)
        print(''.join(letters))




In [14]:
# Bigram model run: build the count-based bigram table and sample names from it.

# Names from names.txt, each wrapped in the '.' special token.
words_st = get_name_list_with_st()
# Character -> index and index -> character lookup tables.
stoi, itos = get_indices(words_st)
# 27x27 table of row-normalised bigram counts.
bigram_prob_tensor = get_bigram_porb_tensor(words_st, stoi, itos)
no_of_names_to_generate = 10
# Prints the sampled names; the sampler uses a fixed seed, so output is repeatable.
generate_bigram_names(no_of_names_to_generate, bigram_prob_tensor, stoi, itos)

cexze
momasurailezitynn
konimittain
llayn
ka
da
staiyaubrtthrigotai
moliellavo
ke
teda


In [120]:
# Functions for Bigram 27 neuron single layer implementation.

class Bigram_nn:
    """Single-layer, 27-neuron bigram character model trained by gradient descent.

    The model is one 27x27 weight matrix. A one-hot encoded character is
    multiplied by the matrix to give 27 logits, and a softmax turns those into
    the distribution over the next character. Training minimises the average
    negative log likelihood of the observed next characters.

    Typical use::

        model = Bigram_nn()
        words_st = model.get_name_list_with_st()
        stoi, itos = model.get_indices(words_st)
        xs, ys = model.get_encoding(words_st, stoi, itos)
        model.train(epochs, xs, ys, stoi, itos)
        model.generate_bigram_nn_names(10)
    """

    VOCAB_SIZE = 27   # number of character indices the model supports (0..26)
    START_TOKEN = 0   # index that both starts and terminates a generated name

    def __init__(self):
        # Each column of the weight matrix corresponds to one of the 27 neurons of the single layer.
        self.weight_matrix = torch.randn((self.VOCAB_SIZE, self.VOCAB_SIZE), requires_grad=True)
        self.loss = None
        self.learning_rate = 30
        # Each row of the encoding matrix is the one-hot vector of a single character index.
        self.one_hot_encoding = F.one_hot(
            torch.arange(0, self.VOCAB_SIZE), num_classes=self.VOCAB_SIZE
        ).float()
        # index -> character mapping, remembered by get_indices()/get_encoding()
        # so that generation does not depend on a notebook-level global.
        self.itos = None

    def get_name_list_with_st(self): #st - special token
        """Read ``names.txt`` (current directory) and wrap each name in '.'.

        Returns:
            list[str]: one ``"." + name + "."`` string per line of the file.

        Raises:
            OSError: if ``names.txt`` cannot be opened.
        """
        # Context manager so the file handle is always closed.
        with open('names.txt', 'r') as names_file:
            words = names_file.read().splitlines()
        words_st = ["." + w + "." for w in words]
        print('retrieved name list')
        return words_st

    def get_indices(self, word_st):
        """Build character <-> index lookup tables from a list of names.

        Args:
            word_st: iterable of strings whose characters form the vocabulary.

        Returns:
            tuple[dict, dict]: ``(stoi, itos)``; characters are indexed in
            sorted order. At most ``VOCAB_SIZE`` characters receive an index.
        """
        # Use the argument: the previous body read a global named `words_st`
        # and therefore ignored (or failed without) the value passed in.
        chars = sorted(set(''.join(word_st)))
        stoi = dict(zip(chars, range(self.VOCAB_SIZE)))
        itos = {idx: ch for ch, idx in stoi.items()}
        self.itos = itos

        print('retrieved indices')
        return stoi, itos

    def get_encoding(self, words_st, stoi, itos):
        """Encode every bigram in ``words_st`` as a (one-hot input, target index) pair.

        Args:
            words_st: iterable of strings; each adjacent character pair is one example.
            stoi: mapping from character to index (indices must be < VOCAB_SIZE).
            itos: inverse of ``stoi``; remembered on the instance for generation.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: ``xs`` is float32 of shape
            (num_bigrams, 27), each row the one-hot vector of the first
            character; ``ys`` is int64 of shape (num_bigrams, 1) holding the
            index of the second character.
        """
        print('encoding started')
        if itos is not None:
            self.itos = itos

        # Collect plain integer indices and build each tensor once. Growing the
        # tensors with torch.cat per bigram re-copies everything on every step,
        # which is quadratic in the number of bigrams.
        first_ids = []
        second_ids = []
        for w in words_st:
            for ch1, ch2 in zip(w, w[1:]):
                first_ids.append(stoi[ch1])
                second_ids.append(stoi[ch2])

        # Explicit int64 dtype keeps empty inputs valid as indices.
        xs = self.one_hot_encoding[torch.tensor(first_ids, dtype=torch.int64)]
        ys = torch.tensor(second_ids, dtype=torch.int64).unsqueeze(1)

        print('retrieved encodings')
        return xs, ys

    def _logits(self, xs):
        """Return the raw layer output (one row of 27 logits per row of ``xs``)."""
        return xs @ self.weight_matrix

    def forward_with_loss(self, xs, ys):
        """Run the layer on ``xs`` and store the loss against ``ys`` in ``self.loss``.

        Args:
            xs: float tensor of shape (N, 27), one encoded character per row.
            ys: int64 tensor of shape (N, 1) with the target next-character index.

        The loss is the average negative log likelihood of the targets. It is
        computed with ``log_softmax`` rather than ``log(softmax(...))`` so that
        very small probabilities do not underflow to 0 and yield ``-inf``.
        """
        log_probs = F.log_softmax(self._logits(xs), dim=1)
        self.loss = -torch.mean(torch.gather(log_probs, dim=1, index=ys))

    def forward(self, xs):
        """Return next-character probabilities for each row of ``xs``.

        Args:
            xs: float tensor of shape (N, 27).

        Returns:
            torch.Tensor: shape (N, 27); each row sums to 1.
        """
        return F.softmax(self._logits(xs), dim=1)

    def print_loss(self):
        """Print the most recently computed loss as a Python float."""
        print(self.loss.item())

    def backward(self):
        """Back-propagate ``self.loss`` into ``self.weight_matrix.grad``."""
        self.loss.backward()

    def update_weights(self):
        """Take one gradient-descent step and reset the accumulated gradient."""
        with torch.no_grad():
            self.weight_matrix -= self.learning_rate * self.weight_matrix.grad

        # Gradients accumulate across backward() calls, so clear them for the next step.
        self.weight_matrix.grad.zero_()

    def train(self, epochs, xs, ys, stoi, itos, print_every=1):
        """Run ``epochs`` full-batch gradient-descent steps, printing the loss.

        Args:
            epochs: number of steps to run.
            xs, ys: training tensors as returned by ``get_encoding``.
            stoi, itos: unused; kept for call compatibility.
            print_every: print the loss every this many epochs (default 1 =
                every epoch). Pass 0 or None to suppress per-epoch printing.
        """
        # The values printed below are losses, so label them as such
        # (the header previously read "Learning rate:").
        print('Loss per epoch:\n')
        for epoch in range(epochs):
            self.forward_with_loss(xs, ys)
            if print_every and epoch % print_every == 0:
                self.print_loss()
            self.backward()
            self.update_weights()

    def generate_bigram_nn_names(self, no_of_names_to_generate, itos=None):
        """Sample names from the model and print one per line.

        Each name starts from ``START_TOKEN`` and is extended by sampling the
        next index from the model's output distribution until ``START_TOKEN``
        is sampled again. The generator is re-seeded with a fixed seed on every
        call, so the output is repeatable for fixed weights.

        Args:
            no_of_names_to_generate: how many names to print.
            itos: optional index -> character mapping. Defaults to the mapping
                remembered from ``get_indices`` / ``get_encoding``.

        Raises:
            ValueError: if no ``itos`` mapping is available.
        """
        if itos is None:
            itos = self.itos
        if itos is None:
            raise ValueError(
                "No index-to-character mapping available: call get_indices() "
                "or get_encoding() first, or pass itos explicitly."
            )

        g = torch.Generator().manual_seed(2147483647)

        # Sampling needs no gradients.
        with torch.no_grad():
            for _ in range(no_of_names_to_generate):
                next_idx = self.START_TOKEN
                chars = []
                while True:
                    character_encoding = self.one_hot_encoding[next_idx].unsqueeze(0)
                    layer1_output_probs = self.forward(character_encoding)
                    next_idx = torch.multinomial(
                        layer1_output_probs, num_samples=1, replacement=True, generator=g
                    ).item()

                    if next_idx == self.START_TOKEN:
                        break
                    chars.append(itos[next_idx])

                print(''.join(chars))
    



        
        
        
    


In [121]:
# Generate encodings: create the model and turn the name list into training tensors.
# Constructing the model draws a fresh random 27x27 weight matrix.
bigra_nn_obj = Bigram_nn()

# Names from names.txt, each wrapped in the '.' special token.
words_st = bigra_nn_obj.get_name_list_with_st()
# Character <-> index lookup tables.
stoi, itos = bigra_nn_obj.get_indices(words_st)
# xs: one one-hot row per bigram's first character; ys: index of its second character.
xs, ys = bigra_nn_obj.get_encoding(words_st, stoi, itos)

retrieved name list
retrieved indices
encoding started
retrieved encodings


In [122]:
# Train the model: each epoch is one gradient-descent step over all of xs/ys,
# and the loss is printed at every step (so this cell emits one line per epoch).
epochs = 10000
bigra_nn_obj.train(epochs, xs, ys, stoi, itos)


Learning rate:

3.834730386734009
3.5816352367401123
3.3932816982269287
3.2531557083129883
3.1470248699188232
3.063108444213867
2.9944891929626465
2.937509775161743
2.8898656368255615
2.8498568534851074
2.816121816635132
2.787524461746216
2.7631139755249023
2.742100715637207
2.723846197128296
2.7078380584716797
2.6936748027801514
2.681039333343506
2.6696834564208984
2.659410238265991
2.650062084197998
2.641510486602783
2.6336498260498047
2.6263935565948486
2.619668483734131
2.613412857055664
2.6075756549835205
2.6021127700805664
2.596986770629883
2.59216570854187
2.58762264251709
2.5833332538604736
2.5792765617370605
2.575434446334839
2.571791172027588
2.568331718444824
2.5650436878204346
2.5619149208068848
2.5589351654052734
2.5560951232910156
2.5533857345581055
2.5507986545562744
2.5483267307281494
2.5459635257720947
2.5437018871307373
2.541536808013916
2.539462089538574
2.53747296333313
2.535564422607422
2.5337321758270264
2.5319721698760986
2.5302798748016357
2.5286526679992676
2.5

In [123]:
# Generate names from the trained bigram network.
# The sampler is seeded with a fixed value inside the method, so for the same
# weights this cell prints the same names on every run.
no_of_names_to_generate = 10
bigra_nn_obj.generate_bigram_nn_names(no_of_names_to_generate)


cexze
momasurailezitynn
konimittain
llayn
ka
da
staiyaubrtthrigotai
moliellavo
ke
teda
