In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
! wget "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt" -c -P {'data/'}

--2023-12-22 04:39:47--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘data/input.txt’


2023-12-22 04:39:47 (18.6 MB/s) - ‘data/input.txt’ saved [1115394/1115394]



## Imports

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
import numpy as np

## Setting device

In [4]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)

cuda


In [5]:
# loading data into memory
data_file = '/kaggle/working/data/input.txt'

# Read only the first 40,000 characters of the corpus (text mode counts
# characters, not bytes). The context manager closes the file handle as soon
# as the read is done instead of leaking it for the life of the kernel.
with open(data_file, 'r') as corpus_file:
    data = corpus_file.read(40000)

# sorted list of the distinct characters -> defines the vocabulary
chars = sorted(set(data))

data_size, vocab_size = len(data), len(chars)

print('Data has {} characters, {} unique'.format(data_size, vocab_size))


Data has 40000 characters, 58 unique


In [6]:
# Lookup tables between characters and their integer ids
# (the id of a character is its position in the sorted `chars` list).

id_to_chars = dict(enumerate(chars))
char_to_id = {ch: idx for idx, ch in id_to_chars.items()}

In [7]:
# Split the text into a mutable list of single characters and show its length.
data = [*data]
len(data)

40000

In [8]:
# Replace every character by its integer id, position by position
# (slice assignment keeps the same list object, i.e. the update is in place).

data[:] = [char_to_id[ch] for ch in data]

In [9]:
# Build a 1-D tensor of character ids directly on the compute device.
data = torch.tensor(data, device=device)
print(data.shape)
# Add a trailing axis of size 1 so the tensor becomes (num_chars, 1).
data = data.unsqueeze(1)
print(data.shape)

torch.Size([40000])
torch.Size([40000, 1])


In [10]:
# Small demo of what unsqueeze does on each axis of a 1-D tensor.
x = torch.tensor([1, 2, 3, 4])
print(x.shape)
# new leading axis -> a single row
x1 = x.unsqueeze(0)
print(x1)
print(x1.shape)
# new trailing axis -> a single column
x2 = x.unsqueeze(1)
print(x2)
print(x2.shape)

torch.Size([4])
tensor([[1, 2, 3, 4]])
torch.Size([1, 4])
tensor([[1],
        [2],
        [3],
        [4]])
torch.Size([4, 1])


## RNN and LSTM model class definition

In [11]:
class myRNN(nn.Module):
    """Character-level multi-layer vanilla RNN followed by a linear decoder.

    The module keeps its recurrent hidden state between calls to ``forward``
    (detached from the autograd graph), so feeding consecutive chunks of text
    in order carries context across chunks while gradients stay truncated to
    one chunk.

    Args:
        input_size: width of the one-hot input vectors (vocabulary size).
        output_size: number of logits produced per time step.
        hidden_size: width of each recurrent layer.
        num_layers: number of stacked recurrent layers.
        do_dropout: when True, dropout (p=0.5) is applied to the one-hot input.
    """

    def __init__(self, input_size, output_size, hidden_size = 512, num_layers = 3,
                 do_dropout = False):
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.do_dropout = do_dropout
        self.dropout = nn.Dropout(0.5)
        self.rnn = nn.RNN(input_size=input_size, hidden_size = hidden_size,
                          num_layers = num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

        # hidden state carried from the previous forward call (None = start fresh)
        self.hidden_state = None

    def reset_state(self):
        """Forget the carried hidden state so the next forward call starts fresh."""
        self.hidden_state = None

    def forward(self, input_seq):
        """Compute per-step logits for a sequence of character ids.

        Args:
            input_seq: integer tensor of character ids; the notebook passes
                slices of shape (seq_len, 1), i.e. sequence-first with batch 1.

        Returns:
            Tensor of logits with a trailing dimension of ``output_size``.

        Side effect: stores the detached final hidden state on the module for
        the next call.
        """
        # one-hot encode the ids so the RNN receives float vectors
        x = nn.functional.one_hot(input_seq, self.input_size).float()

        if self.do_dropout:
            x = self.dropout(x)

        # feeding input to RNN, continuing from the previously saved state
        x, new_hidden_state = self.rnn(x, self.hidden_state)

        output = self.decoder(x)

        # detach so the next chunk does not back-propagate into this one
        self.hidden_state = new_hidden_state.detach()

        return output

    def save_model(self, path):
        """Write the model's state dict to ``path``."""
        torch.save(self.state_dict(), path)

    def load_model(self, path):
        """Load parameters from ``path``; on any failure re-initialise the model.

        The fallback keeps the original hyper-parameters, including
        ``do_dropout``, and the device the module was on.
        """
        try:
            # map_location='cpu' lets a checkpoint saved on a GPU be read on a
            # CPU-only machine; load_state_dict then copies the values onto
            # whatever device the parameters currently live on.
            # NOTE: torch.load unpickles the file - only load trusted checkpoints.
            self.load_state_dict(torch.load(path, map_location='cpu'))
        except Exception as err:
            print('Error loading model from file',path)
            print(err)
            print('Initializing the model to it\'s default parameters')
            # remember where the module lives: re-running __init__ creates
            # fresh parameters on the CPU
            current_device = next(self.parameters()).device
            self.__init__(self.input_size, self.output_size, self.hidden_size,
                          self.num_layers, self.do_dropout)
            self.to(current_device)
            
            
class myLSTM(nn.Module):
    """Character-level multi-layer LSTM followed by a linear decoder.

    The module keeps the LSTM's (hidden, cell) state between calls to
    ``forward`` (detached from the autograd graph), so feeding consecutive
    chunks of text in order carries context across chunks while gradients stay
    truncated to one chunk.

    Args:
        input_size: width of the one-hot input vectors (vocabulary size).
        output_size: number of logits produced per time step.
        hidden_size: width of each LSTM layer.
        num_layers: number of stacked LSTM layers.
        do_dropout: when True, dropout (p=0.5) is applied to the one-hot input.
    """

    def __init__(self, input_size, output_size, hidden_size = 512, num_layers = 3,
                 do_dropout = False):
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.do_dropout = do_dropout
        self.dropout = nn.Dropout(0.5)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size = hidden_size,
                          num_layers = num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

        # consists of the short term state followed by the long term state of the LSTM layers
        # (None = start fresh on the next forward call)
        self.internal_state = None

    def reset_state(self):
        """Forget the carried (hidden, cell) state so the next forward call starts fresh."""
        self.internal_state = None

    def forward(self, input_seq):
        """Compute per-step logits for a sequence of character ids.

        Args:
            input_seq: integer tensor of character ids; the notebook passes
                slices of shape (seq_len, 1), i.e. sequence-first with batch 1.

        Returns:
            Tensor of logits with a trailing dimension of ``output_size``.

        Side effect: stores the detached final (hidden, cell) state on the
        module for the next call.
        """
        # one-hot encode the ids so the LSTM receives float vectors
        x = nn.functional.one_hot(input_seq, self.input_size).float()

        if self.do_dropout:
            x = self.dropout(x)

        # feeding input to the LSTM, continuing from the previously saved state
        x, new_internal_state = self.lstm(x, self.internal_state)

        output = self.decoder(x)

        # detach both tensors so the next chunk does not back-propagate into this one
        self.internal_state = (new_internal_state[0].detach(), new_internal_state[1].detach())

        return output

    def save_model(self, path):
        """Write the model's state dict to ``path``."""
        torch.save(self.state_dict(), path)

    def load_model(self, path):
        """Load parameters from ``path``; on any failure re-initialise the model.

        The fallback keeps the original hyper-parameters, including
        ``do_dropout``, and the device the module was on.
        """
        try:
            # map_location='cpu' lets a checkpoint saved on a GPU be read on a
            # CPU-only machine; load_state_dict then copies the values onto
            # whatever device the parameters currently live on.
            # NOTE: torch.load unpickles the file - only load trusted checkpoints.
            self.load_state_dict(torch.load(path, map_location='cpu'))
        except Exception as err:
            print('Error loading model from file',path)
            print(err)
            print('Initializing the model to it\'s default parameters')
            # remember where the module lives: re-running __init__ creates
            # fresh parameters on the CPU
            current_device = next(self.parameters()).device
            self.__init__(self.input_size, self.output_size, self.hidden_size,
                          self.num_layers, self.do_dropout)
            self.to(current_device)
        

In [12]:
# Show the encoded corpus tensor of character ids (the repr is truncated by PyTorch).
data

tensor([[15],
        [40],
        [49],
        ...,
        [ 1],
        [18],
        [ 1]], device='cuda:0')

## Helper functions for training and testing

In [13]:
# function to count number of parameters

def get_n_params(model):
    """Return the total number of scalar elements over all parameters of ``model``."""
    return sum(param.nelement() for param in model.parameters())

In [14]:
def train(rnn_model, epoch, seq_len = 200):
    """Train ``rnn_model`` for one epoch with truncated back-propagation through time.

    Relies on notebook globals: ``data`` (tensor of character ids, one id per
    row), ``data_size``, ``optimizer`` (must wrap ``rnn_model``'s parameters)
    and ``id_to_chars``.

    Args:
        rnn_model: model whose forward call maps a chunk of ids to per-step logits.
        epoch: epoch number; used for logging and to decide when a text sample
            is generated (epochs 1, 2, 3 and every multiple of 10).
        seq_len: length of each training chunk (the TBPTT window).

    Returns:
        Mean loss per iteration for this epoch, or None when a training step
        raised or when no full chunk fitted into the data.
    """
    # putting the model in train mode:
    rnn_model.train()

    # defining the loss function:
    loss_fn = nn.CrossEntropyLoss()

    # number of sampling steps used when a progress sample is printed
    test_seq_len = 200

    # initializing the number of iterations
    n = 0
    iterations = []

    ## truncated back propagation through time is analogous to batch training
    ## BPTT is training on entire dataset; TBPTT signifies training on smaller sequences
    ## when these smaller sequences are encoded as OHV, they act like a batch of inputs

    # start every epoch at a random offset in [0, seq_len) so chunk boundaries
    # differ between epochs (similar in spirit to shuffle = True in data loaders)
    data_ptr = np.random.randint(seq_len)

    iterations.append((n,data_ptr))

    running_loss = 0

    # defined up front so sample generation and the error report below never
    # hit an unbound name, even if no chunk is trained on
    last_ptr = data_ptr
    input_seq = target_seq = None

    show_sample = epoch % 10 == 0 or epoch in (1, 2, 3)
    if show_sample:
        print('\n\n\nStart of epoch: {}'.format(epoch))

    # keep going while a full input chunk AND its one-step-shifted target fit in the data
    while data_ptr + seq_len < data_size:
        try:
            iterations.append((n, data_ptr))

            # slices of `data` already live on the same device as `data`
            input_seq = data[data_ptr:data_ptr+seq_len]
            target_seq = data[data_ptr+1:data_ptr+seq_len+1]

            optimizer.zero_grad()
            output = rnn_model(input_seq)

            # flatten to (steps, vocab) / (steps,); unlike a bare squeeze this
            # also works when seq_len == 1
            loss = loss_fn(output.reshape(-1, output.shape[-1]), target_seq.reshape(-1))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            n += 1

            data_ptr += seq_len
            last_ptr = data_ptr

        except Exception as err:
            print('data_ptr:',data_ptr, 'data_ptr + seq_len',data_ptr + seq_len,'input len:',getattr(input_seq, 'shape', None),'target len',getattr(target_seq, 'shape', None))
            print('\nError:',err)
            return

    # if at an epoch which is a multiple of 10 or at 1st, 2nd or 3rd epoch: generate some text
    if show_sample:

        print('generating random text while training ----------------', end = '\n\n')
        rnn_model.eval()

        # Seed character right after the last trained chunk, clamped to the
        # last valid index. clone() is essential: a plain slice is a view, and
        # writing the sampled ids into it would overwrite the training data.
        seed_idx = min(last_ptr + 1, data_size - 1)
        input_seq = data[seed_idx:seed_idx + 1].clone()

        sampled_chars = []

        # sampling needs no gradients
        with torch.no_grad():
            # test_seq_len + 1 steps, matching the original loop's termination test
            for _ in range(test_seq_len + 1):
                # forward pass
                output = rnn_model(input_seq)

                # construct the distribution of the outputs according the RNN model and sample a character from it
                probs = F.softmax(torch.squeeze(output), dim = 0)
                index = Categorical(probs).sample().item()

                # append the sampled character to the test output
                sampled_chars.append(id_to_chars[index])

                # next input is current output:
                input_seq[0][0] = index

        test_output = ''.join(sampled_chars)

        print('Train Sample\n\n')
        print(test_output)

    try:
        rl = running_loss/n
    except ZeroDivisionError as err:
        # no chunk was trained on (data shorter than seq_len + 1)
        print('n:',n)
        print('error',err)
        print(iterations)
        return None

    print('\n\nEnd of epoch : {}\t Avg loss of an iteration in this epoch: {}'.format(epoch,rl))
    return rl

In [15]:
def test(rnn_model, output_len = 1000):
    """Sample text from ``rnn_model`` and print it.

    Relies on notebook globals: ``data`` (tensor of character ids, one id per
    row), ``data_size`` and ``id_to_chars``.

    The model is primed with 9 consecutive characters taken from a random
    position of the data, then generates one character at a time, feeding each
    sampled character back in as the next input.

    NOTE: any recurrent state already stored on ``rnn_model`` from earlier
    forward calls is NOT reset here; priming continues from it.

    Args:
        rnn_model: model whose forward call maps a chunk of ids to per-step logits.
        output_len: sampling runs for ``output_len + 1`` steps (the loop's
            original termination test is preserved).
    """
    rnn_model.eval()

    # randomly pick a window of 10 characters: 9 to prime the state, 1 as first input
    rand_index = np.random.randint(data_size - 10)

    sampled_chars = []

    # sampling needs no gradients
    with torch.no_grad():
        # run the priming characters through the model to build up its state
        rnn_model(data[rand_index:rand_index + 9])

        # First generation input. clone() is essential: a plain slice is a
        # view, and writing sampled ids into it would overwrite `data`.
        input_seq = data[rand_index+9:rand_index+10].clone()

        # generating one character at a time
        for _ in range(output_len + 1):
            # forward pass
            output = rnn_model(input_seq)

            # construct the distribution of the outputs according the RNN model and sample a character from it
            probs = F.softmax(torch.squeeze(output), dim = 0)
            index = Categorical(probs).sample().item()

            # append the sampled character to the test output
            sampled_chars.append(id_to_chars[index])

            # next input is current output:
            input_seq[0][0] = index

    test_output = ''.join(sampled_chars)

    print('Test ---------------------\n\n')
    print(test_output)
    print('\n\n---------------------------')
    print('\n\n---------------------------')


## Creating and training an instance

## Looking at a smaller RNN first

In [18]:
torch.manual_seed(40)

hidden_size = 512
num_layers = 3
lr = 0.002

# NOTE(review): this path is never used in this cell, is relative (no leading
# '/'), and is the same string used by the other model sections - confirm
# before saving to it.
model_save_file_simple_rnn = 'kaggle/working/model/model_data.pth'

model_simple_rnn =  myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
# train() reads this module-level `optimizer`, so it has to be rebound to the
# model that is about to be trained
optimizer = torch.optim.Adam(model_simple_rnn.parameters(), lr = lr)

# separate copy that always holds the weights of the best epoch so far
best_model_simple_rnn =   myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_simple_rnn_loss = 1e10

for epoch in range(0, 101):
    epoch_loss = train(model_simple_rnn,epoch)

    # train() returns None when an epoch fails; comparing None with a float
    # would raise an unhelpful TypeError, so stop cleanly instead
    if epoch_loss is None:
        print('Epoch {} did not produce a loss; stopping training.'.format(epoch))
        break

    if  epoch_loss < best_simple_rnn_loss:
        best_simple_rnn_loss =  epoch_loss
        best_model_simple_rnn.load_state_dict(model_simple_rnn.state_dict())




Start of epoch: 0
generating random text while training ----------------

Train Sample


esmt lnwyn t y  bhtn  oeni sneloe u tse  u eerhtpsb re
e ,is pur cw
e Iam ocigsrs  rts t eorrhtor ieocoRsr p brehs
 geihn,knpepeid  hnontoIinp losst
efb po n ouesm,oreaann
obblra gehYosdoor it b  ioksr


End of epoch : 0	 Avg loss of an iteration in this epoch: 3.3853858104303254



Start of epoch: 1
generating random text while training ----------------

Train Sample


  ostn
aeoito eeaYreioonslssw u stksn ttesstd euwunshgereurbnebeal uneh ehl.oud
snaet ncntsevo saaecrstrm suroa sooasyi   uo,a,ee enup
lat,
ub  ihe r
yodyro heeit bnrresodgo uotebbsdt d
nt s,
su    ,te


End of epoch : 1	 Avg loss of an iteration in this epoch: 3.3641341355577787



Start of epoch: 2
generating random text while training ----------------

Train Sample


saudutid n hc fwwnlnrr,a Gt,,yytuij r,gmoifvani
bsnn
o s  soe,s

oot os u
ii   puy u syouei o orsl
p tttoptbt e s:neeosne  nbda ,nadob  rip vlsosluuaw  w hon iy e

In [20]:
# Re-seed torch's RNG before sampling, then report the best loss, the model size and a text sample.
torch.manual_seed(40)
n_params_simple_rnn = get_n_params(best_model_simple_rnn)
print('Best RNN loss simple RNN', best_simple_rnn_loss)
print('Model size simple RNN', n_params_simple_rnn)
test(best_model_simple_rnn, output_len=500)

Best RNN loss simple RNN 3.3641341355577787
Model size simple RNN 1373242
Test ---------------------


esmt lnwyn t ybfbhtn  oeni sneloe u tse  u eerhtpsb re
e ,is pur cw
e Ism ocigsrs  rhs t ebrrhtms ieocoRsr b brehs
 geihn,klpepcid  hnontoIinp losst
efb po n ouesm,oreaann
obblra gehYosdoor it b shoksr  ostn
aeoito eeaYresoonslssw u stksn ttesstd euwunshgereurbnebeal uneh ehl.oud
snaet ncntsevo sabecrstrm suroa sooasyii  uo,a,ee eoup
lat,
ub  ihe r

ody o hee.t bnrresodgo uotebbsdt d
nt s,
su    ,tesaudutid n hc fwwnlnrr,a Gt,,yytuij r,gmoifvani
bsnn
o s  see,su
oot os u
ii   psy u syouei o orsl



---------------------------


## Larger RNN now

In [16]:
torch.manual_seed(40)

hidden_size = 512 + 200
num_layers = 6
lr = 0.002

# NOTE(review): this path is never used in this cell, is relative (no leading
# '/'), and is the same string used by the other model sections - confirm
# before saving to it.
model_save_file_rnn = 'kaggle/working/model/model_data.pth'

model_rnn =  myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
# train() reads this module-level `optimizer`, so it has to be rebound to the
# model that is about to be trained
optimizer = torch.optim.Adam(model_rnn.parameters(), lr = lr)

# separate copy that always holds the weights of the best epoch so far
best_model_rnn =   myRNN(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_rnn_loss = 1e10

for epoch in range(0, 101):
    epoch_loss = train(model_rnn,epoch)

    # train() returns None when an epoch fails; comparing None with a float
    # would raise an unhelpful TypeError, so stop cleanly instead
    if epoch_loss is None:
        print('Epoch {} did not produce a loss; stopping training.'.format(epoch))
        break

    if epoch_loss < best_rnn_loss:
        best_rnn_loss = epoch_loss
        best_model_rnn.load_state_dict(model_rnn.state_dict())




Start of epoch: 0
generating random text while training ----------------

Train Sample


esmt lnoyn t y saeCn  oeni sne.oe u tse  t eerhtpst re
e ,is pur co
e Iaeeocigsis  rhs t eorrhtasaieocoRsr c beehs
 geihn,knpepcide hdontoIiao eosst
efb Io n ouesm,oreaain
obb ya gehYosdoor it u sioysr


End of epoch : 0	 Avg loss of an iteration in this epoch: 3.518967640459837



Start of epoch: 1
generating random text while training ----------------

Train Sample


  ostc
aeoito eeaYreioorstsso u stkss ttesstd euwunshgereurbneyea  uneh ehl.oud
so'et  cntsevo saiecrstrm suroa sooasyii  eo,a,ee eoue
fat,
ud  ihe r
yody o heeit erryescdgo uotebdsdt d
nt s,
su    ,te


End of epoch : 1	 Avg loss of an iteration in this epoch: 3.3834388280034666



Start of epoch: 2
generating random text while training ----------------

Train Sample


s'udutid e hc fwwn  tr,e Gt, yytuij r,g oifvani
bsni
o s  see,su
ott os t
ii   psy u syouei o irsl
p tttoptbt e s:neeosne  ebda ,nadoC  rip vlsostuuaw  w hoi iy ea

In [17]:
# Re-seed torch's RNG before sampling, then report the best loss, the model size and a text sample.
torch.manual_seed(40)
n_params_rnn = get_n_params(best_model_rnn)
print('Best RNN loss', best_rnn_loss)
print('Model size', n_params_rnn)
test(best_model_rnn)

Best RNN loss 3.3834388280034666
Model size 5667578
Test ---------------------


FOFzFFzAFFFFFFFFFFHxFHOFVEFFWFLVxFFOFFqOFWqFFqNOzzWFPFFELxxLFFOFSFzSFFOBFFFPzFzFzPOVFFzFzFFzFFFxFFHFFFxRFFBFHFFJFFqFN?FxxzAFFxpxxqFFEF?zFxLAxHzFxqLFzFFqOFFFqFzFxFFFJEEFOFTGFFxHFPFxFFFFFFxFFFFzHxFxFqDzFFxFFFFFEPFFxBFzONYFFBzFFFLFxHFPFFqHBHzFFFFFqFFAFEPqFVqFxLxFzSxFx!FF?FFFxFxVzLWFVFF'EFxFFMLOFxFFxFDNFzFPOHNFFOxFzFFFFxFFLEzFFFFFLFxFFPxzFqxzEFFzFxqqFFFHqFOxF:FFFWLFPFFFLFzz!FJEFFFJFFF?FJFFOFFOFFLxFFFFFOFzFzFFqFzOFFDxFFxxFFxONqFGFFLzHFPxjOFFFOFqqFFxzxFFnJxFFOFxHxLHD!zVFqFFxzFEFFzxzFDAFFzNFFFFHFxFFFqHFFOFFFFxEDFFLFU:FFFEFFDHFE?qFFFLLqFFOzLzOFFLPFqFFqFOFDFFFxzFFHFF!PFLFEOzNFzqxFFFOxxFF!zHFqJDFOFFFEO?qxHOqqFDFPzFFLOFzEFFOBFxHFx!FFFFxFFFFFFzxOLFFOF!HF!FqFUFzzHLFzFBFEFxFqFFFqFFzFFP:OqJxFFFFFFFLxFDFFFFNFFFFFzzF!OxFxFLxEFxFFFFFEFFxFFFNqFFFHOFFFFzFqPF?FqxxFzFFFzFFOFFFFFFzFFDFOELHFFOFFFxzzOFFFqxFFFFFxF!PAFzVFxzFFFFFFLzFHFFJDFFPFxFxFAzFVFF!FFFFFOFBFEFAPFFxxFFFFFFFFFxFEFFOxFFFAFFxOF!LFFxzzFFRFOFFFzxPLF!FTzVzUFFzzFLzxqFOFF

### So much gibberish!

In [19]:
# Sanity check: number of characters that were loaded from the corpus.
print(data_size)

40000


## Looking at LSTMs now

In [21]:
torch.manual_seed(40)

hidden_size = 512
num_layers = 3
lr = 0.002

# NOTE(review): this path is never used in this cell, is relative (no leading
# '/'), and is the same string used by the other model sections - confirm
# before saving to it.
model_save_file_lstm = 'kaggle/working/model/model_data.pth'

model_lstm =  myLSTM(vocab_size, vocab_size, hidden_size, num_layers).to(device)
# train() reads this module-level `optimizer`, so it has to be rebound to the
# model that is about to be trained
optimizer = torch.optim.Adam(model_lstm.parameters(), lr = lr)

# separate copy that always holds the weights of the best epoch so far
best_model_lstm =   myLSTM(vocab_size, vocab_size, hidden_size, num_layers).to(device)
best_lstm_loss = 1e10

for epoch in range(0, 101):
    epoch_loss = train(model_lstm,epoch)

    # train() returns None when an epoch fails; comparing None with a float
    # would raise an unhelpful TypeError, so stop cleanly instead
    if epoch_loss is None:
        print('Epoch {} did not produce a loss; stopping training.'.format(epoch))
        break

    if  epoch_loss < best_lstm_loss:
        best_lstm_loss =  epoch_loss
        best_model_lstm.load_state_dict(model_lstm.state_dict())




Start of epoch: 0
generating random text while training ----------------

Train Sample


esmt lnwyn taw fahCv  keni sneloe u tee  W eerhtpstkrehe ,is purScwSe Iameocigsrs arFstt korrhtmsaieecoRcr c beehsf geihn,knpepcige hnontMIinpMeosst
efb Iovncouesm,oreaannmobblra gehIoedoor it U  hokcr


End of epoch : 0	 Avg loss of an iteration in this epoch: 3.304672240012854



Start of epoch: 1
generating random text while training ----------------

Train Sample


 Mo tc
aeoito eeaYreBoonwlgsw u  tkBn tteIstd euwunnhgerecr:Seyeal uweh ehl.oud
sn'etmncMtsevo saaecrstrmNsuroa sooaeyec  eohapee eoue
fat,eTg  ihe r
yody o:heeet enryesc gofueteb sdt d
nt s,
su    ,te


End of epoch : 1	 Avg loss of an iteration in this epoch: 3.291636429839398



Start of epoch: 2
generating random text while training ----------------

Train Sample


s'neutid e hc fwwnlnaa,e Gk  yytuej rogmoifvaniebann
wMs  see,suSoot oshteii c pkgee syouei o orllla thtoptbt eeU:neeotne  ebda ,nadoC  ripmvlsoSluaaw  w hon iygead

In [22]:
# Re-seed torch's RNG before sampling, then report the best loss, the model size and a text sample.
torch.manual_seed(40)
n_params_lstm = get_n_params(best_model_lstm)
print('Best loss LSTM', best_lstm_loss)
print('Model size LSTM', n_params_lstm)
test(best_model_lstm, output_len=500)

Best loss LSTM 0.003533812589597088
Model size LSTM 5403706
Test ---------------------


or
the lond with smiles as we wish our own choice: one
Than in the blood of others,
For their own cootly. But,
Make good this prequred for us.

AUFIDIUS:
Wert is become of Marcius?

All:
Slain, I have than can ever
Appear in your worships makes me sweat with rushes;
Our great madam; and call thus:
Not for the gods devile us, sir.

MENENIUS:
The shepherd knows not thunder from a tabour
That nuwnition charges meccurment to their kitking
At hear me son?

MARCIUS:
He that I am. Come on your Corioli w


---------------------------


### The LSTM is actually learning to generate new sentences. These phrases and sentences are nowhere to be found in the input text!