# Recurrent Neural Networks and Language Models

You guys probably very excited about ChatGPT.  In today class, we will be implementing a very simple language model, which is basically what ChatGPT is, but with a simple LSTM.  You will be surprised that it is not so difficult at all.

Paper that we base on is *Regularizing and Optimizing LSTM Language Models*, https://arxiv.org/abs/1708.02182

In [1]:
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from datasets import load_dataset, DatasetDict

import torchtext, datasets, math
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

#make our work comparable if restarted the kernel
SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# torch.cuda.get_device_name(0)

cuda


In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

## 1. Load data 



In [4]:

# this dataset consist of "pandas", "sklearn", "matplotlib", "seaborn" etc
# ds_train = load_dataset("huggingface-course/codeparrot-ds-train", split="train")
# ds_valid = load_dataset("huggingface-course/codeparrot-ds-valid", split="validation")


In [6]:
# print(len(ds_train),len(ds_valid),type(ds_train))

606720 3322 <class 'datasets.arrow_dataset.Dataset'>


In [24]:
# from datasets import DatasetDict

# data_column = "content"

# dataset = DatasetDict(
#     {
#         "train": ds_train.shuffle().select(range(1500)), 
#         "valid": ds_valid.shuffle().select(range(100)),  
#     }
# )

# dataset

DatasetDict({
    train: Dataset({
        features: ['repo_name', 'path', 'copies', 'size', 'content', 'license'],
        num_rows: 1500
    })
    valid: Dataset({
        features: ['repo_name', 'path', 'copies', 'size', 'content', 'license'],
        num_rows: 100
    })
})

In [9]:

# print(dataset['train'][127][data_column])

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import contextlib

import numpy as np
import pandas as pd

from pyspark import pandas as ps
from pyspark.pandas.extensions import (
    register_dataframe_accessor,
    register_series_accessor,
    register_index_a

## 2. Preprocessing

### Tokenizing

Simply tokenize the given text to tokens.

In [32]:
# tokenizer = torchtext.data.utils.get_tokenizer('spacy', language='en_core_web_sm')

# #function to tokenize
# tokenize_data = lambda example, tokenizer: {'tokens': tokenizer(example[data_column])}  

# #map the function to each example
# tokenized_dataset = dataset.map(tokenize_data, remove_columns=[data_column], fn_kwargs={'tokenizer': tokenizer})
# print(tokenized_dataset['train'][127]['tokens'])



['"', '"', '"', '\n', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '\n ', 'Comparison', 'of', 'Manifold', 'Learning', 'methods', '\n', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '=', '\n\n', 'An', 'illustration', 'of', 'dimensionality', 'reduction', 'on', 'the', 'S', '-', 'curve', 'dataset', '\n', 'with', 'various', 'manifold', 'learning', 'methods', '.', '\n\n', 'For', 'a', 'discussion', 'and', 'comparison', 'of', 'these', 'algorithms', ',', 'see', 'the', '\n', ':', 'ref:`manifold', 'module', 'page', '<', 'manifold', '>', '`', '\n\n', 'For', 'a', 'similar', 'example', ',', 'where', 'the', 'methods', 'are', 'applied', 'to', 'a', '\n', 'sphere', 'dataset', ',', 'see', ':', 'r

## tokenize is so long we should save tokenized dataset for safety 

In [9]:
# type(tokenizer),type(tokenized_dataset)

(functools.partial, datasets.dataset_dict.DatasetDict)

In [34]:
# disk_path = '/content/drive/MyDrive/NLP/copilot/models/tokenized_dataset_dict'
# tokenized_dataset.save_to_disk(disk_path)


Saving the dataset (0/1 shards):   0%|          | 0/1500 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/100 [00:00<?, ? examples/s]

In [3]:
# disk_path = '/content/drive/MyDrive/NLP/copilot/models/tokenized_dataset_dict'
disk_path = 'models/tokenized_dataset_dict'
tokenized_dataset = datasets.load_from_disk(dataset_path=disk_path)

### Numericalizing

We will tell torchtext to add any word that has occurred at least three times in the dataset to the vocabulary because otherwise it would be too big.  Also we shall make sure to add `unk` and `eos`.

In [33]:
## numericalizing
# vocab = torchtext.vocab.build_vocab_from_iterator(tokenized_dataset['train']['tokens'], 
# min_freq=3) 
# vocab.insert_token('<unk>', 0)           
# vocab.insert_token('<eos>', 1)            
# vocab.set_default_index(vocab['<unk>'])   
# print(len(vocab))                         
# print(vocab.get_itos()[:10])       

52229
['<unk>', '<eos>', ',', '=', ')', "'", '#', '\n        ', '"', '\n    ']


In [37]:
# type(vocab)

torchtext.vocab.vocab.Vocab

In [38]:
# vocab_path = "/content/drive/MyDrive/NLP/copilot/models/vocab.pt"
# torch.save(vocab, vocab_path)


In [4]:
# vocab_path = "/content/drive/MyDrive/NLP/copilot/models/vocab.pt"
vocab_path = "models/vocab.pt"
vocab = torch.load(vocab_path)

In [5]:
len(vocab)

54996

## 3. Prepare the batch loader

### Prepare data

Given "Chaky loves eating at AIT", and "I really love deep learning", and given batch size = 3, we will get three batches of data "Chaky loves eating at", "AIT `<eos>` I really", "love deep learning `<eos>`".  

In [6]:
def get_data(dataset, vocab, batch_size):
    data = []                                                   
    for example in dataset:
        if example['tokens']:         
            #appends eos so we know it ends....so model learn how to end...                             
            tokens = example['tokens'].append('<eos>')   
            #numericalize          
            tokens = [vocab[token] for token in example['tokens']] 
            data.extend(tokens)                                    
    data = torch.LongTensor(data)                                 
    num_batches = data.shape[0] // batch_size 
    data = data[:num_batches * batch_size]                       
    data = data.view(batch_size, num_batches)          
    return data


In [7]:
batch_size = 100
train_data = get_data(tokenized_dataset['train'], vocab, batch_size)
valid_data = get_data(tokenized_dataset['valid'], vocab, batch_size)


In [8]:
train_data.shape # batch_size, number of batch

torch.Size([100, 37825])

In [9]:
# torch.cuda.empty_cache()

## 4. Modeling 

In [10]:
class LSTMLanguageModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, hid_dim, num_layers, dropout_rate):
                
        super().__init__()
        
        self.num_layers = num_layers

        self.hid_dim   = hid_dim

        self.embedding = nn.Embedding(vocab_size,emb_dim)

        self.lstm = nn.LSTM(emb_dim,hid_dim,num_layers=num_layers,
                            dropout = dropout_rate, batch_first=True)
        # not do bidirectional (it only look forward)

        self.dropout = nn.Dropout(dropout_rate)

        self.fc = nn.Linear(hid_dim,vocab_size)



    def init_hidden(self, batch_size, device):
        # h0 have to be new not relate to any sentence
        # h0 just all 0
        #this function gonna be run in the beginning of the epoch
        hidden = torch.zeros(self.num_layers, batch_size, self.hid_dim).to(device)
        cell   = torch.zeros(self.num_layers, batch_size, self.hid_dim).to(device)
        return hidden, cell

    def detach_hidden(self, hidden):
        #this gonna run in every batch
        hidden, cell = hidden
        hidden = hidden.detach() #removing this hidden from gradients graph
        cell   = cell.detach()   #removing this cell from gradients graph
        return hidden, cell

    def forward(self, src, hidden):
        # h0 have to be new not relate to any sentence
        
        embed = self.embedding(src)
        # batch,embeddim

        output, hidden = self.lstm(embed, hidden) # hidden --> (h,c)
        # output - batch,seq,hiddim
        # hidden - num layer,batch,hiddim

        output = self.dropout(output)
        
        prediction = self.fc(output)
        # batch , seq , vocab

        return prediction, hidden




    

## Tuning param

## 5. Training 

Follows very basic procedure.  One note is that some of the sequences that will be fed to the model may involve parts from different sequences in the original dataset or be a subset of one (depending on the decoding length). For this reason we will reset the hidden state every epoch, this is like assuming that the next batch of sequences is probably always a follow up on the previous in the original dataset.

In [11]:
vocab_size = len(vocab)
emb_dim = 150               # 400 in the paper
hid_dim = 1024                # 1150 in the paper
num_layers = 2                # 3 in the paper
dropout_rate = 0.4              
lr = 1e-3                     

In [12]:
# save parameter for more convenient
# params_path = "/content/drive/MyDrive/NLP/copilot/models/params.pt"
params_path = "models/params.pt"
params ={"vocab_size":vocab_size,"emb_dim":emb_dim ,"hid_dim":hid_dim ,"num_layers":num_layers,"dropout_rate" :dropout_rate}

torch.save(params,params_path)

# params = torch.load("vocab.pt")

In [13]:
model = LSTMLanguageModel(vocab_size, emb_dim, hid_dim, num_layers, dropout_rate).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {num_params:,} trainable parameters')

The model has 77,833,996 trainable parameters


In [14]:
#we can print the complexity by the number of parameters
def count_parameters(model):
    params = [p.numel() for p in model.parameters() if p.requires_grad]
    for item in params:
        print(f'{item:>6}')
    print(f'______\n{sum(params):>6}')
    
count_parameters(model)

8249400
614400
4194304
  4096
  4096
4194304
4194304
  4096
  4096
56315904
 54996
______
77833996


In [15]:
def get_batch(data, seq_len, idx):
    src    = data[:, idx:idx+seq_len]                   
    target = data[:, idx+1:idx+seq_len+1]  #target simply is ahead of src by 1            
    return src, target

In [16]:
def train(model, data, optimizer, criterion, batch_size, seq_len, clip, device):
    
    epoch_loss = 0
    model.train()
    # drop all batches that are not a multiple of seq_len
    num_batches = data.shape[-1]
    data = data[:, :num_batches - (num_batches -1) % seq_len]
    num_batches = data.shape[-1]

    hidden = model.init_hidden(batch_size, device)
    
    for idx in tqdm(range(0, num_batches - 1, seq_len), desc='Training: ',leave=False):
        optimizer.zero_grad()
        hidden = model.detach_hidden(hidden)

        src, target = get_batch(data, seq_len, idx) #src, target: [batch size, seq len]
        src, target = src.to(device), target.to(device)

        batch_size = src.shape[0]
        prediction, hidden = model(src, hidden)               

        prediction = prediction.reshape(batch_size * seq_len, -1)  #prediction: [batch size * seq len, vocab size]  
        target = target.reshape(-1)
        loss = criterion(prediction, target)
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item() * seq_len
    return epoch_loss / num_batches

In [17]:
def evaluate(model, data, criterion, batch_size, seq_len, device):

    epoch_loss = 0
    model.eval()
    num_batches = data.shape[-1]
    data = data[:, :num_batches - (num_batches -1) % seq_len]
    num_batches = data.shape[-1]

    hidden = model.init_hidden(batch_size, device)

    with torch.no_grad():
        for idx in range(0, num_batches - 1, seq_len):
            hidden = model.detach_hidden(hidden)
            src, target = get_batch(data, seq_len, idx)
            src, target = src.to(device), target.to(device)
            batch_size= src.shape[0]

            prediction, hidden = model(src, hidden)
            prediction = prediction.reshape(batch_size * seq_len, -1)
            target = target.reshape(-1)

            loss = criterion(prediction, target)
            epoch_loss += loss.item() * seq_len
    return epoch_loss / num_batches

In [18]:
n_epochs = 80
seq_len  = 70
clip    = 0.25

# model_path = "/content/drive/MyDrive/NLP/copilot/models/best-val-lstm_lm.pt"
model_path = "models/best-val-lstm_lm.pt"
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=0)

best_valid_loss = float('inf')

for epoch in range(n_epochs):
    train_loss = train(model, train_data, optimizer, criterion, 
                batch_size, seq_len, clip, device)
    valid_loss = evaluate(model, valid_data, criterion, batch_size, 
                seq_len, device)

    lr_scheduler.step(valid_loss)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_path)

    print(f'\tTrain Perplexity: {math.exp(train_loss):.3f}')
    print(f'\tValid Perplexity: {math.exp(valid_loss):.3f}')

                                                           

	Train Perplexity: 475.905
	Valid Perplexity: 224.850


                                                           

	Train Perplexity: 176.435
	Valid Perplexity: 47.372


                                                           

	Train Perplexity: 69.028
	Valid Perplexity: 30.794


                                                           

	Train Perplexity: 44.551
	Valid Perplexity: 25.371


                                                           

	Train Perplexity: 34.043
	Valid Perplexity: 22.651


                                                           

	Train Perplexity: 27.645
	Valid Perplexity: 21.271


                                                           

	Train Perplexity: 23.688
	Valid Perplexity: 20.218


                                                           

	Train Perplexity: 20.431
	Valid Perplexity: 19.829


                                                           

	Train Perplexity: 18.040
	Valid Perplexity: 19.270


                                                           

	Train Perplexity: 16.368
	Valid Perplexity: 18.887


                                                           

	Train Perplexity: 15.086
	Valid Perplexity: 18.137


                                                           

	Train Perplexity: 14.020
	Valid Perplexity: 17.976


                                                           

	Train Perplexity: 13.118
	Valid Perplexity: 17.733


                                                           

	Train Perplexity: 12.333
	Valid Perplexity: 17.464


                                                           

	Train Perplexity: 11.707
	Valid Perplexity: 17.418


                                                           

	Train Perplexity: 11.118
	Valid Perplexity: 17.229


                                                           

	Train Perplexity: 10.605
	Valid Perplexity: 17.236


                                                           

	Train Perplexity: 10.162
	Valid Perplexity: 16.656


                                                           

	Train Perplexity: 9.835
	Valid Perplexity: 16.613


                                                           

	Train Perplexity: 9.542
	Valid Perplexity: 16.496


                                                           

	Train Perplexity: 9.257
	Valid Perplexity: 16.517


                                                           

	Train Perplexity: 9.158
	Valid Perplexity: 16.073


                                                           

	Train Perplexity: 8.994
	Valid Perplexity: 15.955


                                                           

	Train Perplexity: 8.827
	Valid Perplexity: 15.933


                                                           

	Train Perplexity: 8.663
	Valid Perplexity: 15.908


                                                           

	Train Perplexity: 8.510
	Valid Perplexity: 15.914


                                                           

	Train Perplexity: 8.478
	Valid Perplexity: 15.729


                                                           

	Train Perplexity: 8.392
	Valid Perplexity: 15.704


                                                           

	Train Perplexity: 8.297
	Valid Perplexity: 15.647


                                                           

	Train Perplexity: 8.204
	Valid Perplexity: 15.619


                                                           

	Train Perplexity: 8.110
	Valid Perplexity: 15.608


                                                           

	Train Perplexity: 8.028
	Valid Perplexity: 15.642


                                                           

	Train Perplexity: 8.001
	Valid Perplexity: 15.503


                                                           

	Train Perplexity: 7.969
	Valid Perplexity: 15.528


                                                           

	Train Perplexity: 8.032
	Valid Perplexity: 15.436


                                                           

	Train Perplexity: 8.031
	Valid Perplexity: 15.451


                                                           

	Train Perplexity: 8.155
	Valid Perplexity: 15.293


                                                           

	Train Perplexity: 8.141
	Valid Perplexity: 15.269


                                                           

	Train Perplexity: 8.136
	Valid Perplexity: 15.270


                                                           

	Train Perplexity: 8.284
	Valid Perplexity: 15.222


                                                           

	Train Perplexity: 8.280
	Valid Perplexity: 15.138


                                                           

	Train Perplexity: 8.282
	Valid Perplexity: 15.126


                                                           

	Train Perplexity: 8.243
	Valid Perplexity: 15.136


                                                           

	Train Perplexity: 8.411
	Valid Perplexity: 15.097


                                                           

	Train Perplexity: 8.435
	Valid Perplexity: 15.138


                                                           

	Train Perplexity: 8.599
	Valid Perplexity: 15.105


                                                           

	Train Perplexity: 8.718
	Valid Perplexity: 15.085


                                                           

	Train Perplexity: 8.714
	Valid Perplexity: 15.091


                                                           

	Train Perplexity: 8.762
	Valid Perplexity: 15.098


                                                           

	Train Perplexity: 8.754
	Valid Perplexity: 15.103


                                                           

	Train Perplexity: 8.790
	Valid Perplexity: 15.105


                                                           

	Train Perplexity: 8.776
	Valid Perplexity: 15.106


                                                           

	Train Perplexity: 8.795
	Valid Perplexity: 15.107


                                                           

	Train Perplexity: 8.794
	Valid Perplexity: 15.107


                                                           

	Train Perplexity: 8.795
	Valid Perplexity: 15.107


                                                           

	Train Perplexity: 8.793
	Valid Perplexity: 15.108


                                                           

	Train Perplexity: 8.794
	Valid Perplexity: 15.108


                                                           

	Train Perplexity: 8.785
	Valid Perplexity: 15.108


                                                           

	Train Perplexity: 8.774
	Valid Perplexity: 15.109


                                                           

	Train Perplexity: 8.769
	Valid Perplexity: 15.109


                                                           

	Train Perplexity: 8.782
	Valid Perplexity: 15.109


                                                           

	Train Perplexity: 8.769
	Valid Perplexity: 15.110


                                                           

	Train Perplexity: 8.793
	Valid Perplexity: 15.110


                                                           

	Train Perplexity: 8.776
	Valid Perplexity: 15.111


                                                           

	Train Perplexity: 8.804
	Valid Perplexity: 15.111


                                                           

	Train Perplexity: 8.766
	Valid Perplexity: 15.111


                                                           

	Train Perplexity: 8.790
	Valid Perplexity: 15.112


                                                           

	Train Perplexity: 8.792
	Valid Perplexity: 15.112


                                                           

	Train Perplexity: 8.766
	Valid Perplexity: 15.112


                                                           

	Train Perplexity: 8.774
	Valid Perplexity: 15.113


                                                           

	Train Perplexity: 8.774
	Valid Perplexity: 15.113


                                                           

	Train Perplexity: 8.781
	Valid Perplexity: 15.113


                                                           

	Train Perplexity: 8.782
	Valid Perplexity: 15.114


                                                           

	Train Perplexity: 8.786
	Valid Perplexity: 15.114


                                                           

	Train Perplexity: 8.783
	Valid Perplexity: 15.114


                                                           

	Train Perplexity: 8.767
	Valid Perplexity: 15.115


                                                           

	Train Perplexity: 8.759
	Valid Perplexity: 15.115


                                                           

	Train Perplexity: 8.786
	Valid Perplexity: 15.115


                                                           

	Train Perplexity: 8.791
	Valid Perplexity: 15.116


                                                           

	Train Perplexity: 8.786
	Valid Perplexity: 15.116


In [19]:
! nvidia-smi

Tue Feb 21 23:11:11 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.86.01    Driver Version: 515.86.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
| 91%   77C    P2    83W / 280W |   9240MiB / 11264MiB |     58%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## 6. Testing

In [20]:
# model.load_state_dict(torch.load(model_path,  map_location=device))
# test_loss = evaluate(model, test_data, criterion, batch_size, seq_len, device)
# print(f'Test Perplexity: {math.exp(test_loss):.3f}')

## 7. Real-world inference

Here we take the prompt, tokenize, encode and feed it into the model to get the predictions.  We then apply softmax while specifying that we want the output due to the last word in the sequence which represents the prediction for the next word.  We divide the logits by a temperature value to alter the model’s confidence by adjusting the softmax probability distribution.

Once we have the Softmax distribution, we randomly sample it to make our prediction on the next word. If we get <unk> then we give that another try.  Once we get <eos> we stop predicting.
    
We decode the prediction back to strings last lines.

In [21]:
def generate(prompt, max_seq_len, temperature, model, tokenizer, vocab, device, seed=None):
    if seed is not None:
        torch.manual_seed(seed)
    model.eval()
    tokens = tokenizer(prompt)
    indices = [vocab[t] for t in tokens]
    batch_size = 1
    hidden = model.init_hidden(batch_size, device)
    with torch.no_grad():
        for i in range(max_seq_len):
            src = torch.LongTensor([indices]).to(device)
            prediction, hidden = model(src, hidden)
            
            #prediction: [batch size, seq len, vocab size]
            #prediction[:, -1]: [batch size, vocab size] #probability of last vocab
            
            probs = torch.softmax(prediction[:, -1] / temperature, dim=-1)  
            prediction = torch.multinomial(probs, num_samples=1).item()    
            
            while prediction == vocab['<unk>']: #if it is unk, we sample again
                prediction = torch.multinomial(probs, num_samples=1).item()

            if prediction == vocab['<eos>']:    #if it is eos, we stop
                break

            indices.append(prediction) #autoregressive, thus output becomes input

    itos = vocab.get_itos()
    tokens = [itos[i] for i in indices]
    return tokens

In [34]:

tokenizer = torchtext.data.utils.get_tokenizer(tokenizer='spacy', language='en_core_web_sm')

In [32]:
import spacy

In [37]:
prompt = "import numpy"
max_seq_len = 30
seed=0

temperatures = [0.4, 0.6, 0.8, 1.0]
for temp in temperatures:
  generation = generate(prompt, max_seq_len, temp, model, tokenizer, vocab, device, seed=seed)
  print(str(temp)+"\n"+' '.join(generation)+"\n")

0.4
import numpy = False , 
             # * * kwargs : 
             # # # # # # # # # # # # # # # # # # # #

0.6
import numpy = False , 
             # * * kwargs : 
             # # 
             # # # # # # # # # # # # # # # # #

0.8
import numpy = False , 
             # * 4 * * 2 , * * kwargs , * * kwds , activation = None , 
             #    5 , 50 ,

1.0
import numpy = False , 
             # sum = 4 , 0.5 - width ) 
             # imax , left , cmap = cmap , Z = plt.cm . Diamond , 5



In [None]:
prompt = "import sklearn as"
max_seq_len = 30
seed=0

temperatures = [0.4, 0.6, 0.8, 1.0]
for temp in temperatures:
  generation = generate(prompt, max_seq_len, temp, model, tokenizer, vocab, device, seed=seed)
  print(str(temp)+"\n"+' '.join(generation)+"\n")

0.4
import sklearn as ) , = ) , if , , , 
     = ' , 
 , , , , = ) , = ) = = # ) , , '

0.6
import sklearn as ) , = ) , if of 
       
     , 
     = ' , 
 , , : / = ) , = ( 
     = # ) , "

0.8
import sklearn as ) , = ) , if of 
       
     , _ = ' , 
 , , matching infinite = ) , = ( 
     = # ) , "

1.0
import sklearn as * j = ) np.eye(2 if of 
       
     same _ = np.float32 , 
 , , matching infinite = ) assert_array_almost_equal(X_std = ( 
     = i ) , "



In [38]:
prompt = "import pandas as"
max_seq_len = 50
seed=0

temperatures = [0.4, 0.6, 0.8, 1.0]
for temp in temperatures:
  generation = generate(prompt, max_seq_len, temp, model, tokenizer, vocab, device, seed=seed)
  print(str(temp)+"\n"+' '.join(generation)+"\n")

0.4
import pandas as a 
 from scipy import stats 
 from matplotlib import pyplot as plt 
 from matplotlib import cm 
 from matplotlib.path import Path 
 from matplotlib import pyplot as plt

0.6
import pandas as a 
 from pandas.testing import assert_frame_equal 
 from matplotlib import pyplot as plt 
 from collections import defaultdict 
 from gnuradio import analog 
 from tensorflow.python.framework import setup 
 from

0.8
import pandas as a 
 from scipy import linalg 
 from matplotlib.patches import convolve 
 from sklearn.utils import check_random_state 
 from sklearn.base import clone 
 from scipy import sparse 
 import matplotlib.pyplot as

1.0
import pandas as output 
 from pandas.compat import lrange , pydoc , MaxPooling2D , ArgumentDefaultsHelpFormatter 
 from matplotlib.font_manager import Builder , zip , os.path 
 from tools import train_test_split 
 from numpy.linalg import

