In [1]:

!pip3 install transformers
!pip3 install datasets
!pip3 install ml-collections
!pip install sentencepiece
!pip install sacrebleu
!pip install unidecode
!pip install rouge_score



In [2]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory use).
!nvidia-smi

Sun Apr 25 16:32:37 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Fetch the project sources and switch into the checkout so the later `src`
# imports and the relative `data/...` paths resolve.
# NOTE(review): `repolink` / `reponame` look like placeholders — substitute the real URL and directory.
# NOTE(review): presumably not idempotent (a second run would cd one level deeper) — confirm.
!git clone repolink
%cd reponame

Cloning into 'NLtoCodeResearch'...
remote: Enumerating objects: 1374, done.[K
remote: Counting objects: 100% (226/226), done.[K
remote: Compressing objects: 100% (185/185), done.[K
remote: Total 1374 (delta 145), reused 121 (delta 40), pack-reused 1148[K
Receiving objects: 100% (1374/1374), 3.33 MiB | 12.27 MiB/s, done.
Resolving deltas: 100% (943/943), done.


# Configuration

Select the configuration you want to run in order to see its results.

### From the Paper:

* Body => `use_body==True` 
* With Mined => `use_mined==True`
* -NL => `ablation_config=="Only Code"`
* -Code => `ablation_config=="Only NL"`
* -Blocks => `ablation_config=="No Code Blocks"`
* -Inline => `ablation_config=="No Inline"`

In [4]:
use_mined = True #@param {type:"boolean"}
use_body = True #@param {type:"boolean"}
ablation_config = "Full Body" #@param ["Full Body", "Only NL", "Only Code", "No Code Blocks", "No Inline"]

# Each ablation corresponds to exactly one preprocessor flag.
# "Full Body" is deliberately absent: it needs no extra flag.
_ABLATION_FLAGS = {
    "Only NL": 'remove_all_code',
    "Only Code": "use_only_code",
    "No Code Blocks": "remove_code_blocks",
    "No Inline": "remove_inline_code",
}

# Ablations are only honoured when the question body is part of the input.
preprocessor_kwargs = {}
_selected_flag = _ABLATION_FLAGS.get(ablation_config) if use_body else None
if _selected_flag is not None:
    preprocessor_kwargs[_selected_flag] = True

In [5]:
from transformers import (
    AutoTokenizer, PreTrainedTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer,
    Seq2SeqTrainingArguments, TrainingArguments, Trainer, AutoConfig,
    AutoModelForSequenceClassification
)
from datasets import load_dataset, Metric, load_metric, list_metrics
import torch
import ml_collections
import transformers
from numpy.random import default_rng
import numpy as np
import json
import re
import random

seed = 1995
rng = default_rng(seed)


def setSeed():
    """Seed Python's `random`, NumPy's global RNG and PyTorch (CPU and CUDA) with `seed`."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    # Applied in this fixed order; `seed` is read from the notebook globals at call time.
    for apply_seed in seeders:
        apply_seed(seed)


setSeed()

In [6]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Checkpoint name used for the tokenizer, the config and the seq2seq model.
model_name = "facebook/bart-base"
# model_name="allenai/led-base-16384"

In [7]:
from src import processor, evaluation
from src.evaluation import utility
from src.processor import *
from src.common.training_util import loadDatasets, processRawDatasets
from src.evaluation import *
from src.processor.common import *

# Maximum input length: 512 when the question body is included, 128 otherwise.
max_length = 512 if use_body else 128

# Dataset Preprocessing

Load the `CodeGenerationProcessor` with the current config. 

**NOTE:** Google Colab runs Python 3.*7*, whereas `py3_asdl.grammar` targets Python 3.*8*. Thankfully the grammar is not used here, but do be aware of the mismatch.

In [8]:
# Fast tokenizer for the selected checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Model tag passed to the processor; gains a "wMined" suffix when mined data is used.
experiment_tag = 'bartBase' + ("wMined" if use_mined else "")

# NOTE(review): `Path` is never imported explicitly in this notebook; presumably it
# arrives through one of the `from src... import *` lines — confirm.
preprocessor = CodeGenerationProcessor(
    'best',
    experiment_tag,
    tokenizer,
    Path('data', 'html_tags.txt'),
    Path('data', 'py3_asdl.grammar'),
    max_len=max_length,
    target_max_len=128,
    use_body=use_body,
    **preprocessor_kwargs
)
print(preprocessor.name)

best_bartBasewMined_512in_128out_Intent_Snippet_body


Load the datasets and process them

In [9]:

# Load the raw splits through the project loader, with filtering switched off.
raw_datasets = loadDatasets(
    preprocessor=preprocessor,
    use_filter=False,
    load_dataset_args=dict(skip_api=True),
)

# The project helper returns two values, bound here as the processed datasets
# and the newly produced raw splits.
# NOTE(review): `shuffle_seed=21` differs from the notebook-wide `seed` — confirm this is intended.
datasets, new_splits = processRawDatasets(
    raw_datasets,
    preprocessor,
    num_procs=4,
    shuffle_seed=21,
    val_size=.1,
    combine_mined_train=use_mined,
)

# Overwrite/add the returned splits so `raw_datasets` matches what was processed.
for new_name, new_split in new_splits.items():
    raw_datasets[new_name] = new_split

Using custom data configuration default-47d77f710bd55aad
Reusing dataset base_dataset (/root/.cache/huggingface/datasets/base_dataset/default-47d77f710bd55aad/1.1.0/bac1a70da216dc0672d566e31d37c5e39611cd59e4d60c6085235760de207aa7)


Processing split named 'test'
 

HBox(children=(FloatProgress(value=0.0, description='#0', max=125.0, style=ProgressStyle(description_width='in…

 

HBox(children=(FloatProgress(value=0.0, description='#1', max=125.0, style=ProgressStyle(description_width='in…

  

HBox(children=(FloatProgress(value=0.0, description='#2', max=125.0, style=ProgressStyle(description_width='in…




HBox(children=(FloatProgress(value=0.0, description='#3', max=125.0, style=ProgressStyle(description_width='in…




Processing split named 'val'
 

HBox(children=(FloatProgress(value=0.0, description='#0', max=310.0, style=ProgressStyle(description_width='in…

 

HBox(children=(FloatProgress(value=0.0, description='#1', max=309.0, style=ProgressStyle(description_width='in…

 


HBox(children=(FloatProgress(value=0.0, description='#2', max=309.0, style=ProgressStyle(description_width='in…

 

HBox(children=(FloatProgress(value=0.0, description='#3', max=309.0, style=ProgressStyle(description_width='in…




Processing split named 'train'
 

HBox(children=(FloatProgress(value=0.0, description='#0', max=2782.0, style=ProgressStyle(description_width='i…

 

HBox(children=(FloatProgress(value=0.0, description='#1', max=2782.0, style=ProgressStyle(description_width='i…

 

HBox(children=(FloatProgress(value=0.0, description='#2', max=2782.0, style=ProgressStyle(description_width='i…

 

HBox(children=(FloatProgress(value=0.0, description='#3', max=2781.0, style=ProgressStyle(description_width='i…







## Sample Inputs for the Data

In [18]:
# Print the size of each split together with its first few ids and decoded inputs.
num_to_print = 5
print(f"Data from {preprocessor.name}:")
print()
for k in ['train','val','test']:
    split = datasets[k]
    # Never index past the end of a split that holds fewer than `num_to_print` rows.
    shown = min(num_to_print, split.num_rows)
    print(f"{k} ({split.num_rows} examples):")
    print(f"\tFirst {shown} ids in {k}: {split['question_id'][:shown]}")
    print(f"\tFirst {shown} text:")
    for i in range(shown):
        # repr() keeps newlines escaped; the slice keeps each example short.
        decoded = repr(tokenizer.decode(split[i]['input_ids']))
        print(f"\t\t{decoded[:128]}")


Data from best_bartBasewMined_512in_128out_Intent_Snippet_body:

train (11127 examples):
	First 5 ids in train: ['4576115', '280222', '4967580', '32800623', '13114512']
	First 5 text:
		"<s>convert a list to a dictionary in python  Let's say I have a list  a   in Python whose entries conveniently map to a diction
		"<s>python - sort a list of nested lists  I have input consisting of a list of nested lists like this:\n l = [[[[[39]]]], [1, 2,
		'<s>how to get the size of a string in python?  For example, I get a string:\n str = "please answer my question"\n\n I want to w
		"<s>how to get the fft of a numpy array to work?  I'm reading a specific column of a csv file as a numpy array. When I try to do
		"<s>calculating difference between two rows in python / pandas  In python, how can I reference previous row and calculate someth
val (1237 examples):
	First 5 ids in val: ['28134319', '36368252', '3258573', '30108372', '6086047']
	First 5 text:
		"<s>remove first and last lines of string `

# Setting up the Model + Evaluator

In [11]:

# Start from the checkpoint's config and cap generation at the preprocessor's target length.
config = AutoConfig.from_pretrained(model_name)
config.max_length = preprocessor.max_target_len
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    config=config
)
# `resize_token_embeddings` returns the embedding module, not the model, so
# chaining `.to(device)` onto it would move only the embeddings. Resize first,
# then move the whole model.
model.resize_token_embeddings(preprocessor.vocab_size)
model.to(device)
# Register the preprocessor's extra keys as ones to ignore at inference.
# NOTE(review): Hugging Face's Trainer looks this attribute up on `model.config`;
# confirm that setting it on the model object is what the project code expects.
if not hasattr(model, 'keys_to_ignore_at_inference'):
    model.keys_to_ignore_at_inference = []
model.keys_to_ignore_at_inference.extend(preprocessor.ignore_keys)

In [12]:

# Project evaluator used by the evaluation cell; presumably `smooth_bleu=True` enables BLEU smoothing — confirm.
evaluator = Seq2Seq.CodeGenerationEvaluator(tokenizer, device,smooth_bleu=True)

In [13]:
def trainingArgs(logging_dir, label_names = None):
    """Assemble the settings later unpacked into the training-arguments class.

    Reads the notebook globals `max_length` (to pick the batch size) and `seed`.

    Args:
        logging_dir: Directory used for both `output_dir` and `logging_dir`.
        label_names: Label names to store; `['labels']` is used when None.

    Returns:
        An `ml_collections.ConfigDict` holding the settings.
    """
    # Build the default per call so no list is shared between invocations.
    names = ['labels'] if label_names is None else label_names

    # Longer inputs get a smaller per-device batch.
    if max_length > 512:
        per_device_batch = 8
    elif max_length > 128:
        per_device_batch = 16
    else:
        per_device_batch = 32

    cfg = ml_collections.ConfigDict()
    cfg.output_dir = logging_dir
    cfg.evaluation_strategy = "epoch"
    cfg.per_device_train_batch_size = per_device_batch
    cfg.per_device_eval_batch_size = per_device_batch
    cfg.logging_dir = logging_dir
    cfg.save_total_limit = 2
    cfg.learning_rate = 5e-5
    cfg.seed = seed
    cfg.num_train_epochs = 10
    cfg.dataloader_num_workers = 0
    cfg.label_names = names
    cfg.no_cuda = False
    cfg.load_best_model_at_end = True
    cfg.group_by_length = True
    cfg.warmup_ratio = 0.05
    cfg.logging_strategy = 'epoch'
    return cfg

# Training

In [14]:
# "./experiments/" serves as both the output and the logging directory.
args_dict = trainingArgs("./experiments/")
training_args = Seq2SeqTrainingArguments(**args_dict)

# Collator built from the tokenizer, with padding to a multiple of 4.
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    pad_to_multiple_of=4,
)

# Train on the 'train' split and evaluate on 'val'; metric computation stays disabled here.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=datasets['train'],
    eval_dataset=datasets['val'],
    tokenizer=tokenizer,
    # compute_metrics=evaluator,
)

In [15]:
# Run fine-tuning with the trainer built above, then persist the trainer's state.
trainer.train()
trainer.save_state()

Epoch,Training Loss,Validation Loss,Runtime,Samples Per Second
1,2.6727,1.89398,8.8014,140.546
2,1.7886,1.588046,8.7712,141.03
3,1.448,1.464866,8.6923,142.309
4,1.2336,1.417396,8.8172,140.294
5,1.0653,1.352143,8.8475,139.813
6,0.9313,1.342393,8.8323,140.054
7,0.8235,1.324072,8.7208,141.845
8,0.7385,1.31574,8.7639,141.148
9,0.6738,1.326935,8.8407,139.922
10,0.6275,1.328182,8.9291,138.535


# Evaluation

In [16]:
# One value drives both the beam width and the number of sequences returned per input.
num_return = 4
gen_kwargs = dict(
    early_stopping=True,
    num_beams=num_return,
    length_penalty=.9,
    num_return_sequences=num_return,
)

In [17]:
# Turn off the evaluator's `minimal` mode before evaluating.
evaluator.minimal=False
# Project helper that runs the evaluation with the generation settings in `gen_kwargs`.
# NOTE(review): the two Path arguments are presumably the working directory and the
# experiments output directory — confirm against `utility.evaluateExperiments`.
utility.evaluateExperiments(
    trainer, 
    preprocessor, 
    datasets,
    raw_datasets,
    evaluator, 
    Path(),
    Path('experiments'),
    allow_overwrite=True,
    batch_size=16,
    gen_kwargs=gen_kwargs,
)


Evaluating experiment 'best_bartBasewMined_512in_128out_Intent_Snippet_body' to 'best_bartBasewMined_512in_128out_Intent_Snippet_body'


HBox(children=(FloatProgress(value=0.0, description='Generating', max=78.0, style=ProgressStyle(description_wi…


val Evaluation:
	                                BLEU: 32.67  
	                           SacreBLEU: 32.00  
	              BLEU-Unigram-Precision: 58.90  
	             Sacre-Unigram-Precision: 56.39  
	                             ROUGE-2: 24.88  
	                             ROUGE-L: 39.78  
	               BLEU-Bigram-Precision: 39.14  
	              Sacre-Bigram-Precision: 36.70  
	              BLEU-Trigram-Precision: 28.23  
	             Sacre-Trigram-Precision: 28.48  


HBox(children=(FloatProgress(value=0.0, description='Generating', max=32.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Calculating Cheating', max=500.0, style=ProgressStyle(des…


test Evaluation:
	                                BLEU: 36.41  
	                           SacreBLEU: 35.91  
	              BLEU-Unigram-Precision: 66.62  
	             Sacre-Unigram-Precision: 64.93  
	                             ROUGE-2: 31.49  
	                             ROUGE-L: 50.03  
	               BLEU-Bigram-Precision: 46.99  
	              Sacre-Bigram-Precision: 45.43  
	              BLEU-Trigram-Precision: 34.98  
	             Sacre-Trigram-Precision: 35.77  
	                          cheat_BLEU: mean=-0.12   var=2.71    std=1.65   
	                     cheat_SacreBLEU: mean=-0.07   var=2.58    std=1.60   
	        cheat_BLEU-Unigram-Precision: mean=5.61    var=234.45  std=15.31  
	       cheat_Sacre-Unigram-Precision: mean=5.74    var=260.27  std=16.13  
	                       cheat_ROUGE-2: mean=1.67    var=27.97   std=5.29   
	                       cheat_ROUGE-L: mean=0.58    var=30.55   std=5.53   
	         cheat_BLEU-Bigram-Precision: mean=11.33   var=