# codeT5

In [None]:
%%capture
!pip install datasets
!pip install evaluate
!pip install transformers
!pip install accelerate -U
!pip install codebleu

import datasets
from datasets import Dataset
from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModel, AdamW,
    AutoModelForCausalLM, Trainer, TrainingArguments, TrainerCallback, default_data_collator)
import evaluate
import numpy as np
from google.colab import drive, files
import os
import dataclasses
from dataclasses import dataclass, field
from tqdm import tqdm

from typing import Dict, List, Optional
import torch
from transformers import AutoTokenizer
from datasets import load_metric
from codebleu import calc_codebleu
from transformers import RobertaTokenizer, T5ForConditionalGeneration

In [None]:
# Mount Google Drive to access the files
drive.mount('/content/drive')

# Google Drive directories of the fine-tuned CodeT5 models / checkpoints.
# `baseline` and the `gpt_aug_*` paths are loaded further down with
# T5ForConditionalGeneration.from_pretrained.
baseline = '/content/drive/My Drive/dsml/1_project/Model/codet5/codet5_baseline'

bert_sr = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_bertsr_n3/checkpoint-6000'
word2vec_sr = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_w2vsr_n3/checkpoint-6000'
t5_bt = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_backtc/checkpoint-9000'
ranaug = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_ranaug_n3_0.15/checkpoint-6000'

# Three checkpoints (steps 6500 / 7000 / 7500) of the same `codet5_gpt3` run
gpt_aug_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_gpt3/checkpoint-6500'
gpt_aug_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_gpt3/checkpoint-7000'
gpt_aug_3 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_gpt3/checkpoint-7500'

Mounted at /content/drive


In [None]:
# Load the CodeT5-base tokenizer and pretrained weights. The codet5-small
# variant below is commented out and is not loaded.
tokenizer_t5b = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')
model_t5b = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base')#(baseline)
# tokenizer_t5s = RobertaTokenizer.from_pretrained('Salesforce/codet5-small')
# model_t5s = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small')

# Load the raw test split of the "Python-program-level" configuration of
# codeparrot/xlcost-text-to-code (rows carry 'text' and 'code' columns)
test_dataset = load_dataset('codeparrot/xlcost-text-to-code', "Python-program-level", split='test')

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/703k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/294k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/12.5k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.61k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/3.32k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/11.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.02M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/570k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [None]:
# tokenise the datasets
max_input_length =  400
max_target_length = 400

# tokenize the examples
def convert_to_features(example_batch):
    """Tokenize a batch of text/code pairs into fixed-length model features.

    Args:
        example_batch: Batch mapping with a ``'text'`` list (model inputs) and
            a ``'code'`` list (targets), as supplied by ``Dataset.map`` when
            called with ``batched=True``.

    Returns:
        Dict with ``input_ids`` / ``attention_mask`` taken from the text
        encodings and ``decoder_input_ids`` / ``decoder_attention_mask`` taken
        from the code encodings. Texts are truncated and padded to
        ``max_input_length``, code to ``max_target_length``.
    """
    # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`
    # flag; combined with `max_length` it pads every sequence to that length.
    input_encodings = tokenizer_t5b.batch_encode_plus(
        example_batch['text'],
        max_length=max_input_length,
        add_special_tokens=True,
        truncation=True,
        padding='max_length',
    )

    target_encodings = tokenizer_t5b.batch_encode_plus(
        example_batch['code'],
        max_length=max_target_length,
        add_special_tokens=True,
        truncation=True,
        padding='max_length',
    )

    encodings = {
        'input_ids': input_encodings['input_ids'],
        'attention_mask': input_encodings['attention_mask'],
        'decoder_input_ids': target_encodings['input_ids'],
        'decoder_attention_mask': target_encodings['attention_mask'],
    }

    return encodings

def add_eos_examples(example):
    """Append the tokenizer's separator token to the 'code' and 'text' fields.

    The example is updated in place and returned; it is applied per row
    through ``test_dataset.map``.
    """
    for column in ('code', 'text'):
        example[column] = example[column] + tokenizer_t5b.sep_token
    return example

# Append the separator token to every example, then tokenize in batches
t_test_dataset  = test_dataset.map(add_eos_examples)
t_test_dataset  = t_test_dataset.map(convert_to_features,  batched=True)

# NOTE: `test_dataset` is rebound here to the tokenized dataset without the
# raw "text"/"code" columns, so this cell cannot be re-run without first
# reloading the dataset (add_eos_examples reads both columns).
test_dataset = t_test_dataset.remove_columns(["text", "code"])

# Return only the model feature columns, formatted as torch tensors
columns = ['input_ids', 'decoder_input_ids', 'attention_mask', 'decoder_attention_mask']
test_dataset.set_format(type='torch', columns=columns)

Map:   0%|          | 0/887 [00:00<?, ? examples/s]

Map:   0%|          | 0/887 [00:00<?, ? examples/s]



## codet5-base

In [None]:
# metric_bleu = load_metric("bleu")
# metric_codebleu = calc_codebleu

# Check if CUDA is available, and if so, use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def text_generation(model, tokenizer, max_length=400):
    """Generate a prediction for every example in ``test_dataset`` and decode it.

    Args:
        model: Model exposing ``generate``; it is moved to ``device`` and put
            in evaluation mode before generation starts.
        tokenizer: Tokenizer used to convert token ids back into text.
        max_length: Maximum generated sequence length passed to
            ``model.generate`` (defaults to the previously hard-coded 400).

    Returns:
        Tuple ``(labels_texts, preds_texts)``: two lists of equal length with
        the decoded reference (from ``decoder_input_ids``) and the decoded
        prediction for each example, special tokens removed.
    """
    # Loop-invariant setup: place the model and switch to eval mode once,
    # not once per example.
    model = model.to(device)
    model.eval()

    # Tokens dropped from both references and predictions before decoding
    special_tokens = {
        tokenizer.sep_token,
        tokenizer.pad_token,
        tokenizer.eos_token,
        tokenizer.bos_token,
    }

    def ids_to_text(token_ids):
        # Convert ids to tokens, remove all special tokens, join into a string
        tokens = tokenizer.convert_ids_to_tokens(token_ids)
        kept_tokens = [token for token in tokens if token not in special_tokens]
        return tokenizer.convert_tokens_to_string(kept_tokens)

    labels_texts = []
    preds_texts = []

    for data in tqdm(test_dataset, desc='Generating predictions'):
        input_ids = data['input_ids'].unsqueeze(0).to(device)
        # Pass the padding mask explicitly rather than leaving generate() to
        # infer it from the pad tokens in the fixed-length input.
        attention_mask = data['attention_mask'].unsqueeze(0).to(device)
        generated_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
        )

        # Labels are only decoded, so they never need to leave the CPU
        labels_texts.append(ids_to_text(data['decoder_input_ids'].tolist()))
        preds_texts.append(ids_to_text(generated_ids[0].tolist()))

    return labels_texts, preds_texts

# Load the BLEU and exact-match metrics from `evaluate`. Only `bleu` is used
# by calc_score; `em` is not referenced in the cells shown here.
bleu = evaluate.load("bleu")
em = evaluate.load("exact_match")

def structured_code(code):
    """Rebuild indented source text from marker-tokenized code.

    The input is a single string in which ``NEW_LINE`` terminates each line
    and ``INDENT`` / ``DEDENT`` markers open or close one indentation level
    (four spaces) starting with the line they appear on.

    Args:
        code: Marker-tokenized code string.

    Returns:
        The code with lines joined by ``'\\n'`` and markers replaced by real
        indentation. Text after the last ``NEW_LINE`` is discarded, so a
        string without any ``NEW_LINE`` yields ``''``.
    """
    indent_unit = ' ' * 4
    depth = 0
    rendered = []

    # The final split segment (whatever follows the last NEW_LINE) is dropped
    for segment in code.split('NEW_LINE')[:-1]:
        line = segment.strip()

        if 'INDENT' in line:
            line = line.replace('INDENT ', '')
            depth += 1

        while 'DEDENT' in line:
            # A marker at the very end of the line has no trailing space
            marker = 'DEDENT ' if 'DEDENT ' in line else 'DEDENT'
            line = line.replace(marker, '', 1)
            # Clamp at zero: surplus DEDENTs (e.g. in a malformed prediction)
            # must not eat into the code text of this or later lines.
            depth = max(depth - 1, 0)

        rendered.append(indent_unit * depth + line)

    return '\n'.join(rendered)

def calc_score(pred_texts, ref_texts):
    """Score predictions against references with CodeBLEU and BLEU.

    Each reference/prediction pair is first passed through
    ``structured_code`` and scored on its own; the per-pair scores are then
    averaged.

    Args:
        pred_texts: Predicted code strings.
        ref_texts: Reference code strings, aligned with ``pred_texts``.

    Returns:
        Tuple ``(average_codebleu_score, average_bleu_score)``, each rounded
        to four decimal places.

    Raises:
        ZeroDivisionError: If there are no pairs to score.
    """
    def rounded_mean(values):
        return round(sum(values) / len(values), 4)

    per_pair_codebleu = []
    per_pair_bleu = []

    for ref_raw, pred_raw in zip(ref_texts, pred_texts):
        ref_code = structured_code(ref_raw)
        pred_code = structured_code(pred_raw)

        # CodeBLEU with equal weight on its four components
        codebleu_result = calc_codebleu(
            [ref_code],
            [pred_code],
            lang="python",
            weights=(0.25, 0.25, 0.25, 0.25),
            tokenizer=None,
        )
        per_pair_codebleu.append(codebleu_result['codebleu'])

        # BLEU for the same pair
        bleu_result = bleu.compute(predictions=[pred_code], references=[ref_code])
        per_pair_bleu.append(bleu_result['bleu'])

    average_codebleu_score = rounded_mean(per_pair_codebleu)
    average_bleu_score = rounded_mean(per_pair_bleu)
    return average_codebleu_score, average_bleu_score

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/5.67k [00:00<?, ?B/s]

### Baseline

In [None]:
model_t5b = T5ForConditionalGeneration.from_pretrained(baseline)
labels_texts, preds_texts = text_generation(model_t5b, tokenizer_t5b)

Generating predictions:  11%|█▏        | 102/887 [06:06<46:58,  3.59s/it]


KeyboardInterrupt: ignored

In [None]:
%%capture
average_codebleu_score, average_bleu_score = calc_score(preds_texts, labels_texts)

In [None]:
print(average_codebleu_score)
print(average_bleu_score)

0.3799
0.3203


### Bert sr

In [None]:
bert_sr_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5/codet5_bertsr/checkpoint-6000'
bert_sr_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_bertsr_n1/checkpoint-6000'
bert_sr_3 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_bertsr_n3/checkpoint-6000'

# 15% and 1 augment
model_bsr_1 = T5ForConditionalGeneration.from_pretrained(bert_sr_1)
labels_texts_bsr_1, preds_texts_bsr_1 = text_generation(model_bsr_1, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_bsr_1, average_bleu_score_bsr_1= calc_score(preds_texts_bsr_1, labels_texts_bsr_1)

In [None]:
print(average_codebleu_score_bsr_1)
print(average_bleu_score_bsr_1)

0.3832
0.3227


In [None]:
# 30% and 1 augment
model_bsr_2 = T5ForConditionalGeneration.from_pretrained(bert_sr_2)
labels_texts_bsr_2, preds_texts_bsr_2 = text_generation(model_bsr_2, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [46:15<00:00,  3.13s/it]


In [None]:
%%capture
average_codebleu_score_bsr_2, average_bleu_score_bsr_2 = calc_score(preds_texts_bsr_2, labels_texts_bsr_2)

In [None]:
print(average_codebleu_score_bsr_2)
print(average_bleu_score_bsr_2)

0.382
0.3177


In [None]:
# 30% and 3 augment
model_bsr_3 = T5ForConditionalGeneration.from_pretrained(bert_sr_3)
labels_texts_bsr_3, preds_texts_bsr_3 = text_generation(model_bsr_3, tokenizer_t5b)

Generating predictions:   6%|▌         | 51/887 [02:57<53:38,  3.85s/it]

In [None]:
%%capture
average_codebleu_score_bsr_3, average_bleu_score_bsr_3 = calc_score(preds_texts_bsr_3, labels_texts_bsr_3)

In [None]:
print(average_codebleu_score_bsr_3)
print(average_bleu_score_bsr_3)

0.3776
0.3131


### Word2vec

In [None]:
word2vec_sr_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5/codet5_w2vsr/checkpoint-5000'
word2vec_sr_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_w2vsr_n1/checkpoint-6000'
word2vec_sr_3 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_w2vsr_n3/checkpoint-6000'

# 15% and 1 augment
model_wv_1 = T5ForConditionalGeneration.from_pretrained(word2vec_sr_1)
labels_texts_wv_1, preds_texts_wv_1 = text_generation(model_wv_1, tokenizer_t5b)

OSError: ignored

In [None]:
%%capture
average_codebleu_score_wv_1, average_bleu_score_wv_1 = calc_score(preds_texts_wv_1, labels_texts_wv_1)

In [None]:
print(average_codebleu_score_wv_1)
print(average_bleu_score_wv_1)

0.3779
0.3162


In [None]:
# 30% and 1 augment
model_wv_2 = T5ForConditionalGeneration.from_pretrained(word2vec_sr_2)
labels_texts_wv_2, preds_texts_wv_2 = text_generation(model_wv_2, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [48:06<00:00,  3.25s/it]


In [None]:
%%capture
average_codebleu_score_wv_2, average_bleu_score_wv_2 = calc_score(preds_texts_wv_2, labels_texts_wv_2)

In [None]:
print(average_codebleu_score_wv_2)
print(average_bleu_score_wv_2)

0.3825
0.3192


In [None]:
# 30% and 3 augment
model_wv_3 = T5ForConditionalGeneration.from_pretrained(word2vec_sr_3)
labels_texts_wv_3, preds_texts_wv_3 = text_generation(model_wv_3, tokenizer_t5b)

Generating predictions:  68%|██████▊   | 602/887 [33:44<11:59,  2.52s/it]

In [None]:
%%capture
average_codebleu_score_wv_3, average_bleu_score_wv_3 = calc_score(preds_texts_wv_3, labels_texts_wv_3)

In [None]:
print(average_codebleu_score_wv_3)
print(average_bleu_score_wv_3)

0.3848
0.3213


### RandomAug

In [None]:
ranaug5_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5/codet5_ranaug/checkpoint-5000'
ranaug5_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_ranaug_n3_0.15/checkpoint-6000'
ranaug5_3 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_ranaug_n1/checkpoint-6000' # this should be 30% with 1 aug
ranaug5_4 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_ranaug/checkpoint-6000' # this should be 30% with 3 aug
ranaug5_5 = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_ranaug_n3_0.5/checkpoint-6000'
# 15% and 1 augment
model_ra_1 = T5ForConditionalGeneration.from_pretrained(ranaug5_1)
labels_texts_ra_1, preds_texts_ra_1 = text_generation(model_ra_1, tokenizer_t5b)

Generating predictions:   0%|          | 0/887 [00:11<?, ?it/s]


KeyboardInterrupt: ignored

In [None]:
%%capture
average_codebleu_score_ra_1, average_bleu_score_ra_1 = calc_score(preds_texts_ra_1, labels_texts_ra_1)

In [None]:
print(average_codebleu_score_ra_1)
print(average_bleu_score_ra_1) # 30% for three n

0.3782
0.3161


In [None]:
# 15% and 3 augment
model_ra_2 = T5ForConditionalGeneration.from_pretrained(ranaug5_2)
labels_texts_ra_2, preds_texts_ra_2 = text_generation(model_ra_2, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [55:55<00:00,  3.78s/it]


In [None]:
%%capture
average_codebleu_score_ra_2, average_bleu_score_ra_2 = calc_score(preds_texts_ra_2, labels_texts_ra_2)

In [None]:
print(average_codebleu_score_ra_2)
print(average_bleu_score_ra_2)

0.3853
0.3243


In [None]:
# 30% and 1 augment
model_ra_3 = T5ForConditionalGeneration.from_pretrained(ranaug5_3)
labels_texts_ra_3, preds_texts_ra_3 = text_generation(model_ra_3, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [53:44<00:00,  3.64s/it]


In [None]:
%%capture
average_codebleu_score_ra_3, average_bleu_score_ra_3 = calc_score(preds_texts_ra_3, labels_texts_ra_3)

In [None]:
print(average_codebleu_score_ra_3)
print(average_bleu_score_ra_3)

0.3847
0.3209


In [None]:
# 30% and 3 augment
model_ra_4 = T5ForConditionalGeneration.from_pretrained(ranaug5_4)
labels_texts_ra_4, preds_texts_ra_4 = text_generation(model_ra_4, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [54:24<00:00,  3.68s/it]


In [None]:
%%capture
average_codebleu_score_ra_4, average_bleu_score_ra_4 = calc_score(preds_texts_ra_4, labels_texts_ra_4)

In [None]:
print(average_codebleu_score_ra_4)
print(average_bleu_score_ra_4)

0.3877
0.3274


In [None]:
# 50% and 3 augment
model_ra_5 = T5ForConditionalGeneration.from_pretrained(ranaug5_5)
labels_texts_ra_5, preds_texts_ra_5 = text_generation(model_ra_5, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [54:19<00:00,  3.67s/it]


In [None]:
%%capture
average_codebleu_score_ra_5, average_bleu_score_ra_5 = calc_score(preds_texts_ra_5, labels_texts_ra_5)

In [None]:
print(average_codebleu_score_ra_5)
print(average_bleu_score_ra_5)

0.3842
0.3239


### Backtranslation

In [None]:
t5_btg = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_backtg/checkpoint-6000'
t5_btf = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_backtf/checkpoint-6000'
t5_bts = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_backts/checkpoint-6000'

# german
model_btg = T5ForConditionalGeneration.from_pretrained(t5_btg)
labels_texts_btg, preds_texts_btg = text_generation(model_btg, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_btg, average_bleu_score_btg= calc_score(preds_texts_btg, labels_texts_btg)

In [None]:
print(average_codebleu_score_btg)
print(average_bleu_score_btg)

In [None]:
# french
model_btf = T5ForConditionalGeneration.from_pretrained(t5_btf)
labels_texts_btf, preds_texts_btf = text_generation(model_btf, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_btf, average_bleu_score_btf= calc_score(preds_texts_btf, labels_texts_btf)

In [None]:
print(average_codebleu_score_btf)
print(average_bleu_score_btf)

In [None]:
# spanish
model_bts = T5ForConditionalGeneration.from_pretrained(t5_bts)
labels_texts_bts, preds_texts_bts = text_generation(model_bts, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_bts, average_bleu_score_bts= calc_score(preds_texts_bts, labels_texts_bts)

In [None]:
print(average_codebleu_score_bts)
print(average_bleu_score_bts)

In [None]:
t5_btc = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_backtc/checkpoint-6000'
# combine
model_btc = T5ForConditionalGeneration.from_pretrained(t5_btc)
labels_texts_btc, preds_texts_btc = text_generation(model_btc, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_btc, average_bleu_score_btc= calc_score(preds_texts_btc, labels_texts_btc)

NameError: ignored

In [None]:
print(average_codebleu_score_btc)
print(average_bleu_score_btc)

In [None]:
t5_btg_half = '/content/drive/My Drive/dsml/1_project/Model/codet5_2/codet5_backt_gf/checkpoint-6000'
# German + French back-translation (presumably, going by the `backt_gf` checkpoint name — confirm)
model_btg_half = T5ForConditionalGeneration.from_pretrained(t5_btg_half)
labels_texts_btg_half, preds_texts_btg_half = text_generation(model_btg_half, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [53:01<00:00,  3.59s/it]


In [None]:
%%capture
average_codebleu_score_btg_half, average_bleu_score_btg_half= calc_score(preds_texts_btg_half, labels_texts_btg_half)

In [None]:
print(average_codebleu_score_btg_half)
print(average_bleu_score_btg_half)

0.3808
0.3173


### GPT3

In [None]:
model_gpt1 = T5ForConditionalGeneration.from_pretrained(gpt_aug_1)
labels_texts_gpt1, preds_texts_gpt1 = text_generation(model_gpt1, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_gpt1, average_bleu_score_gpt1 = calc_score(preds_texts_gpt1, labels_texts_gpt1)

In [None]:
print(average_codebleu_score_gpt1)
print(average_bleu_score_gpt1)

In [None]:
model_gpt2 = T5ForConditionalGeneration.from_pretrained(gpt_aug_2)
labels_texts_gpt2, preds_texts_gpt2 = text_generation(model_gpt2, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_gpt2, average_bleu_score_gpt2 = calc_score(preds_texts_gpt2, labels_texts_gpt2)

In [None]:
print(average_codebleu_score_gpt2)
print(average_bleu_score_gpt2)

In [None]:
model_gpt3 = T5ForConditionalGeneration.from_pretrained(gpt_aug_3)
labels_texts_gpt3, preds_texts_gpt3 = text_generation(model_gpt3, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_gpt3, average_bleu_score_gpt3 = calc_score(preds_texts_gpt3, labels_texts_gpt3)

In [None]:
print(average_codebleu_score_gpt3)
print(average_bleu_score_gpt3)

### Textattack

In [None]:
char_aug_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_char_1_0.15/checkpoint-6000'
char_aug_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_char_1_0.3/checkpoint-6000'
char_aug_3 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_char_3_0.3/checkpoint-6000'

# 0.15_1
model_char1 = T5ForConditionalGeneration.from_pretrained(char_aug_1)
labels_texts_char1, preds_texts_char1 = text_generation(model_char1, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_char1, average_bleu_score_char1 = calc_score(preds_texts_char1, labels_texts_char1)

In [None]:
print(average_codebleu_score_char1)
print(average_bleu_score_char1)

In [None]:
model_char2 = T5ForConditionalGeneration.from_pretrained(char_aug_2)
labels_texts_char2, preds_texts_char2 = text_generation(model_char2, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_char2, average_bleu_score_char2 = calc_score(preds_texts_char2, labels_texts_char2)

In [None]:
print(average_codebleu_score_char2)
print(average_bleu_score_char2)

In [None]:
model_char3 = T5ForConditionalGeneration.from_pretrained(char_aug_3)
labels_texts_char3, preds_texts_char3 = text_generation(model_char3, tokenizer_t5b)

In [None]:
%%capture
average_codebleu_score_char3, average_bleu_score_char3 = calc_score(preds_texts_char3, labels_texts_char3)

In [None]:
print(average_codebleu_score_char3)
print(average_bleu_score_char3)

### wordnet

In [None]:
wordnet_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_wordnet_1_0.15/checkpoint-6000'
wordnet_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_wordnet_1_0.3/checkpoint-6000'

# 0.15_1
model_wordnet1 = T5ForConditionalGeneration.from_pretrained(wordnet_1)
labels_texts_wordnet1, preds_texts_wordnet1 = text_generation(model_wordnet1, tokenizer_t5b)

KeyboardInterrupt: ignored

In [None]:
%%capture
average_codebleu_score_wordnet1, average_bleu_score_wordnet1 = calc_score(preds_texts_wordnet1, labels_texts_wordnet1)

In [None]:
print(average_codebleu_score_wordnet1)
print(average_bleu_score_wordnet1)

0.3846
0.3209


In [None]:
wordnet_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_wordnet_1_0.3/checkpoint-6000'

model_wordnet2 = T5ForConditionalGeneration.from_pretrained(wordnet_2)
labels_texts_wordnet2, preds_texts_wordnet2 = text_generation(model_wordnet2, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [51:46<00:00,  3.50s/it]


In [None]:
%%capture
average_codebleu_score_wordnet2, average_bleu_score_wordnet2 = calc_score(preds_texts_wordnet2, labels_texts_wordnet2)

In [None]:
print(average_codebleu_score_wordnet2)
print(average_bleu_score_wordnet2)

0.3802
0.3173


### tf-idf

In [None]:
tfidf_1 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_tfidf_1_0.15/checkpoint-6000'
tfidf_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_tfidf_1_0.3/checkpoint-6000'

# 0.15_1
model_tfidf1 = T5ForConditionalGeneration.from_pretrained(tfidf_1)
labels_texts_tfidf1, preds_texts_tfidf1 = text_generation(model_tfidf1, tokenizer_t5b)

Generating predictions:   0%|          | 2/887 [00:36<4:28:01, 18.17s/it]


KeyboardInterrupt: ignored

In [None]:
%%capture
average_codebleu_score_tfidf1, average_bleu_score_tfidf1 = calc_score(preds_texts_tfidf1, labels_texts_tfidf1)

NameError: ignored

In [None]:
print(average_codebleu_score_tfidf1)
print(average_bleu_score_tfidf1)

0.3804
0.3173


In [None]:
tfidf_2 = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_tfidf_1_0.3/checkpoint-6000'

model_tfidf2 = T5ForConditionalGeneration.from_pretrained(tfidf_2)
labels_texts_tfidf2, preds_texts_tfidf2 = text_generation(model_tfidf2, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [50:46<00:00,  3.43s/it]


In [None]:
%%capture
average_codebleu_score_tfidf2, average_bleu_score_tfidf2 = calc_score(preds_texts_tfidf2, labels_texts_tfidf2)

In [None]:
print(average_codebleu_score_tfidf2)
print(average_bleu_score_tfidf2)

0.3802
0.3173


## Half

In [None]:
# baseline
half_baseline = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_baseline_half/checkpoint-3000'

model_h_baseline = T5ForConditionalGeneration.from_pretrained(half_baseline)
labels_texts_h_baseline, preds_texts_h_baseline = text_generation(model_h_baseline, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [53:34<00:00,  3.62s/it]


In [None]:
%%capture
average_codebleu_score_h_baseline, average_bleu_score_h_baseline = calc_score(preds_texts_h_baseline, labels_texts_h_baseline)

In [None]:
print(average_codebleu_score_h_baseline)
print(average_bleu_score_h_baseline)

0.3611
0.2985


In [None]:
# half augment by backtranslation German
half_g = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_backt_halfg/checkpoint-3000'

model_h_backg = T5ForConditionalGeneration.from_pretrained(half_g)
labels_texts_h_backg, preds_texts_h_backg = text_generation(model_h_backg, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [52:39<00:00,  3.56s/it]


In [None]:
%%capture
average_codebleu_score_h_backg, average_bleu_score_h_backg = calc_score(preds_texts_h_backg, labels_texts_h_backg)

In [None]:
print(average_codebleu_score_h_backg)
print(average_bleu_score_h_backg)

0.3531
0.2885


In [None]:
# half augment by backtranslation French
half_f = '/content/drive/My Drive/dsml/1_project/Model/codet5_3/codet5_backt_halff/checkpoint-3000'

model_h_backf = T5ForConditionalGeneration.from_pretrained(half_f)
labels_texts_h_backf, preds_texts_h_backf = text_generation(model_h_backf, tokenizer_t5b)

Generating predictions: 100%|██████████| 887/887 [52:37<00:00,  3.56s/it]


In [None]:
%%capture
average_codebleu_score_h_backf, average_bleu_score_h_backf = calc_score(preds_texts_h_backf, labels_texts_h_backf)

In [None]:
print(average_codebleu_score_h_backf)
print(average_bleu_score_h_backf)

0.3586
0.2934
