## Error analysis of finetune model on function name prediction

In [1]:
# Since I worked on runopod I have pretty small container storage,
# so I have to put HF stuff on network disk
import os
os.environ["HF_HOME"] = "/workspace/hf"

In [5]:
from datasets import load_dataset
# We will use only 1000 samples to get reasonable fast infernce
test_dst = load_dataset("hynky/jetbrains-community-function_name")["test"].shuffle(seed=42).select(range(1000))
# Sort by length for faster inference
test_dst = test_dst.map(lambda x: {"len": len(x["source_code"])})
test_dst = test_dst.sort("len")

In [6]:
import re
def get_fc_name_tokens(fc_name):
    """
    This function parses the function name and returns its "tokens",
    splitting on underscores and camel case.
    """
    # Split on underscores
    parts = fc_name.split('_')

    # Split camel case parts
    tokens = []
    for part in parts:
        tokens.extend(re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)', part))

    return tokens


In [7]:
import numpy as np

def calculate_precision(expected, predicted):
    matching_tokens = np.sum(np.isin(predicted, expected))
    precision = matching_tokens / len(predicted) if predicted else 0
    return precision

def calculate_recall(expected, predicted):
    matching_tokens = np.sum(np.isin(predicted, expected))
    recall = matching_tokens / len(expected) if expected else 0
    return recall

def calculate_f1(precision, recall):
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall else 0
    return f1

def calculate_average_metric(expected_tokens, predicted_tokens, metric_func):
    total_metric = np.sum([metric_func(expected, predicted) for expected, predicted in zip(expected_tokens, predicted_tokens)])
    average_metric = total_metric / len(expected_tokens)
    return average_metric

In [8]:
from sklearn.metrics import f1_score, accuracy_score
import torch
from tqdm import tqdm

def predict_with_model(model, tokenizer, dataset, batch_size: int, max_length: int = 2048, max_gen_tokens=100):
    padding = True if batch_size > 1 else "do_not_pad"
    predictions = []
    for batch_start in tqdm(range(0, len(dataset), batch_size)):
        batch = dataset.select(range(batch_start, min(batch_start + batch_size, len(dataset))))
        tokenized = batch.map(lambda x: tokenizer(x["text"], padding=padding, 
                                                    add_special_tokens=False, max_length=max_length,
                                                    truncation=True, verbose=False), batched=True)
        inputs = torch.tensor(tokenized["input_ids"]).to("cuda")
        print(inputs.shape)
        attention_mask = torch.tensor(tokenized["attention_mask"]).to("cuda")
        generated_tokens = model.generate(inputs=inputs,
                                attention_mask=attention_mask,
                                max_new_tokens=max_gen_tokens)

        generated_tokens = generated_tokens.detach().to("cpu")
        predicted_names = tokenizer.batch_decode(generated_tokens[:, inputs.shape[1]:], skip_special_tokens=True)
        predictions.extend(predicted_names)

        # Free cuda memory
        del generated_tokens
        del inputs
        del attention_mask
        import gc
        gc.collect()
        torch.cuda.empty_cache()

    return predictions

def calculate_accuracy(true_tokens, predicted_tokens, tokenizer):
    # Convert tokens to text
    true_texts = [tokenizer.decode(tokens) for tokens in true_tokens]
    predicted_texts = [tokenizer.decode(tokens) for tokens in predicted_tokens]

    # Calculate accuracy
    accuracy = accuracy_score(true_texts, predicted_texts)

    return accuracy

import re

def extract_function_name(text):
    match = re.search(r"```\s*([^`]*)\s*```", text)
    if match:
        return match.group(1)
    
    match_simple = re.search(r"`\s*([^`]*)\s*`", text)
    if match_simple:
        return match_simple.group(1)
    return text

def evaluate_with_model(model, tokenizer, dataset, batch_size: int, max_length: int = 2048, max_gen_tokens=100, transform_predictions=lambda x: x):
    # model = None
    predictions = predict_with_model(model, tokenizer, dataset, batch_size, max_length, max_gen_tokens)
    predictions = transform_predictions(predictions)
    expected_tokens = [get_fc_name_tokens(x) for x in dataset["function_name"]]
    predictions_tokens = [get_fc_name_tokens(x) for x in predictions]

    precision = [calculate_precision(exp, pred) for exp,pred in zip(expected_tokens, predictions_tokens)]
    recall = [calculate_recall(exp, pred) for exp,pred in zip(expected_tokens, predictions_tokens)]
    f1 = [calculate_f1(prec, recall) for prec,recall in zip(precision, recall)]
    return {
        "accuracy": accuracy_score(predictions, dataset["function_name"]),
        "recall":  np.sum(recall) / len(predictions_tokens),
        "precision": np.sum(precision) / len(predictions_tokens),
        "f1": np.sum(f1) / len(predictions_tokens),
        "predictions": predictions
    }

## Simple prompting without finetune_model first

In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
model_name = "codellama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
# Since we are able to fit the model in memory, we don't have to go for peft
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, use_flash_attention_2=True, device_map="auto")

Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.90s/it]


In [15]:
# Tokenize
def apply_template(x, tokenizer):
    return {"text": tokenizer.apply_chat_template(
        [{"role": "system", "content": "Generate a fitting name for the provided function. Place the suggested function name inside tripplet backtics. E.g ```adder```"},
         {"role": "user", "content": x["source_code"]}],
        tokenize=False)
    }

dataset = test_dst.map(apply_template, fn_kwargs={"tokenizer": tokenizer})
# I have to keep the batch size low because long examples would yield CUDA OOM
# Again could be better implemented by choosing batch size based on length
results_base = evaluate_with_model(model, tokenizer, dataset, 6, transform_predictions=lambda x: [extract_function_name(y) for y in x], max_length=1024)

Map: 100%|██████████| 1000/1000 [00:00<00:00, 5789.16 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 1008.04 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 65])


Map: 100%|██████████| 6/6 [00:00<00:00, 783.20 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 66])


Map: 100%|██████████| 6/6 [00:00<00:00, 943.00 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 65])


Map: 100%|██████████| 6/6 [00:00<00:00, 721.19 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 65])


Map: 100%|██████████| 6/6 [00:00<00:00, 754.55 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 68])


Map: 100%|██████████| 6/6 [00:00<00:00, 904.37 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 68])


Map: 100%|██████████| 6/6 [00:00<00:00, 944.45 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 68])


Map: 100%|██████████| 6/6 [00:00<00:00, 1019.89 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 70])


Map: 100%|██████████| 6/6 [00:00<00:00, 773.50 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 70])


Map: 100%|██████████| 6/6 [00:00<00:00, 830.61 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 71])


Map: 100%|██████████| 6/6 [00:00<00:00, 864.48 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 71])


Map: 100%|██████████| 6/6 [00:00<00:00, 557.89 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 71])


Map: 100%|██████████| 6/6 [00:00<00:00, 788.53 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 71])


Map: 100%|██████████| 6/6 [00:00<00:00, 1006.47 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 73])


Map: 100%|██████████| 6/6 [00:00<00:00, 1097.51 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 71])


Map: 100%|██████████| 6/6 [00:00<00:00, 670.02 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 72])


Map: 100%|██████████| 6/6 [00:00<00:00, 971.20 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 72])


Map: 100%|██████████| 6/6 [00:00<00:00, 1000.75 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 75])


Map: 100%|██████████| 6/6 [00:00<00:00, 1017.99 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 76])


Map: 100%|██████████| 6/6 [00:00<00:00, 827.41 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 78])


Map: 100%|██████████| 6/6 [00:00<00:00, 825.38 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 76])


Map: 100%|██████████| 6/6 [00:00<00:00, 1010.15 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 75])


Map: 100%|██████████| 6/6 [00:00<00:00, 915.15 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 74])


Map: 100%|██████████| 6/6 [00:00<00:00, 1023.54 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 75])


Map: 100%|██████████| 6/6 [00:00<00:00, 971.24 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 79])


Map: 100%|██████████| 6/6 [00:00<00:00, 956.77 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 88])


Map: 100%|██████████| 6/6 [00:00<00:00, 823.30 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 78])


Map: 100%|██████████| 6/6 [00:00<00:00, 1004.38 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 78])


Map: 100%|██████████| 6/6 [00:00<00:00, 923.01 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 79])


Map: 100%|██████████| 6/6 [00:00<00:00, 758.94 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 79])


Map: 100%|██████████| 6/6 [00:00<00:00, 952.20 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 78])


Map: 100%|██████████| 6/6 [00:00<00:00, 831.93 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 82])


Map: 100%|██████████| 6/6 [00:00<00:00, 911.77 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 82])


Map: 100%|██████████| 6/6 [00:00<00:00, 864.66 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 80])


Map: 100%|██████████| 6/6 [00:00<00:00, 1007.96 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 79])


Map: 100%|██████████| 6/6 [00:00<00:00, 1055.26 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 79])


Map: 100%|██████████| 6/6 [00:00<00:00, 1034.78 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 82])


Map: 100%|██████████| 6/6 [00:00<00:00, 622.92 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 84])


Map: 100%|██████████| 6/6 [00:00<00:00, 863.23 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 81])


Map: 100%|██████████| 6/6 [00:00<00:00, 964.95 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 82])


Map: 100%|██████████| 6/6 [00:00<00:00, 815.91 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 84])


Map: 100%|██████████| 6/6 [00:00<00:00, 903.23 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 85])


Map: 100%|██████████| 6/6 [00:00<00:00, 733.70 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 86])


Map: 100%|██████████| 6/6 [00:00<00:00, 824.43 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 85])


Map: 100%|██████████| 6/6 [00:00<00:00, 759.24 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 86])


Map: 100%|██████████| 6/6 [00:00<00:00, 853.60 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 92])


Map: 100%|██████████| 6/6 [00:00<00:00, 943.14 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 86])


Map: 100%|██████████| 6/6 [00:00<00:00, 980.59 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 91])


Map: 100%|██████████| 6/6 [00:00<00:00, 762.86 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 83])


Map: 100%|██████████| 6/6 [00:00<00:00, 711.66 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 88])


Map: 100%|██████████| 6/6 [00:00<00:00, 702.74 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 89])


Map: 100%|██████████| 6/6 [00:00<00:00, 906.52 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 92])


Map: 100%|██████████| 6/6 [00:00<00:00, 1016.84 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 94])


Map: 100%|██████████| 6/6 [00:00<00:00, 914.46 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 98])


Map: 100%|██████████| 6/6 [00:00<00:00, 922.10 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 88])


Map: 100%|██████████| 6/6 [00:00<00:00, 791.40 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 91])


Map: 100%|██████████| 6/6 [00:00<00:00, 796.16 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 93])


Map: 100%|██████████| 6/6 [00:00<00:00, 751.64 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 91])


Map: 100%|██████████| 6/6 [00:00<00:00, 889.85 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 92])


Map: 100%|██████████| 6/6 [00:00<00:00, 823.87 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 100])


Map: 100%|██████████| 6/6 [00:00<00:00, 885.68 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 94])


Map: 100%|██████████| 6/6 [00:00<00:00, 996.59 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 92])


Map: 100%|██████████| 6/6 [00:00<00:00, 859.78 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 97])


Map: 100%|██████████| 6/6 [00:00<00:00, 713.40 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 95])


Map: 100%|██████████| 6/6 [00:00<00:00, 707.28 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 99])


Map: 100%|██████████| 6/6 [00:00<00:00, 836.19 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 95])


Map: 100%|██████████| 6/6 [00:00<00:00, 968.70 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 93])


Map: 100%|██████████| 6/6 [00:00<00:00, 903.46 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 104])


Map: 100%|██████████| 6/6 [00:00<00:00, 767.30 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 100])


Map: 100%|██████████| 6/6 [00:00<00:00, 959.03 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 99])


Map: 100%|██████████| 6/6 [00:00<00:00, 899.58 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 96])


Map: 100%|██████████| 6/6 [00:00<00:00, 960.31 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 94])


Map: 100%|██████████| 6/6 [00:00<00:00, 954.66 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 111])


Map: 100%|██████████| 6/6 [00:00<00:00, 914.16 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 106])


Map: 100%|██████████| 6/6 [00:00<00:00, 961.26 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 99])


Map: 100%|██████████| 6/6 [00:00<00:00, 848.36 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 109])


Map: 100%|██████████| 6/6 [00:00<00:00, 945.73 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 110])


Map: 100%|██████████| 6/6 [00:00<00:00, 877.04 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 114])


Map: 100%|██████████| 6/6 [00:00<00:00, 814.35 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 112])


Map: 100%|██████████| 6/6 [00:00<00:00, 939.27 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 107])


Map: 100%|██████████| 6/6 [00:00<00:00, 798.79 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 107])


Map: 100%|██████████| 6/6 [00:00<00:00, 791.08 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 111])


Map: 100%|██████████| 6/6 [00:00<00:00, 1006.75 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 122])


Map: 100%|██████████| 6/6 [00:00<00:00, 997.77 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 107])


Map: 100%|██████████| 6/6 [00:00<00:00, 1050.41 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 110])


Map: 100%|██████████| 6/6 [00:00<00:00, 1025.79 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 115])


Map: 100%|██████████| 6/6 [00:00<00:00, 983.12 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 112])


Map: 100%|██████████| 6/6 [00:00<00:00, 960.71 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 119])


Map: 100%|██████████| 6/6 [00:00<00:00, 954.48 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 114])


Map: 100%|██████████| 6/6 [00:00<00:00, 730.84 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 113])


Map: 100%|██████████| 6/6 [00:00<00:00, 708.64 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 112])


Map: 100%|██████████| 6/6 [00:00<00:00, 877.90 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 114])


Map: 100%|██████████| 6/6 [00:00<00:00, 819.33 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 118])


Map: 100%|██████████| 6/6 [00:00<00:00, 886.12 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 133])


Map: 100%|██████████| 6/6 [00:00<00:00, 940.64 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 127])


Map: 100%|██████████| 6/6 [00:00<00:00, 669.04 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 127])


Map: 100%|██████████| 6/6 [00:00<00:00, 894.37 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 123])


Map: 100%|██████████| 6/6 [00:00<00:00, 878.97 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 121])


Map: 100%|██████████| 6/6 [00:00<00:00, 877.78 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 133])


Map: 100%|██████████| 6/6 [00:00<00:00, 675.43 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 130])


Map: 100%|██████████| 6/6 [00:00<00:00, 856.48 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 140])


Map: 100%|██████████| 6/6 [00:00<00:00, 738.85 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 147])


Map: 100%|██████████| 6/6 [00:00<00:00, 700.76 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 127])


Map: 100%|██████████| 6/6 [00:00<00:00, 853.02 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 137])


Map: 100%|██████████| 6/6 [00:00<00:00, 573.54 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 151])


Map: 100%|██████████| 6/6 [00:00<00:00, 854.06 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 140])


Map: 100%|██████████| 6/6 [00:00<00:00, 827.17 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 152])


Map: 100%|██████████| 6/6 [00:00<00:00, 744.18 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 152])


Map: 100%|██████████| 6/6 [00:00<00:00, 871.27 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 137])


Map: 100%|██████████| 6/6 [00:00<00:00, 643.86 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 152])


Map: 100%|██████████| 6/6 [00:00<00:00, 650.30 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 154])


Map: 100%|██████████| 6/6 [00:00<00:00, 691.31 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 155])


Map: 100%|██████████| 6/6 [00:00<00:00, 737.33 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 156])


Map: 100%|██████████| 6/6 [00:00<00:00, 688.61 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 151])


Map: 100%|██████████| 6/6 [00:00<00:00, 720.55 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 151])


Map: 100%|██████████| 6/6 [00:00<00:00, 651.95 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 176])


Map: 100%|██████████| 6/6 [00:00<00:00, 726.54 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 172])


Map: 100%|██████████| 6/6 [00:00<00:00, 690.40 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 169])


Map: 100%|██████████| 6/6 [00:00<00:00, 685.27 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 158])


Map: 100%|██████████| 6/6 [00:00<00:00, 694.71 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 166])


Map: 100%|██████████| 6/6 [00:00<00:00, 763.34 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 171])


Map: 100%|██████████| 6/6 [00:00<00:00, 630.69 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 174])


Map: 100%|██████████| 6/6 [00:00<00:00, 819.97 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 183])


Map: 100%|██████████| 6/6 [00:00<00:00, 694.00 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 183])


Map: 100%|██████████| 6/6 [00:00<00:00, 750.05 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 187])


Map: 100%|██████████| 6/6 [00:00<00:00, 847.76 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 196])


Map: 100%|██████████| 6/6 [00:00<00:00, 836.55 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 185])


Map: 100%|██████████| 6/6 [00:00<00:00, 897.34 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 212])


Map: 100%|██████████| 6/6 [00:00<00:00, 834.55 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 204])


Map: 100%|██████████| 6/6 [00:00<00:00, 589.83 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 206])


Map: 100%|██████████| 6/6 [00:00<00:00, 835.96 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 207])


Map: 100%|██████████| 6/6 [00:00<00:00, 861.90 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 228])


Map: 100%|██████████| 6/6 [00:00<00:00, 914.19 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 217])


Map: 100%|██████████| 6/6 [00:00<00:00, 588.04 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 213])


Map: 100%|██████████| 6/6 [00:00<00:00, 778.14 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 217])


Map: 100%|██████████| 6/6 [00:00<00:00, 749.27 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 225])


Map: 100%|██████████| 6/6 [00:00<00:00, 499.31 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 237])


Map: 100%|██████████| 6/6 [00:00<00:00, 651.58 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 231])


Map: 100%|██████████| 6/6 [00:00<00:00, 659.86 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 243])


Map: 100%|██████████| 6/6 [00:00<00:00, 824.43 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 229])


Map: 100%|██████████| 6/6 [00:00<00:00, 566.44 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 256])


Map: 100%|██████████| 6/6 [00:00<00:00, 738.87 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 246])


Map: 100%|██████████| 6/6 [00:00<00:00, 691.65 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 243])


Map: 100%|██████████| 6/6 [00:00<00:00, 624.11 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 288])


Map: 100%|██████████| 6/6 [00:00<00:00, 610.44 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 287])


Map: 100%|██████████| 6/6 [00:00<00:00, 863.23 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 303])


Map: 100%|██████████| 6/6 [00:00<00:00, 645.63 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 350])


Map: 100%|██████████| 6/6 [00:00<00:00, 688.72 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 320])


Map: 100%|██████████| 6/6 [00:00<00:00, 702.84 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 336])


Map: 100%|██████████| 6/6 [00:00<00:00, 561.75 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 326])


Map: 100%|██████████| 6/6 [00:00<00:00, 544.07 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 365])


Map: 100%|██████████| 6/6 [00:00<00:00, 595.56 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 345])


Map: 100%|██████████| 6/6 [00:00<00:00, 567.60 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 381])


Map: 100%|██████████| 6/6 [00:00<00:00, 511.39 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 367])


Map: 100%|██████████| 6/6 [00:00<00:00, 444.01 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 392])


Map: 100%|██████████| 6/6 [00:00<00:00, 554.89 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 366])


Map: 100%|██████████| 6/6 [00:00<00:00, 574.82 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 434])


Map: 100%|██████████| 6/6 [00:00<00:00, 604.22 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 406])


Map: 100%|██████████| 6/6 [00:00<00:00, 602.67 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 460])


Map: 100%|██████████| 6/6 [00:00<00:00, 441.27 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 471])


Map: 100%|██████████| 6/6 [00:00<00:00, 527.77 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 467])


Map: 100%|██████████| 6/6 [00:00<00:00, 548.50 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 501])


Map: 100%|██████████| 6/6 [00:00<00:00, 429.00 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 559])


Map: 100%|██████████| 6/6 [00:00<00:00, 401.41 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 639])


Map: 100%|██████████| 6/6 [00:00<00:00, 374.50 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 764])


Map: 100%|██████████| 6/6 [00:00<00:00, 397.67 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([6, 926])


Map: 100%|██████████| 4/4 [00:00<00:00, 223.23 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([4, 991])


100%|██████████| 167/167 [07:26<00:00,  2.67s/it]


In [16]:
# lets see whether names are sensible
results_base["predictions"][:10]

['incrementer',
 'sum',
 'extract',
 'increment',
 'summarizer',
 'incrementer',
 'sum',
 'getX',
 'getModule',
 'typeChooserManager']

In [12]:
# free memory
del model
import gc
gc.collect()

torch.cuda.empty_cache()

In [2]:
# I will upload the trained LoRA adapter to hub here
from peft import AutoPeftModelForCausalLM

peft_model = AutoPeftModelForCausalLM.from_pretrained("./qlora-out/")
peft_model.push_to_hub("hynky/codellama-7b-sft-lora-func-names-java-4bit")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.05s/it]
CUDA extension not installed.
CUDA extension not installed.
adapter_model.safetensors: 100%|██████████| 1.28G/1.28G [00:42<00:00, 30.0MB/s] 


CommitInfo(commit_url='https://huggingface.co/hynky/codellama-7b-sft-lora-func-names-java-4bit/commit/28e94d36be189599227e59c8163c121370be0509', commit_message='Upload model', commit_description='', oid='28e94d36be189599227e59c8163c121370be0509', pr_url=None, pr_revision=None, pr_num=None)

## Finetune QLora

In [3]:
from pathlib import Path
from peft.auto import AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
model_name = "hynky/codellama-7b-sft-lora-func-names-java-4bit"
# Load in 4-bit as was trained
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, load_in_4bit=True, device_map="auto",
                                             use_flash_attention_2=True)

tokenizer = AutoTokenizer.from_pretrained(model.name_or_path)
tokenizer.pad_token = tokenizer.eos_token

adapter_config.json: 100%|██████████| 570/570 [00:00<00:00, 1.90MB/s]
The model was loaded with use_flash_attention_2=True, which is deprecated and may be removed in a future release. Please use `attn_implementation="flash_attention_2"` instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.60s/it]
adapter_model.safetensors: 100%|██████████| 1.28G/1.28G [00:37<00:00, 34.4MB/s]


In [9]:
def apply_template(x, tokenizer):
    return {"text": tokenizer.apply_chat_template(
        [{"role": "system", "content": f"Given the source code of a python function, suggest a fitting name for the function."},
        {"role": "user", "content": x["source_code"]}],
        tokenize=False)
    }
dataset = test_dst.map(apply_template, fn_kwargs={"tokenizer": tokenizer})
# I am not sure why the model adds space at the end of each function name
# We have abit more memory because of quantization so we can pump up the batch size a bit
results_qlora = evaluate_with_model(model, tokenizer, dataset, 8, transform_predictions=lambda preds: [pred.strip() for pred in preds])

Map: 100%|██████████| 8/8 [00:00<00:00, 1042.55 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 160])


Map: 100%|██████████| 8/8 [00:00<00:00, 976.81 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 144])


Map: 100%|██████████| 8/8 [00:00<00:00, 1196.83 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 154])


Map: 100%|██████████| 8/8 [00:00<00:00, 906.12 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 159])


Map: 100%|██████████| 8/8 [00:00<00:00, 1114.02 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 162])


Map: 100%|██████████| 8/8 [00:00<00:00, 1058.27 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 171])


Map: 100%|██████████| 8/8 [00:00<00:00, 862.96 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 175])


Map: 100%|██████████| 8/8 [00:00<00:00, 949.66 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 184])


Map: 100%|██████████| 8/8 [00:00<00:00, 833.77 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 200])


Map: 100%|██████████| 8/8 [00:00<00:00, 954.63 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 192])


Map: 100%|██████████| 8/8 [00:00<00:00, 952.41 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 194])


Map: 100%|██████████| 8/8 [00:00<00:00, 997.10 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 216])


Map: 100%|██████████| 8/8 [00:00<00:00, 1047.01 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 205])


Map: 100%|██████████| 8/8 [00:00<00:00, 801.51 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 205])


Map: 100%|██████████| 8/8 [00:00<00:00, 867.58 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 213])


Map: 100%|██████████| 8/8 [00:00<00:00, 1071.92 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 225])


Map: 100%|██████████| 8/8 [00:00<00:00, 778.80 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 231])


Map: 100%|██████████| 8/8 [00:00<00:00, 750.22 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 217])


Map: 100%|██████████| 8/8 [00:00<00:00, 827.63 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 244])


Map: 100%|██████████| 8/8 [00:00<00:00, 924.64 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 234])


Map: 100%|██████████| 8/8 [00:00<00:00, 672.46 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 276])


Map: 100%|██████████| 8/8 [00:00<00:00, 772.13 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 275])


Map: 100%|██████████| 8/8 [00:00<00:00, 831.85 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 338])


Map: 100%|██████████| 8/8 [00:00<00:00, 634.82 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 308])


Map: 100%|██████████| 8/8 [00:00<00:00, 1049.33 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 324])


Map: 100%|██████████| 8/8 [00:00<00:00, 854.67 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 325])


Map: 100%|██████████| 8/8 [00:00<00:00, 715.68 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 353])


Map: 100%|██████████| 8/8 [00:00<00:00, 734.81 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 369])


Map: 100%|██████████| 8/8 [00:00<00:00, 810.26 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 380])


Map: 100%|██████████| 8/8 [00:00<00:00, 827.54 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 357])


Map: 100%|██████████| 8/8 [00:00<00:00, 620.75 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 422])


Map: 100%|██████████| 8/8 [00:00<00:00, 559.18 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 448])


Map: 100%|██████████| 8/8 [00:00<00:00, 661.43 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 459])


Map: 100%|██████████| 8/8 [00:00<00:00, 501.57 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 472])


Map: 100%|██████████| 8/8 [00:00<00:00, 667.54 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 547])


Map: 100%|██████████| 8/8 [00:00<00:00, 565.14 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 627])


Map: 100%|██████████| 8/8 [00:00<00:00, 487.44 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 752])


Map: 100%|██████████| 8/8 [00:00<00:00, 352.14 examples/s]
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


torch.Size([8, 979])


100%|██████████| 125/125 [02:45<00:00,  1.32s/it]


In [19]:
# Table with result
# Incredible improvents
# However because I didn't spend much time with datset creation,
# there could be some sort of contamination
# of test set.
import pandas as pd

results = {
    'Base': results_base,
    'Q-LoRA': results_qlora
}

df_results = pd.DataFrame(results)
df_results


Unnamed: 0,Base,Q-LoRA
accuracy,0.072,0.396
recall,0.311549,0.642465
precision,0.287981,0.658388
f1,0.288279,0.641804
predictions,"[incrementer, sum, extract, increment, summari...","[dispose, getName, getAllFiles, dispose, getPr..."


In [20]:
# We will now continue with QLora results
# First let's see where it made mistake
import random

expected = dataset["function_name"]
predicted = results_qlora["predictions"]
errors = [i for i in range(len(expected)) if expected[i] != predicted[i]]

for i in random.sample(errors, 10):
    print(f"Example {i+1}")
    print(f"Expected: {expected[i]}")
    print(f"Predicted: {predicted[i]}")
    print(f"Source code: \n{dataset['source_code'][i]}")
    print("-"*50)
# I would say that the errors made by LLM are fairly reasonable.
# Sometimes just a parts are mistakes as can be seen in realtively high f1 score

Example 883
Expected: processError
Predicted: error
Source code: 
@Override
  public void x(final SAXParseException ex, final ValidateXmlActionHandler.ProblemType problemType) {
    if (LOG.isDebugEnabled()) {
      String error = myHandler.buildMessageString(ex);
      LOG.debug("enter: x(error='" + error + "')");
    }

    if (ApplicationManager.getApplication().isUnitTestMode()) {
      return;
    }
    ApplicationManager.getApplication().invokeLater(() -> {
        final VirtualFile file = myHandler.getProblemFile(ex);
        myErrorsView.addMessage(
            new String[]{ex.getLocalizedMessage()},
            file,
            ex.getLineNumber() - 1,
            ex.getColumnNumber() - 1,
            null
        );
      }
    );
  }
--------------------------------------------------
Example 93
Expected: getBlockCommentPrefix
Predicted: getCommentStartSequence
Source code: 
@Override
  public @NotNull String x() {
    return "<!--";
  }
--------------------------------------

In [23]:
# Let's compare average length
# Not sure whether possible, but since I limit token generation to 100, I will not
# generate some super long function names, so that could affect the avg predicted length
avg_len_expected = sum(len(name) for name in expected) / len(expected)
avg_len_predicted = sum(len(name) for name in predicted) / len(predicted)

print(f"Average length of expected function names: {avg_len_expected}")
print(f"Average length of predicted function names: {avg_len_predicted}")


Average length of expected function names: 14.637
Average length of predicted function names: 13.972


In [24]:
# Lastly let's see how much will accuracy rise if we use stemming
# Unfornutately almost not at all only 7 sampels were fixed

from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
stemmed_expected = ["_".join(stemmer.stem(token) for token in get_fc_name_tokens(name)) for name in expected]
stemmed_predicted = ["_".join(stemmer.stem(token) for token in get_fc_name_tokens(name)) for name in predicted]
errors_stemmed = [i for i in range(len(stemmed_expected)) if stemmed_expected[i] != stemmed_predicted[i]]

print(f"Accuracy after stemming: {1 - len(errors)/len(expected)}")

# Let's see which erros were fixed
fixed_indices = set(errors) - set(errors_stemmed)
print(len(fixed_indices))
for i in fixed_indices:
    print(f"Example {i+1}")
    print(f"Expected: {expected[i]}")
    print(f"Predicted: {predicted[i]}")
    print(f"Source code: \n{dataset['source_code'][i]}")
    print("-"*50)

Accuracy after stemming: 0.396
3
Example 513
Expected: setXmlSchemaVersion
Predicted: setXMLSchemaVersion
Source code: 
@Override
  public void x(XMLSchemaVersion version, @NotNull Project project) {
    getProjectResources(project).myXMLSchemaVersion = version;
    fireExternalResourceChanged();
  }
--------------------------------------------------
Example 833
Expected: initSubstitutes
Predicted: initSubstitutions
Source code: 
private boolean x() {
    if (mySubstitutions == null && myTag != null) {
      mySubstitutions = new MultiMap<>();

      XmlTag[] tags = myTag.getSubTags();

      for (XmlTag tag : tags) {
        if (equalsToSchemaName(tag, ELEMENT_TAG_NAME)) {
          final String substAttr = tag.getAttributeValue("substitutionGroup");
          if (substAttr != null) {
            String substLocalName = XmlUtil.findLocalNameByQualifiedName(substAttr);
            mySubstitutions.putValue(substLocalName, tag);
          }
        }
      }
    }
    return mySubstituti