Download Repo

In [2]:
from get_repo import clone_repo
# Clone settings. `repo_dir` is read again by the dataset-generation cell,
# so it must stay defined under this name.
# NOTE(review): what happens when `repo_dir` already exists depends on
# clone_repo, which is not visible here — confirm that re-running is safe.
url = "https://github.com/ataj09/ClearSpeech.git"  # URL of the repository to clone
repo_dir = "repo"  # local directory handed to clone_repo as the clone destination
clone_repo(url, repo_dir)


Split code into prefix, suffix and middle and save them into code_completion_dataset.json


In [6]:
from split_code import generate_dataset
import json

# Request 50 examples from the cloned repository; later cells read the
# 'prefix', 'middle' and 'suffix' keys of each example.
dataset = generate_dataset(repo_dir, num_examples=50)

# Persist the examples as indented JSON so the prediction step can reload them.
with open('code_completion_dataset.json', 'w') as dataset_file:
    dataset_file.write(json.dumps(dataset, indent=4))

Use the tiny_starcoder_py model to generate predictions and save them to code_completions.json

In [7]:
# Load the code completion model
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import json
from transformer_stuff import complete_code

# Checkpoint to load and the device the model is placed on.
model_name = "bigcode/tiny_starcoder_py"
device = "cpu"

# Tokenizer and model are both loaded from the same checkpoint name;
# the model is then moved onto `device`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

# Read the saved examples back from disk.
with open('code_completion_dataset.json', 'r') as dataset_file:
    dataset = json.loads(dataset_file.read())

# Generate a completion for every example and collect one record per example.
completions = []
for example in dataset:
    # The model sees only the prefix and suffix; the true middle is stored
    # alongside the prediction for later comparison.
    completion = complete_code(
        example['prefix'],
        example['suffix'],
        tokenizer,
        device,
        model,
    )
    completions.append(
        {
            'prefix': example['prefix'],
            'middle_actual': example['middle'],
            'middle_predicted': completion,
            'suffix': example['suffix'],
            # Left blank here; filled in during the manual review step.
            'label': "",
        }
    )

# Write the records out as indented JSON so they can be reviewed by hand.
with open('code_completions.json', 'w') as completions_file:
    completions_file.write(json.dumps(completions, indent=4))

print("Code completions saved to code_completions.json")


Code completions saved to code_completions.json


Manually review and label the completions, then calculate exact match, chrF, Levenshtein distance, and BLEU

In [1]:
import json
import sacrebleu
import Levenshtein
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction


# Load the saved prediction records for scoring.
with open('code_completions.json', 'r') as completions_file:
    completions = json.loads(completions_file.read())

def exact_match(pred, actual):
    """Return 1 if `pred` and `actual` are equal after stripping surrounding whitespace, else 0."""
    is_same = pred.strip() == actual.strip()
    return 1 if is_same else 0

def levenshtein_distance(pred, actual):
    """Return `Levenshtein.distance` between the two strings after stripping surrounding whitespace from each."""
    trimmed_pred = pred.strip()
    trimmed_actual = actual.strip()
    return Levenshtein.distance(trimmed_pred, trimmed_actual)

def calculate_bleu_score(reference, hypothesis):
    """Return the smoothed sentence-level BLEU of `hypothesis` against a single `reference`.

    Both strings are tokenized on whitespace and scored with `sentence_bleu`
    using `SmoothingFunction().method1`.
    """
    # str.split() with no separator already discards leading and trailing
    # whitespace, so no explicit strip() is needed before it.
    ref_tokens = reference.split()
    hyp_tokens = hypothesis.split()
    smoother = SmoothingFunction().method1
    return sentence_bleu([ref_tokens], hyp_tokens, smoothing_function=smoother)

# Every average below divides by the number of completions, so an empty file
# would otherwise surface as an unexplained ZeroDivisionError.
if not completions:
    raise ValueError("code_completions.json contains no completions to evaluate")

# Per-example metric values, one entry per completion.
exact_matches = []
levenshtein_distances = []
chrf_scores = []
bleu_scores = []

for completion in completions:
    pred = completion['middle_predicted']
    actual = completion['middle_actual']
    exact_matches.append(exact_match(pred, actual))
    levenshtein_distances.append(levenshtein_distance(pred, actual))
    # sentence_chrf takes the hypothesis first, then a list of references.
    chrf = sacrebleu.sentence_chrf(pred.strip(), [actual.strip()])
    chrf_scores.append(chrf.score)
    # calculate_bleu_score takes the reference first, then the hypothesis.
    bleu_scores.append(calculate_bleu_score(actual, pred))

# Plain arithmetic means over all completions.
avg_exact_match = sum(exact_matches) / len(exact_matches)
avg_levenshtein = sum(levenshtein_distances) / len(levenshtein_distances)
avg_chrf = sum(chrf_scores) / len(chrf_scores)
average_bleu = sum(bleu_scores) / len(bleu_scores)

print(f"Average BLEU Score: {average_bleu:.4f}")
print(f"Exact Match: {avg_exact_match:.4f}")
print(f"Avg Levenshtein Distance: {avg_levenshtein:.4f}")
print(f"Avg CHRF: {avg_chrf:.4f}")

Average BLEU Score: 0.0242
Exact Match: 0.0000
Avg Levenshtein Distance: 866.5000
Avg CHRF: 22.0180
