# Evaluation of the [phi-1.5](https://huggingface.co/microsoft/phi-1_5) model on the Kotlin and Python test sets


### Code to predict the next line of code given the preceding 5 lines (the model cannot handle a very long sequence at one time)

An alternative approach would be to predict every line in turn, but here I only want to predict the final line of each sample.

In [None]:
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from typing import List, Tuple
from torch import nn
from tqdm import tqdm
import os
from fuzzywuzzy import fuzz
import re

In [128]:
def load_special_tokens(path: str) -> List[str]:

    """
    Load literal values from a JSON file and format them as special tokens.

    The JSON document must be an object with the keys ``"str"``, ``"num"``
    and ``"char"``, each holding an iterable of literal values.

    Args:
        path: Location of the JSON file with the literal values.

    Returns:
        The generic placeholders ``<STR_LIT>``, ``<NUM_LIT>`` and
        ``<CHAR_LIT>`` followed by one ``<STR_LIT:...>``, ``<NUM_LIT:...>``
        or ``<CHAR_LIT:...>`` token per literal, in that order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If one of the three keys is missing.
    """

    # JSON is UTF-8 by specification; relying on the platform's locale
    # encoding would corrupt non-ASCII literals on some systems.
    with open(path, "r", encoding="utf-8") as file:
        literals = json.load(file)

    tokens = ["<STR_LIT>", "<NUM_LIT>", "<CHAR_LIT>"]
    # Same order as the generic placeholders above: str, num, char.
    for prefix, key in (("STR", "str"), ("NUM", "num"), ("CHAR", "char")):
        tokens.extend(f"<{prefix}_LIT:{lit}>" for lit in literals[key])

    return tokens


def load_model(model_name: str,
               special_tokens_path: str) -> Tuple[AutoTokenizer, nn.Module]:

    """
    Build the tokenizer/model pair for a pretrained causal language model.

    The literal tokens produced by ``load_special_tokens`` are registered
    with the tokenizer as additional special tokens, and the model's token
    embeddings are resized to the enlarged tokenizer length.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.
        special_tokens_path: JSON file handed to ``load_special_tokens``.

    Returns:
        A ``(tokenizer, model)`` tuple; the model has been put in eval mode.
    """

    extra_tokens = load_special_tokens(special_tokens_path)

    tok = AutoTokenizer.from_pretrained(
        model_name,
        additional_special_tokens=extra_tokens,
    )

    lm = AutoModelForCausalLM.from_pretrained(model_name)
    # The tokenizer grew by the extra tokens, so the embedding table has to
    # be resized to the new tokenizer length before the model is used.
    vocab_size = len(tok)
    lm.resize_token_embeddings(vocab_size)
    lm.eval()

    return tok, lm


def predict_next_line(code: str, tokenizer: AutoTokenizer,
                      model: nn.Module, device: str = 'cuda') -> str:

    """
    Generate a continuation for a code prompt and return the decoded text.

    The return value is the decoded first sequence produced by
    ``model.generate``; it is not cut down to a single line here. Callers in
    this file split the result into lines and index past the prompt lines,
    which relies on the decoded text starting with the prompt.

    Args:
        code: Prompt text to continue.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Model exposing ``generate``; it is moved to ``device``.
        device: Device for the model and the input tensor.

    Returns:
        The decoded text of the first generated sequence.
    """

    model.to(device)

    prompt_ids = tokenizer.encode(code, return_tensors="pt")
    prompt_ids = prompt_ids.to(device)

    # NOTE(review): max_length presumably bounds prompt + generated tokens
    # together, not only the new tokens - confirm against the generate() docs.
    generated = model.generate(prompt_ids, max_length=512, num_return_sequences=1)

    # NOTE(review): skip_special_tokens=True presumably also strips the
    # literal placeholder tokens registered as special tokens in load_model -
    # confirm this is intended for the evaluation.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


def read_and_predict(json_file: str, tokenizer: AutoTokenizer,
                     model: torch.nn.Module, device: str = 'cuda') -> List[str]:

    """
    Read a JSON-lines file of code inputs and predict the next line for each.

    Every line of the file must be a JSON object with an ``"input"`` string
    whose source lines are separated by ``<EOL>``. Only the last 5 source
    lines are used as the prompt.

    Args:
        json_file: Path to the JSON-lines file with the inputs.
        tokenizer: Tokenizer forwarded to ``predict_next_line``.
        model: Model forwarded to ``predict_next_line``.
        device: Device forwarded to ``predict_next_line``.

    Returns:
        One predicted line per successfully parsed input, in file order.
        Lines that are not valid JSON are reported and skipped, so the result
        can be shorter than the file. When the generated text contains no
        line after the prompt, an empty string is stored for that sample.

    Raises:
        KeyError: If a parsed object has no ``"input"`` key.
    """

    outputs = []
    with open(json_file, 'r', encoding='utf-8') as file:
        for n, line in enumerate(file):
            # Only the parsing step can raise JSONDecodeError, so keep the
            # try block limited to it.
            try:
                json_object = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Error reading JSON: {e}")
                continue

            # Keep only the last 5 lines (slicing is a no-op for shorter inputs).
            input_lines = json_object['input'].split('<EOL>')[-5:]
            input_code = '<EOL>'.join(input_lines) + '<EOL>'
            # The prompt ends with <EOL>, so the first generated line sits at
            # this index once the generated text is split on <EOL>.
            num_lines = len(input_lines)

            generated = predict_next_line(input_code, tokenizer, model, device)
            generated_lines = generated.replace('\n', '<EOL>').split('<EOL>')
            print(generated_lines)

            # Guard against generations that contain fewer lines than the
            # prompt plus one; an IndexError here would abort the whole run.
            if num_lines < len(generated_lines):
                next_line = generated_lines[num_lines]
            else:
                next_line = ''
            print(next_line)
            outputs.append(next_line)
            print(n)

    return outputs

In [51]:
# Checkpoint identifier handed to `from_pretrained` inside `load_model`.
# NOTE(review): the heading of this notebook links to `microsoft/phi-1_5`
# (underscore); confirm that this dotted spelling resolves on the Hub.
model_name = "microsoft/phi-1.5"
# JSON file with the "str"/"num"/"char" literal lists read by `load_special_tokens`.
special_tokens_path = "literals.json"
# Load the tokenizer and model once; the later cells reuse these two objects.
tokenizer, model = load_model(model_name, special_tokens_path)

Example usage

In [84]:
# Two-line prompt; it ends with a newline, so splitting on '\n' yields the
# two source lines plus a trailing empty string (3 items).
code_snippet = "import numpy as np\nnp.random.seed(42)\n"
num_lines = len(code_snippet.split('\n'))
# predict_next_line returns the whole decoded text, not just one line.
predicted_line = predict_next_line(code_snippet, tokenizer, model)
# Prints the first `num_lines` lines of the decoded text as a list, i.e. the
# two prompt lines followed by the first generated line (see recorded output).
print("Predicted next line:", predicted_line.split('\n')[:num_lines])

Predicted next line: ['import numpy as np', 'np.random.seed(42)', '']


### Code to evaluate the predictions

In [167]:
def post_process(code: str) -> str:

    """
    Replace literal placeholder tokens in a code string with plain text.

    Bare placeholders map to fixed text: ``<NUM_LIT>`` becomes ``0`` while
    ``<STR_LIT>`` and ``<CHAR_LIT>`` are removed. Valued placeholders such as
    ``<STR_LIT:abc>`` are replaced by the value after the colon.

    Args:
        code: Code string that may contain placeholder tokens.

    Returns:
        The code string with the placeholders substituted.
    """

    # Bare placeholders first, applied in a fixed order.
    for placeholder, substitute in (("<NUM_LIT>", "0"),
                                    ("<STR_LIT>", ""),
                                    ("<CHAR_LIT>", "")):
        code = code.replace(placeholder, substitute)

    valued_literal = re.compile(r"<(STR|NUM|CHAR)_LIT:(.*?)>", re.S)
    # Collect all matches before substituting: `code` is rebound inside the
    # loop, and every occurrence of a matched token is replaced by its value.
    for match in list(valued_literal.finditer(code)):
        code = code.replace(match.group(0), match.group(2))

    return code


def evaluate(answers_path: str, predictions_path: str) -> None:

    """
    Score predictions against ground-truth answers and print the metrics.

    Two metrics are printed: exact match (EM, as a percentage) and edit
    similarity (the mean ``fuzz.ratio`` score). Both sides are passed through
    ``post_process`` first, and exact match compares the whitespace-split
    tokens, so differences in spacing do not count as mismatches.

    Args:
        answers_path: JSON-lines file; each object needs a ``"gt"`` string.
            Lines that are not valid JSON are reported and skipped.
        predictions_path: Text file with one prediction per line.

    Raises:
        AssertionError: If the number of parsed answers differs from the
            number of prediction lines.
        KeyError: If an answer object has no ``"gt"`` key.
    """

    answers = []
    # The answers file is JSON, which is UTF-8 by specification.
    with open(answers_path, 'r', encoding='utf-8') as i_file:
        for line in i_file:
            try:
                answers.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e}")
                continue

    # Read with the default encoding, matching how this notebook writes the
    # prediction files.
    with open(predictions_path, "r") as f:
        predictions = f.readlines()

    assert len(answers) == len(predictions), f"Samples of predictions and answers are not equal, {len(answers)}: {len(predictions)}"

    total = len(predictions)
    exact_matches = 0.0
    edit_sim = 0.0
    # No try/except here: nothing in this loop parses JSON, so the former
    # JSONDecodeError handler could never trigger.
    for answer, prediction in zip(answers, predictions):
        pred = post_process(prediction.strip())
        gt = post_process(answer["gt"])
        # NOTE(review): fuzz.ratio presumably returns 0 when either string is
        # empty, so an empty-vs-empty pair would count toward EM but add
        # nothing to edit similarity - confirm against the fuzzywuzzy source.
        edit_sim += fuzz.ratio(pred, gt)
        if pred.split() == gt.split():
            exact_matches += 1

    edit_similarity = round(edit_sim / total, 2) if total else 0
    exact_match = round((exact_matches / total) * 100, 2) if total else 0
    print(f"Edit sim: {edit_similarity}, EM: {exact_match}")


### Evaluation on CodeXGLUE test set (line level)

In [122]:
# Input file for the CodeXGLUE run; passed to read_and_predict in the next cell.
json_file = "/content/CodeXGLUE_test_processed.json"

In [None]:
# One predicted line per parsed sample; `device` is left at its default ('cuda').
python_outputs = read_and_predict(json_file=json_file, tokenizer=tokenizer, model=model)

In [131]:
# Persist the CodeXGLUE predictions, one prediction per line.
with open('predictions_python.txt', 'w') as fp:
    fp.writelines(item + '\n' for item in python_outputs)

In [168]:
# Score the CodeXGLUE predictions against the ground-truth answers.
# NOTE(review): the predictions were written to the relative path
# 'predictions_python.txt'; this absolute path only points at the same file
# when the working directory is /content.
evaluate('/content/CodeXGLUE_test_answers.json',
         '/content/predictions_python.txt')

Edit sim: 28.42, EM: 9.0


### Evaluation on Kotlin test set (line level)

In [169]:
# Input file for the Kotlin run; rebinds the `json_file` name used for the earlier run.
json_file = "/content/kotlin_code_test.json"

In [None]:
# One predicted line per parsed sample; `device` is left at its default ('cuda').
kotlin_outputs = read_and_predict(json_file=json_file, tokenizer=tokenizer, model=model)

In [171]:
# Persist the Kotlin predictions, one prediction per line.
with open('predictions_kotlin.txt', 'w') as fp:
    fp.writelines(item + '\n' for item in kotlin_outputs)

In [172]:
# Score the Kotlin predictions against the ground-truth answers.
# NOTE(review): the predictions were written to the relative path
# 'predictions_kotlin.txt'; this absolute path only points at the same file
# when the working directory is /content.
evaluate('/content/kotlin_code_answers.json',
         '/content/predictions_kotlin.txt')

Edit sim: 14.69, EM: 43.0
