In [1]:
from transformers import PhiForCausalLM, AutoTokenizer

import torch
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

import numpy as np

import utils
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "microsoft/phi-1_5"

# The tokenizer pads with its end-of-sequence token.
# NOTE(review): presumably the tokenizer ships without a pad token - confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Load the causal-LM weights, then move them to the selected device.
model = PhiForCausalLM.from_pretrained(model_name)
model = model.to(device)

In [3]:
# Extract the Kotlin samples.
# NOTE(review): assumes `extract_kotlin_code` returns a (prompts, answers)
# pair, since it is splatted into `train_test_split` and unpacked four ways.
kotlin_code = utils.extract_kotlin_code()

# Hold out 1000 samples for testing; the fixed seed keeps the split stable.
(
    train_kotlin_prompts,
    test_kotlin_prompts,
    train_kotlin_answers,
    test_kotlin_answers,
) = train_test_split(*kotlin_code, test_size=1000, random_state=42)

# Wrap each split in the project's dataset class.
train_dataset = utils.CodeCompletionDataset(
    train_kotlin_prompts,
    train_kotlin_answers,
    train=True,
)
test_kotlin_dataset = utils.CodeCompletionDataset(
    test_kotlin_prompts,
    test_kotlin_answers,
    train=False,
)

# Second test set, read through `utils.read_codexglue_test_data` with n=1000.
codexglue_test_data = utils.read_codexglue_test_data(n=1000)
test_codexglue_dataset = utils.CodeCompletionDataset(*codexglue_test_data, train=False)

Looking for kotlin files...
Parsing functions in kotlin files...


100%|██████████| 54432/54432 [11:41<00:00, 77.57it/s]  

parse errors count: 18912, declaration errors count: 0
total number of samples: 60051





In [6]:
# Print every metric that `utils.evaluate` returns for the CodeXGLUE test set.
codexglue_metrics = utils.evaluate(model, tokenizer, test_codexglue_dataset, max_new_tokens=20)
for metric_name, metric_value in codexglue_metrics.items():
    print(f"{metric_name}: {metric_value}")

# Show one example: the first test item, the sampled completion, and the
# reference answer.
prompt, answer = test_codexglue_dataset[0]
print(f"\nexample prompt:\n{prompt}\n")
completion = utils.sample(model, tokenizer, prompt, min_new_tokens=2, max_new_tokens=20)
print(f"example completion:\n{completion}\n")
print(f"example true answer:\n{answer}\n")

100%|██████████| 1000/1000 [07:11<00:00,  2.32it/s]


accuracy score: 0.0
bleu score: 0.0056283212581165545
rouge: 0.2210622061584594

example prompt:
def debug(user, message):
    """
    Adds a message with the ``DEBUG`` level.
    
    :param user: User instance
    :param message: Message to show
    """
    

example completion:
if user.level == 'DEBUG':
        user.log.debug(message)
    

example true answer:
message_user(user, message, constants.DEBUG)




In [4]:
# Print every metric that `utils.evaluate` returns for the Kotlin test set.
kotlin_metrics = utils.evaluate(model, tokenizer, test_kotlin_dataset, max_new_tokens=20)
for metric_name, metric_value in kotlin_metrics.items():
    print(f"{metric_name}: {metric_value}")

# Show one example: the first test item, the sampled completion, and the
# reference answer.
prompt, answer = test_kotlin_dataset[0]
print(f"\nexample prompt: {prompt}\n")
completion = utils.sample(model, tokenizer, prompt, min_new_tokens=2, max_new_tokens=20)
print(f"example completion: {completion}\n")
print(f"example true answer: {answer}\n")

100%|██████████| 1000/1000 [07:03<00:00,  2.36it/s]


accuracy score: 0.0
bleu score: 0.001848171824365358
rouge: 0.07783621991431407

example prompt: fun createMutableListFrom(array: dynamic): MutableList<E> 

example completion: 
    {
        var list = new MutableList<E>();
        for (

example true answer: TODO("Use WITH_STDLIB pragma to use this function")



In [11]:
import os

# Probe checkpoint directories from epoch 6 down to epoch 0 and stop at the
# first one that exists. The generator is lazy, so no older directory is
# checked once a match is found.
existing_checkpoint_epochs = (
    epoch
    for epoch in reversed(range(7))
    if os.path.isdir(f"./checkpoint_after_epoch_{epoch}")
)
latest_checkpoint_epoch = next(existing_checkpoint_epochs, None)

if latest_checkpoint_epoch is None:
    # No checkpoint on disk: keep the model already in memory.
    finished_epochs = 0
else:
    # Resume from the newest checkpoint; epochs 0..latest count as finished.
    model = PhiForCausalLM.from_pretrained(f"./checkpoint_after_epoch_{latest_checkpoint_epoch}").to(device)
    finished_epochs = latest_checkpoint_epoch + 1

# The base rate 1e-4 is halved once for every epoch already finished.
learning_rate = 1e-4 * 0.5 ** finished_epochs
model = utils.train_model(
    model,
    tokenizer,
    train_dataset,
    learning_rate=learning_rate,
    start_epoch=finished_epochs,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]


wandb: ERROR Error while calling W&B API: run finetune/1w4ee1vr not found during createRunFiles (<Response [404]>)
wandb: ERROR Error while calling W&B API: run finetune/1w4ee1vr not found during createRunFiles (<Response [404]>)
wandb: ERROR Error while calling W&B API: run finetune/1w4ee1vr not found during createRunFiles (<Response [404]>)
wandb: ERROR Error while calling W&B API: run finetune/1w4ee1vr not found during createRunFiles (<Response [404]>)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


0,1
loss,▅▃▁▁▂▃▂▂▃▃▂▂▃▂▄▁▆▄▂▄▂▅▆▂▂█▇▃▄▃▅▃▄▃▄▄▂▂▃▃

0,1
loss,0.82764


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Epoch [1/2], Loss: 0.6558: 100%|██████████| 29526/29526 [1:25:11<00:00,  5.78it/s]  
Epoch [2/2], Loss: 0.3602: 100%|██████████| 29526/29526 [1:25:01<00:00,  5.79it/s]  


0,1
loss,▂▂▂▄▅▂▄▂▂▃▃▁▂▂▂▃▂▂▄▂▄▂█▂▂▃▂▂▂▂▂▂▁▂▂▂▁▂▅▂

0,1
loss,0.36022


In [13]:
# Print every metric that `utils.evaluate` returns for the CodeXGLUE test set,
# using whatever `model` currently holds.
codexglue_metrics = utils.evaluate(model, tokenizer, test_codexglue_dataset, max_new_tokens=20)
for metric_name, metric_value in codexglue_metrics.items():
    print(f"{metric_name}: {metric_value}")

# Show one example: the first test item, the sampled completion, and the
# reference answer.
prompt, answer = test_codexglue_dataset[0]
print(f"\nexample prompt: {prompt}\n")
completion = utils.sample(model, tokenizer, prompt, min_new_tokens=2, max_new_tokens=20)
print(f"example completion: {completion}\n")
print(f"example true answer: {answer}\n")

100%|██████████| 1000/1000 [06:33<00:00,  2.54it/s]


accuracy score: 0.0
bleu score: 0.0020034207402879678
rouge: 0.19129313454196817

example prompt: def debug(user, message):
    """
    Adds a message with the ``DEBUG`` level.
    
    :param user: User instance
    :param message: Message to show
    """
    

example completion: if (user.isVerbose):
        user.log.debug(message)


example true answer: message_user(user, message, constants.DEBUG)




In [14]:
# Print every metric that `utils.evaluate` returns for the Kotlin test set,
# using whatever `model` currently holds.
kotlin_metrics = utils.evaluate(model, tokenizer, test_kotlin_dataset, max_new_tokens=20)
for metric_name, metric_value in kotlin_metrics.items():
    print(f"{metric_name}: {metric_value}")

# Show one example: the first test item, the sampled completion, and the
# reference answer.
prompt, answer = test_kotlin_dataset[0]
print(f"\nexample prompt: {prompt}\n")
completion = utils.sample(model, tokenizer, prompt, min_new_tokens=2, max_new_tokens=20)
print(f"example completion: {completion}\n")
print(f"example true answer: {answer}\n")

100%|██████████| 1000/1000 [03:28<00:00,  4.79it/s]


accuracy score: 0.014
bleu score: 0.00045317261070564204
rouge: 0.11821222754168945

example prompt: fun createMutableListFrom(array: dynamic): MutableList<E> 

example completion: ile(array)

example true answer: TODO("Use WITH_STDLIB pragma to use this function")



In [15]:
# Save the fine-tuned weights and the tokenizer into the same directory so
# that both can be reloaded from it with `from_pretrained`. Saving only the
# model would leave the directory without tokenizer files, and the pad-token
# override made when the tokenizer was loaded would have to be redone by hand.
output_dir = "fine_tuned_model"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)