# Analyse pre-trained model's output

In [None]:
from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# WARNING: utility cell. NOTE(review): per the google.colab API, runtime.unassign()
# disconnects and releases the current Colab runtime — run it only when you are
# done with the session, not as part of "Run all".
from google.colab import runtime
runtime.unassign()

In [None]:
!pip install fuzzywuzzy
!pip install python-Levenshtein
!pip install tree-sitter
!pip install rouge-score

# Open model & load data

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import datetime
import torch
import random
import numpy as np
import json
import re
import pandas as pd
import tree_sitter
from fuzzywuzzy import fuzz
import Levenshtein
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
# Info is here: https://github.com/seatgeek/thefuzz

In [None]:
# Make CUDA the default torch device before the model is created
# (this cell therefore needs a GPU runtime).
torch.set_default_device("cuda")

print("Loading model...")
# `time` / `time1` are plain timestamp globals; later cells overwrite them to time generation.
time = datetime.datetime.now()
# `model` and `tokenizer` are the globals used by every generation/metric cell below.
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5")
time1 = datetime.datetime.now()
print(f"Model loaded. Time to load the model: {time1 - time}")

Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/736 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Model loaded. Time to load the model: 0:00:14.755636


In [None]:
def replace_tags(code):
    """
    Strip CodeXGLUE placeholder tags from a code string.

    Adapted from the CodeXGLUE line-completion evaluator:
    https://github.com/microsoft/CodeXGLUE/blob/main/Code-Code/CodeCompletion-line/evaluator/evaluator.py

    Three passes are applied in order:
      1. bare literal tags get a default value ("0" for numbers, "" otherwise);
      2. tags that carry a value (``<STR_LIT:foo>``) are replaced by that value;
      3. any remaining ``<UPPERCASE...>`` tag is replaced by a single space.

    Parameters:
        code (str): The input code containing special tags.

    Returns:
        str: The code with the tags substituted.
    """
    # Pass 1: bare literal tags -> default values
    bare_defaults = (("<NUM_LIT>", "0"), ("<STR_LIT>", ""), ("<CHAR_LIT>", ""))
    for bare_tag, default in bare_defaults:
        code = code.replace(bare_tag, default)

    # Pass 2: valued literal tags -> the value itself.
    # The matches are collected once, up front, then substituted one by one.
    valued_tag = re.compile(r"<(STR|NUM|CHAR)_LIT:(.*?)>", re.S)
    for kind, value in valued_tag.findall(code):
        code = code.replace(f"<{kind}_LIT:{value}>", value)

    # Pass 3: whatever upper-case tags are left become a blank
    for leftover in re.findall(r'<([A-Z][^<>]*)>', code):
        code = code.replace(f'<{leftover}>', ' ')

    return code

def read_jsonl_file(file_path):
    """
    Reads a JSONL file and replaces special tags in the 'signature' and 'body' fields of each JSON object.

    Blank lines (for example a trailing empty line at the end of the file) are skipped
    instead of being handed to the JSON parser.

    Parameters:
        file_path (str): The path to the JSONL file.

    Returns:
        list: A list of dictionaries, one per non-blank line, with 'signature' and 'body'
        passed through `replace_tags`.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If an object has no 'signature' or 'body' field.
    """
    data = []
    # Read as UTF-8 explicitly rather than relying on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            json_obj = json.loads(line)
            json_obj['signature'] = replace_tags(json_obj['signature'])
            json_obj['body'] = replace_tags(json_obj['body'])
            data.append(json_obj)
    return data

# CodeXGLUE test split (JSONL): literal tags in 'signature'/'body' are replaced on load.
# NOTE(review): both paths assume Google Drive is mounted at /content/drive, but the
# drive.mount(...) call in the first cell is commented out — confirm it was mounted.
file_path = '/content/drive/MyDrive/CodeCompletion/CodeXGlue/test.jsonl'
codexglue_test = read_jsonl_file(file_path)
print(codexglue_test[0])

# Kotlin functions table (CSV); `file_path` is simply reused for the second dataset.
file_path = '/content/drive/MyDrive/CodeCompletion/functions_df_inputs_outputs.csv'
functions_df = pd.read_csv(file_path)
print(functions_df.iloc[0])

{'signature': 'def debug(user, message):', 'body': 'message_user(user, message, constants.DEBUG) ', 'docstring': 'Adds a message with the ``DEBUG`` level.\n\n:param user: User instance\n:param message: Message to show', 'id': 'f4:m0'}
Unnamed: 0                                                              0
function_id                                                         27692
signature               private fun bitIndex(elementIndex: Int, bitOff...
body                    =\n        elementIndex * ELEMENT_SIZE + bitOf...
is_single_expression                                                 True
is_test                                                             False
0-20                                                                False
100+                                                                False
20-50                                                               False
50-100                                                               True
Name: 0, dtype: object


# Control output
The model often generates code with repetitions and unnecessary trailing text. We need some tools to trim and control the output.


In [None]:
def extract_function_python(code, signature=None):
    """
    Extract the first Python function definition from generated text using a regex.

    This is the fastest approach, but it has known limits: nested functions are not
    handled (the match stops at the next `def` keyword) and the parameter list must
    fit on one line.

    Parameters:
        code (str): Raw text produced by the model (everything after the prompt).
        signature (str, optional): The signature that was given in the prompt. When
            provided, a match whose text after the signature is blank is rejected,
            so a bare signature with no body is not returned as a "function".

    Returns:
        str: The matched function text, or `code` unchanged when nothing usable matched.
    """
    # `\b` makes `def` match only as a whole word; without it the lookahead also
    # fired inside identifiers such as `default` and cut the function short.
    pattern = r"\bdef\s+(\w+)\s*\(.*?\):[\s\S]*?(?=\bdef\b|\Z)"

    match = re.search(pattern, code)
    if not match:
        return str(code)

    candidate = match.group(0)
    # Without a signature there is nothing to strip: judge the whole match instead.
    remainder = candidate.split(signature, 1)[-1] if signature else candidate
    if remainder.strip() == '':
        return str(code)
    return candidate

def greet(name):
    print("Hello, " + name + "!")




In [None]:
# Compiled grammar library (output) and the tree-sitter-kotlin grammar checkout (input).
LANGUAGE_BUILDER_PATH = '/content/drive/MyDrive/CodeCompletion/parser/build/my-language.so'
REPO_PATHS = '/content/drive/MyDrive/CodeCompletion/parser/tree-sitter-kotlin'

# NOTE(review): Language.build_library, Language(path, name) and Parser.set_language
# belong to the older py-tree-sitter API and were reportedly removed in 0.22+; the
# unpinned `pip install tree-sitter` above may install a newer release — confirm and pin.
tree_sitter.Language.build_library(LANGUAGE_BUILDER_PATH, [REPO_PATHS])
KOTLIN_LANGUAGE = tree_sitter.Language(LANGUAGE_BUILDER_PATH, 'kotlin')
# `parser_kotlin` is the module-level parser used by extract_function_kotlin below.
parser_kotlin = tree_sitter.Parser()
parser_kotlin.set_language(KOTLIN_LANGUAGE)

def extract_function_kotlin(code, signature=None):
  """
  Extract the first Kotlin function from generated text.

  A regex is tried first. If it finds nothing usable, the text is parsed with the
  module-level tree-sitter parser `parser_kotlin` and the first top-level node is
  returned when it is a `function_declaration`.

  Parameters:
      code (str): Raw text produced by the model (everything after the prompt).
      signature (str, optional): The signature that was given in the prompt. When
          provided, a regex match whose text after the signature is blank is rejected.

  Returns:
      str: The extracted function text, or `code` unchanged when neither strategy
      finds a function.
  """
  # `\b` makes `fun` match only as a whole word; without it the lookahead also
  # fired inside words such as `function` and cut the body short.
  pattern = r"\bfun\s+(\w+)\s*\(.*?\)\s*{[\s\S]*?(?=\bfun\b|\Z)"
  match = re.search(pattern, code)

  if match:
    candidate = match.group(0)
    # Without a signature there is nothing to strip: judge the whole match instead.
    remainder = candidate.split(signature, 1)[-1] if signature else candidate
    if remainder.strip() != '':
      return candidate

  # Fallback: let tree-sitter decide whether the text starts with a function.
  tree = parser_kotlin.parse(bytes(code, "utf8"))
  children = tree.root_node.children
  # An empty parse has no children; guard before indexing.
  if children and children[0].type == 'function_declaration':
    return children[0].text.decode('utf-8')
  return str(code)

fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult <String> {
    val cache = Cache<String, BuildResult<String>>()
    val taskPaths = taskPaths.toList()
    taskPaths.forEach { taskPath ->
        val task = Task(taskPath)
        val result = task.run()
        cache.put(taskPath, result)
    }
}


In [None]:
# Example of usage
# The sample string holds several `def`s back to back, so the call below shows
# that only the first function definition is kept.
code = """
def debug(user, message): print(f'{user} - {message}')
def _check(path):  messageFiles = path.globChildren('*') for message in messageFiles:  if message.basename().endswith(''):  continue  receiver.message(message.getContent()) message.remove() return functools.partial(_check, path) def has_resuming(): return False
"""

# Extract the first function definition
extracted_fun = extract_function_python(code, 'def debug(user, message):')
print(extracted_fun)

# Output evaluation
Generate output for a small sample from CodeXGLUE and compute a few metrics.

In [None]:
# We don't need stemmer here, because we want to compare full tokens
# Only ROUGE-1 is requested. `tokenizer` is the model tokenizer loaded above;
# NOTE(review): rouge_score presumably calls its `.tokenize(text)` method — confirm.
scorer = rouge_scorer.RougeScorer(['rouge1'], use_stemmer=False, tokenizer=tokenizer)

def compute_metrics(ref, text):
  """
  Score a generated snippet `text` against the reference `ref`.

  Returns a 5-tuple:
      (edit similarity from fuzz.ratio,
       chrF f-score computed with n-gram order 4,
       ROUGE-1 precision, ROUGE-1 recall, ROUGE-1 f-measure)
  """
  similarity = fuzz.ratio(text, ref)
  # The helper returns several statistics; index 2 is the f-score.
  chrf_stats = nltk.translate.chrf_score.chrf_precision_recall_fscore_support(ref, text, 4)
  rouge1 = scorer.score(ref, text)['rouge1']
  return (similarity, chrf_stats[2], rouge1.precision, rouge1.recall, rouge1.fmeasure)

In [None]:
# Example of computing metrics
# Hard-coded sample pair used only to demonstrate print_metrics below.
# NOTE: these are regular (non-raw) strings, so the '\n' inside `generated_code`
# becomes a real newline character in the text that gets scored.
generated_code = '''
def loads(s, strip_comments=False, **kw):
    return [Node.create(s, **kw) for s in s.split('\n') if s]
'''

reference_code = '''
def loads(s, strip_comments=False, **kw):
    kw[''] = strip_comments
    return [parse_node(ss.strip(), **kw) for ss in s.split(';') if ss.strip()]
'''

def print_metrics(reference_code, generated_code):
  """
  Print the metrics of `generated_code` measured against `reference_code`.

  Relies on the module-level `compute_metrics` and `scorer`. `compute_metrics`
  returns five scalars (edit similarity, chrF, ROUGE-1 precision/recall/f-measure),
  so they are unpacked by name here; the earlier index-based loop expected three
  entries and a dict of ROUGE scores and raised on the current return value.

  Parameters:
      reference_code (str): Ground-truth code.
      generated_code (str): Code produced by the model.
  """
  edit_sim, chrf_score, precision, recall, fmeasure = compute_metrics(reference_code, generated_code)
  # Same tokenizer the ROUGE scorer uses, so the lengths match what ROUGE-1 counted.
  token = scorer._tokenizer

  print("Results: ")
  print(f"edit_sim: {edit_sim}")
  print(f"chrf_score: {chrf_score}")
  print(f"Generated tokens: \n\tGenerated code tokenized length: {len(token(generated_code)['input_ids'])} \n\tReference code tokenized length: {len(token(reference_code)['input_ids'])}")
  print('rogue_scores')
  print(f'\trouge1: precision={precision}, recall={recall}, fmeasure={fmeasure}')

print_metrics(reference_code, generated_code)

Results: 
edit_sim: 75
chrf_score: 0.5131761442441054
Generated tokens: 
	Generated code tokenized length: 41 
	Reference code tokenized length: 58
rogue_scores
	rouge1: Score(precision=0.8536585365853658, recall=0.603448275862069, fmeasure=0.7070707070707071)


# Python code completion
Try few-shot code generation with Phi-1.5, using examples randomly selected from the dataset to provide context.


In [None]:
index = 12

def create_prompt_codex(dataset, index, num_examples, context=None, language='Python'):
  """
  Build a few-shot completion prompt for the sample at `index`.

  Parameters:
      dataset (list[dict]): Samples with at least 'signature' and 'body' keys.
      index (int): Position of the sample whose signature must be completed.
      num_examples (int): Number of randomly chosen few-shot examples.
      context (str, optional): Extra text inserted just before the "Code so far:" line.
      language (str): Language name written in the prompt header.

  Returns:
      str: The prompt, ending with "Code so far: <signature>".

  Raises:
      ValueError: If `num_examples` exceeds the number of other samples available.
  """
  # Never offer the target itself as an example: its body is the answer being evaluated.
  target = index if index >= 0 else index + len(dataset)
  candidates = [i for i in range(len(dataset)) if i != target]
  indices = random.sample(candidates, num_examples)

  prefix = f'Complete code\nLanguage: {language}\n'
  shots = '\n'.join(f"Example: {dataset[i]['signature']} {dataset[i]['body']}" for i in indices)

  data = dataset[index]
  context_block = f"{context}\n" if context else ""
  return f"{prefix}\n{shots}\n{context_block}Code so far: {data['signature']}"

# Two random few-shot examples, no extra context.
prompt = create_prompt_codex(codexglue_test, index, 2)
# return_attention_mask=False: only input_ids end up in `inputs`, which is later
# unpacked straight into model.generate(**inputs).
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

# \033[96m is an ANSI colour escape; it is not reset at the end of this string.
print(f'\033[96mFew-shots prompt (without context of functions): \n{prompt}\n')

[96mFew-shots prompt (without context of functions): 
Complete code
Language: Python

Example: def _set_state(self, v, load=False): if hasattr(v, ""):  v = v._utype(v)  try:  t = YANGDynClass( v, base=state.state, is_container="", yang_name="state", parent=self, path_helper=self._path_helper, extmethods=self._extmethods, register_paths=True, extensions=None, namespace="", defining_module="", yang_type="", is_config=True, )  except (TypeError, ValueError):  raise ValueError( { "": """""", "": "", "": """""", } )  self.__state = t if hasattr(self, ""):  self._set()  
Example: def list_operations( self, custom_headers=None, raw=False, **operation_config): def internal_paging(next_link=None, raw=False):  if not next_link:  url = self.list_operations.metadata['url'] query_parameters = {} query_parameters[''] = self._serialize.query("", self.api_version, 'str')  else:  url = next_link query_parameters = {}  header_parameters = {} header_parameters['Content-Type'] = '' if self.config.generat

In [None]:
print("Generating output...")
time = datetime.datetime.now()

# Allow up to 100 tokens beyond the prompt length.
outputs = model.generate(**inputs, max_length=inputs['input_ids'].shape[-1] + 100)
# Drop special tokens (e.g. end-of-text) so they do not leak into the metrics,
# consistent with the sampling cells further down.
text_python = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# Keep only what follows the prompt marker, then trim to the first function.
function = text_python.split("Code so far: ", 1)[-1]
# extract_function_python needs the prompted signature to reject empty bodies.
function = extract_function_python(function, codexglue_test[index]['signature'])

time1 = datetime.datetime.now()
print(f"Output generated. Time to generate the output: {time1 - time}. \nOutput:")
print(f'\033[92m{function}')

Generating output...
Output generated. Time to generate the output: 0:00:02.995710. 
Output:
[92mdef loads(s, strip_comments=False, **kw):  if strip_comments:  s = s.split('\n')  return [self._deserialize.loads(line, **kw) for line in s] 


In [None]:
reference = codexglue_test[index]['signature'] + codexglue_test[index]['body']
print(f'Reference: \033[92m{reference}\033[0m')
# Score the function generated in the previous cell against the dataset reference
# (the demo string `generated_code` is unrelated to this generation).
print_metrics(reference, function)

Reference: [92mdef loads(s, strip_comments=False, **kw):kw[''] = strip_comments return [parse_node(ss.strip(), **kw) for ss in s.split(';') if ss.strip()] [0m
Results: 
edit_sim: 64
chrf_score: 0.48399106478034254
Generated tokens: 
	Generated code tokenized length: 41 
	Reference code tokenized length: 52
rogue_scores
	rouge1: Score(precision=0.7804878048780488, recall=0.6153846153846154, fmeasure=0.6881720430107527)


Now, let's add the function's context (its docstring) to the prompt.

In [None]:
# Use the target sample's docstring as extra context; `index` (not a literal) keeps
# this cell in sync with the generation and metric cells that follow.
function_context = f"Function context: {codexglue_test[index]['docstring']}"
prompt = create_prompt_codex(codexglue_test, index, 2, function_context)
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
# \033[96m is an ANSI colour escape (a stray extra "m" used to be printed after it).
print(f'\033[96mFew-shots prompt (with context of functions): \n{prompt}\n')

[96mmFew-shots prompt (with context of functions): 
Complete code
Language: Python

Example: def setUp(self): DirectoryBasedTest.setUp(self) self.receiver = EventRecorder() self.monitor = directory_monitor.checker(self.testDirectory, self.receiver) self.assertFalse(self.receiver.events) 
Example: def splitdrive(p): return '', p 
Function context: Load a list of trees from a Newick formatted string.

:param s: Newick formatted string.
:param strip_comments: Flag signaling whether to strip comments enclosed in square \
brackets.
:param kw: Keyword arguments are passed through to `Node.create`.
:return: List of Node objects.
Code so far: def loads(s, strip_comments=False, **kw):



In [None]:
print("Generating output...")
time = datetime.datetime.now()

# Allow up to 80 tokens beyond the prompt length.
outputs = model.generate(**inputs, max_length=inputs['input_ids'].shape[-1] + 80)
# Drop special tokens so they do not leak into the metrics.
text_python_context = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# Keep only what follows the prompt marker, then trim to the first function.
function = text_python_context.split("Code so far: ", 1)[-1]
# extract_function_python needs the prompted signature to reject empty bodies.
function = extract_function_python(function, codexglue_test[index]['signature'])

time1 = datetime.datetime.now()
print(f"Output generated. Time to generate the output: {time1 - time}. \nOutput:")
print(f'\033[92m{function}')

Generating output...
Output generated. Time to generate the output: 0:00:02.335434. 
Output:
[92mdef loads(s, strip_comments=False, **kw):
    """
    Load a list of trees from a Newick formatted string.
    """
    if strip_comments:
        s = s.replace('[', '').replace(']', '')
    return [Node.create(s, **kw) for s in s.split('\n') if s]

:param s: Newick formatted


In [None]:
reference = codexglue_test[index]['signature'] + codexglue_test[index]['body']
print(f'Reference: \033[92m{reference}\033[0m')
# Score the function generated with context against the dataset reference
# (the demo string `generated_code` is unrelated to this generation).
print_metrics(reference, function)

Reference: [92mdef loads(s, strip_comments=False, **kw):kw[''] = strip_comments return [parse_node(ss.strip(), **kw) for ss in s.split(';') if ss.strip()] [0m
Results: 
edit_sim: 54
chrf_score: 0.3766182816790898
Generated tokens: 
	Generated code tokenized length: 41 
	Reference code tokenized length: 94
rogue_scores
	rouge1: Score(precision=1.0, recall=0.43617021276595747, fmeasure=0.6074074074074074)


# Kotlin code completion
Using the same methods as for Python generation (few-shot + context).

In [None]:
index = 12

def create_prompt_kotlin(dataset, index, num_examples, context=None, language='Kotlin'):
  """
  Build a few-shot completion prompt for the row at position `index`.

  Parameters:
      dataset (pandas.DataFrame): Rows with at least 'signature' and 'body' columns;
          rows are addressed by position (`.iloc`).
      index (int): Position of the row whose signature must be completed.
      num_examples (int): Number of randomly chosen few-shot examples.
      context (str, optional): Extra text inserted just before the "Code so far:" line.
      language (str): Language name written in the prompt header.

  Returns:
      str: The prompt, ending with "Code so far: <signature>".

  Raises:
      ValueError: If `num_examples` exceeds the number of other rows available.
  """
  # Never offer the target itself as an example: its body is the answer being evaluated.
  target = index if index >= 0 else index + len(dataset)
  candidates = [i for i in range(len(dataset)) if i != target]
  indices = random.sample(candidates, num_examples)

  prefix = f'Complete code\nLanguage: {language}\n'
  shots = '\n'.join(f"Example: {dataset.iloc[i]['signature']} {dataset.iloc[i]['body']}" for i in indices)

  data = dataset.iloc[index]
  context_block = f"{context}\n" if context else ""
  return f"{prefix}\n{shots}\n{context_block}Code so far: {data['signature']}"

# Two random few-shot examples, no extra context.
prompt = create_prompt_kotlin(functions_df, index, 2)
# return_attention_mask=False: only input_ids end up in `inputs`, which is later
# unpacked straight into model.generate(**inputs).
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

# \033[96m is an ANSI colour escape; it is not reset at the end of this string.
print(f'\033[96mFew-shots prompt (without context of functions): \n{prompt}\n')

[96mFew-shots prompt (without context of functions): 
Complete code
Language: Kotlin

Example: inline fun <T, R1, R2, R3> map3(source: MyDeferred<T>, crossinline mapper1: (T) -> R1, crossinline mapper2: (R1) -> R2, crossinline mapper3: (R2) -> R3) + =
    async {
        val c = suspend {
            val c = suspend {
                mapper1(source.await())
            }
            mapper2(c())
        }
        mapper3(c())
    }
Example: inline infix fun <R> inlineFun(crossinline p: () -> R) {
        p()
    }
Code so far: fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult



In [None]:
print("Generating output...")
time = datetime.datetime.now()

# Allow up to 80 tokens beyond the prompt length.
outputs = model.generate(**inputs, max_length=inputs['input_ids'].shape[-1] + 80)
# Drop special tokens so they do not leak into the metrics.
text_kotlin = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# Keep only what follows the prompt marker, then trim to the first function.
function = text_kotlin.split("Code so far: ", 1)[-1]
# extract_function_kotlin needs the prompted signature to reject empty bodies.
function = extract_function_kotlin(function, functions_df.iloc[index]['signature'])

time1 = datetime.datetime.now()
print(f"Output generated. Time to generate the output: {time1 - time}. \nOutput:")
print(f'\033[92m{function}')

Generating output...
Output generated. Time to generate the output: 0:00:02.406653. 
Output:
[92mfun assertTasksPackedToCache(vararg taskPaths: String): BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult<String, BuildResult


In [None]:
reference = functions_df.iloc[index]['signature'] + functions_df.iloc[index]['body']
print(f'Reference: \033[92m{reference}\033[0m')
# Score the Kotlin function generated in the previous cell against the dataset
# reference (the demo string `generated_code` is an unrelated Python example).
print_metrics(reference, function)

Reference: [92mfun assertTasksPackedToCache(vararg taskPaths: String): BuildResult{
    taskPaths.forEach {
        assertOutputContains("Stored cache entry for task '$it' with cache key ")
    }
}[0m
Results: 
edit_sim: 21
chrf_score: 1e-16
Generated tokens: 
	Generated code tokenized length: 41 
	Reference code tokenized length: 99
rogue_scores
	rouge1: Score(precision=0.14634146341463414, recall=0.06060606060606061, fmeasure=0.08571428571428572)


In [None]:
# GPT-generated
# Hand-supplied description used as the context block of the prompt for this one
# function (the text was not taken from the dataset).
function_context = '''Function context: The assertTasksPackedToCache function verifies that cache entries have been stored for
the provided task paths by iterating through each path and asserting that the output contains
a specific message indicating the cache entry\'s storage.'''

# Same target `index` as before, now with the context inserted before "Code so far:".
prompt = create_prompt_kotlin(functions_df, index, 2, function_context)
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
print(f'\033[96mFew-shots prompt (with context of functions): \n{prompt}\n')

[96mFew-shots prompt (with context of functions): 
Complete code
Language: Kotlin

Example: fun dangerous(): String {
    return "foo"
}
Example: fun countFilteredSome(): Int {
        return data.count { filterSome(it) }
    }
Function context: The assertTasksPackedToCache function verifies that cache entries have been stored for
the provided task paths by iterating through each path and asserting that the output contains
a specific message indicating the cache entry's storage.
Code so far: fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult



In [None]:
print("Generating output...")
time = datetime.datetime.now()

# Allow up to 80 tokens beyond the prompt length.
outputs = model.generate(**inputs, max_length=inputs['input_ids'].shape[-1] + 80)
# Drop special tokens so they do not leak into the metrics.
text_kotlin_context = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# Keep only what follows the prompt marker, then trim to the first function.
function = text_kotlin_context.split("Code so far: ", 1)[-1]
# extract_function_kotlin needs the prompted signature to reject empty bodies.
function = extract_function_kotlin(function, functions_df.iloc[index]['signature'])

time1 = datetime.datetime.now()
print(f"Output generated. Time to generate the output: {time1 - time}. \nOutput:")

print(f'\033[92m {function}')

Generating output...
Output generated. Time to generate the output: 0:00:02.357886. 
Output:
[92m fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult {
    assert(taskPaths.size == 1)
    assert(taskPaths[0] == "foo")
    assert(taskPaths[0] == "bar")
    assert(taskPaths[0] == "baz")
    assert(taskPaths[0] == "qux")
    assert(taskPaths[0


In [None]:
reference = functions_df.iloc[index]['signature'] + functions_df.iloc[index]['body']
print(f'Reference: \033[92m{reference}\033[0m')
# Score the Kotlin function generated with context against the dataset reference
# (the demo string `generated_code` is an unrelated Python example).
print_metrics(reference, function)

Reference: [92mfun assertTasksPackedToCache(vararg taskPaths: String): BuildResult{
    taskPaths.forEach {
        assertOutputContains("Stored cache entry for task '$it' with cache key ")
    }
}[0m
Results: 
edit_sim: 26
chrf_score: 0.008285004142502071
Generated tokens: 
	Generated code tokenized length: 41 
	Reference code tokenized length: 99
rogue_scores
	rouge1: Score(precision=0.3170731707317073, recall=0.13131313131313133, fmeasure=0.18571428571428572)



# LLM output control
Since the model generates unnecessary parts and repeats some code, we can try several decoding techniques to prevent this.


In [None]:
# Let's take the last prompt (the Kotlin prompt with context is still held in `prompt`)
print("Prompt:")
print(prompt)
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

# Top-k Sampling: Sampling from the top k most likely next words
outputs = model.generate(
    **inputs,
    max_length=inputs['input_ids'].shape[-1] + 80,
    do_sample=True,
    top_k=50
)
text_top_k = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# extract_function_kotlin needs the prompted signature to reject empty bodies.
function = extract_function_kotlin(
    text_top_k.split("Code so far: ", 1)[-1],
    functions_df.iloc[index]['signature'],
)
print("\nGenerated text with Top-k Sampling:")
print(f'\033[92m {function}')

Prompt:
Complete code
Language: Kotlin

Example: fun dangerous(): String {
    return "foo"
}
Example: fun countFilteredSome(): Int {
        return data.count { filterSome(it) }
    }
Function context: The assertTasksPackedToCache function verifies that cache entries have been stored for
the provided task paths by iterating through each path and asserting that the output contains
a specific message indicating the cache entry's storage.
Code so far: fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult

Generated text with Top-k Sampling:
[92m fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult = it.filter { taskPath in
    println("Testing ${taskPath}")
    val cache = TaskCache.loaded(taskPath)
    val results_hash = CacheUtils.get(cache.key, cache.cacheEntry)
    println("  Using hash ${results_hash}")
    assertTrue("Task ${taskPath} successfully cached!" in


In [None]:
# Top-p (Nucleus) Sampling: Sampling from a dynamically adjusted set of words
outputs = model.generate(
    **inputs,
    max_length=inputs['input_ids'].shape[-1] + 80,
    do_sample=True,
    top_p=0.9
)
text_top_p = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# extract_function_kotlin needs the prompted signature to reject empty bodies.
function = extract_function_kotlin(
    text_top_p.split("Code so far: ", 1)[-1],
    functions_df.iloc[index]['signature'],
)
print("\nGenerated text with Top-p (Nucleus) Sampling:")
print(f'\033[92m {function}')


Generated text with Top-p (Nucleus) Sampling:
[92m fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult {
    // Assert that the cache is properly configured.
    assert thatIsNone(cache) { "cache is not configured." }
    // Assert that all of the provided task paths have their corresponding cache entry
    // present.
    // This step assumes that cache entries are stored for each task path
    // sequentially.
    for (taskPath in


In [None]:
# Temperature Scaling: Adjusting the likelihood scores of words before sampling
outputs = model.generate(
    **inputs,
    max_length=inputs['input_ids'].shape[-1] + 80,
    do_sample=True,
    temperature=0.7
)
text_temp = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# extract_function_kotlin needs the prompted signature to reject empty bodies.
function = extract_function_kotlin(
    text_temp.split("Code so far: ", 1)[-1],
    functions_df.iloc[index]['signature'],
)
print("\nGenerated text with Temperature Scaling:")
print(f'\033[92m {function}')


Generated text with Temperature Scaling:
[92m fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult? {
    if (taskPaths.isEmpty()) {
        return BuildResult.success("Cache entry is empty!")
    }
    if (taskPaths.size()!= 1) {
        return BuildResult.failure("Cache entry is not a single path!")
    }
    var cache = Cache.get(taskPaths[0


In [None]:
# Repetition Penalty: Penalizing repeated tokens in the generated output
# A value of 1.0 means "no penalty", so the previous setting left generation
# unchanged; values above 1.0 discourage tokens that were already produced.
repetition_penalty = 1.2
outputs = model.generate(
    **inputs,
    max_length=inputs['input_ids'].shape[-1] + 80,
    do_sample=True,
    repetition_penalty=repetition_penalty
)
text_rep_pen = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# extract_function_kotlin needs the prompted signature to reject empty bodies.
function = extract_function_kotlin(
    text_rep_pen.split("Code so far: ", 1)[-1],
    functions_df.iloc[index]['signature'],
)
print("\nGenerated text with Repetition Penalty:")
print(f'\033[92m {function}')


Generated text with Repetition Penalty:
[92m fun assertTasksPackedToCache(vararg taskPaths: String): BuildResult {
        assert "tasksPacked" in this.cache
        for (taskPath: String) {
            val cachedResult = this.cache[taskPath]
            assertTasksPackedToCache(taskPaths, cachedResult.value)
        }
    }


We will search for the best combination of these parameters later.