In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

from classifier_manager import *
from model_generation import ModelGeneration
from perturbation import Perturbation

# Absolute path of the notebook's working directory; every data, classifier
# and result path below is built relative to it.
# (`Path()` is `Path('.')`, and '.' is its own parent, so no `.parent` hop
# is needed to obtain the same directory.)
cur_dir = Path().resolve()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# The same model identifier selects both the classifier checkpoint folder
# and the generation model.
model_name = 'qwen2.5-coder'

# Checkpoint layout: classifiers/<model>/javadoc/760_0.5/1.pth
classifier_path = cur_dir.joinpath("classifiers", model_name, 'javadoc', '760_0.5', '1.pth')

# Load the classifier first, then the generation model (same order as before).
clfr = load_classifier_manager(classifier_path)
llm_gen = ModelGeneration(model_name)
# embedding_model = ModelExtraction('qwen2.5-coder')

You have loaded an AWQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.
I have left this message as the final dev message to help you transition.

Important Notice:
- AutoAWQ is officially deprecated and will no longer be maintained.
- The last tested configuration used Torch 2.6.0 and Transformers 4.51.3.
- If future versions of Transformers break AutoAWQ compatibility, please report the issue to the Transformers project.

Alternative:
- AutoAWQ has been adopted by the vLLM Project: https://github.com/vllm-project/llm-compressor

For further inquiries, feel free to reach out:
- X: https://x.com/casper_hansen_
- LinkedIn: https://www.linkedin.com/in/casper-hansen-804005170/

Loading checkpoint shards: 100%|██████████| 5/5 [00:00<00:00, 11.99it/s]


In [4]:
# Build two perturbations on the same classifier that differ only in their
# target probability: 0.01 for the "uncommenter" and 0.99 for the "commentor".
# NOTE(review): presumably target_probability is the classifier output the
# perturbation steers generation toward -- confirm in perturbation.py.
uncommenter_pert, commentor_pert = (
    Perturbation(clfr, target_probability=target, accuracy_threshold=0.9)
    for target in (0.01, 0.99)
)

In [5]:
# Load the demo source files as text. The two Java variants are the inputs
# embedded in the translation prompts below; the Go and Python files are
# loaded as well but are not referenced by the cells visible here.
# Each file is read exactly once (the Go file used to be read twice).
demo_dir = cur_dir / 'demo_examples'

commented_code = (demo_dir / 'commented.java').read_text()
uncommented_code = (demo_dir / 'comment-free.java').read_text()
go_code = (demo_dir / 'go-translation.go').read_text()
python_code = (demo_dir / 'python-translation.py').read_text()

# Experimenting with Uncommented Code Perturbation

## Without Perturbation

In [6]:
# Baseline: translate the comment-free Java example into each target language
# with no perturbation attached, and save every completion to its own file.
langs = ['python', 'c', 'go']
question = "Translate the following Java code to <<lang>>. Do not output anything else other than the translated code. Do not use code decorators before and after your output.\n\n" + uncommented_code

# Create the output folder up front so a fresh checkout does not fail with
# FileNotFoundError only after the (slow) generation has already run.
results_dir = cur_dir / 'perturbation_demo_results'
results_dir.mkdir(parents=True, exist_ok=True)

llm_gen.unset_perturbation()

for lang in langs:
    # Fill the language placeholder in the prompt template.
    q = question.replace('<<lang>>', lang)
    output = llm_gen.generate(q)
    # The generated text is stored under the 'completion' key of the result.
    (results_dir / f'uncommented_{lang}_translation.txt').write_text(output['completion'])

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


## With Perturbation

In [7]:
# Same translations as the baseline cell, but with `commentor_pert` attached
# to the generator. `langs` and `question` are reused from the previous cell,
# so that cell must have been run first.
llm_gen.set_perturbation(commentor_pert)

# Create the output folder up front so a fresh checkout does not fail with
# FileNotFoundError only after the (slow) generation has already run.
results_dir = cur_dir / 'perturbation_demo_results'
results_dir.mkdir(parents=True, exist_ok=True)

for lang in langs:
    # Fill the language placeholder in the prompt template.
    q = question.replace('<<lang>>', lang)
    output_perturbed = llm_gen.generate(q)
    # The generated text is stored under the 'completion' key of the result.
    (results_dir / f'{lang}_comment_perturbed.txt').write_text(output_perturbed['completion'])

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


# Experimenting with Commented Code Perturbation

## Without Perturbation

In [8]:
# Baseline: translate the commented Java example into each target language
# with no perturbation attached, and save every completion to its own file.
# Note that `question` is rebound here to the commented-code prompt.
langs = ['python', 'c', 'go']
question = "Translate the following Java code to <<lang>>. Do not output anything else other than the translated code. Do not use code decorators before and after your output.\n\n" + commented_code

# Create the output folder up front so a fresh checkout does not fail with
# FileNotFoundError only after the (slow) generation has already run.
results_dir = cur_dir / 'perturbation_demo_results'
results_dir.mkdir(parents=True, exist_ok=True)

llm_gen.unset_perturbation()

for lang in langs:
    # Fill the language placeholder in the prompt template.
    q = question.replace('<<lang>>', lang)
    output = llm_gen.generate(q)
    # The generated text is stored under the 'completion' key of the result.
    (results_dir / f'commented_{lang}_translation.txt').write_text(output['completion'])

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [9]:
# With perturbation: same translations as the commented-code baseline cell,
# but with `uncommenter_pert` attached to the generator. `langs` and
# `question` are reused from the previous cell, so that cell must have been
# run first (otherwise `question` may still hold the uncommented prompt).
llm_gen.set_perturbation(uncommenter_pert)

# Create the output folder up front so a fresh checkout does not fail with
# FileNotFoundError only after the (slow) generation has already run.
results_dir = cur_dir / 'perturbation_demo_results'
results_dir.mkdir(parents=True, exist_ok=True)

for lang in langs:
    # Fill the language placeholder in the prompt template.
    q = question.replace('<<lang>>', lang)
    output_perturbed = llm_gen.generate(q)
    # The generated text is stored under the 'completion' key of the result.
    (results_dir / f'{lang}_uncomment_perturbed.txt').write_text(output_perturbed['completion'])

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
