In [None]:
import os

import torch
from IPython.display import display, HTML
from torch import nn
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer

from dataset import VerilogDataset
from integrated_gradient import IntegratedGradient
from smooth_gradient import SmoothGradient

In [2]:
def read_file(file_path, encoding='iso-8859-1'):
    """Return the entire text content of ``file_path``.

    Args:
        file_path: Path of the file to open in text mode.
        encoding: Text encoding used to decode the file; defaults to
            ``'iso-8859-1'``.

    Returns:
        The decoded file content as a single string.
    """
    with open(file_path, mode='r', encoding=encoding) as source:
        contents = source.read()
    return contents

def remove_comments(code, comment_symbol='//'):
    """Drop line comments from ``code``.

    Every line is cut at the first occurrence of ``comment_symbol`` and only
    the text in front of it is kept. The number of lines is unchanged, and
    whitespace that preceded the symbol is left in place. The match is purely
    textual, so a ``comment_symbol`` that appears inside a string literal is
    cut as well.

    Args:
        code: Source text with lines separated by ``'\\n'``.
        comment_symbol: Marker that starts a line comment; defaults to ``'//'``.

    Returns:
        The text with everything from ``comment_symbol`` to the end of each
        line removed.
    """
    stripped = []
    for text in code.split('\n'):
        # Only the part before the first marker is needed, so one split is enough.
        before_comment = text.split(comment_symbol, 1)[0]
        stripped.append(before_comment)
    return '\n'.join(stripped)

def remove_multiple_newlines(code):
    """Collapse each run of blank lines in ``code`` to a single line.

    A line counts as blank when it is empty or contains only whitespace. The
    first line of every blank run is kept exactly as written (including any
    whitespace it holds); the remaining lines of the run are dropped.
    Non-blank lines are never modified.

    Args:
        code: Source text with lines separated by ``'\\n'``.

    Returns:
        The text with consecutive blank lines reduced to one.
    """
    kept = []
    in_blank_run = False
    for text in code.split('\n'):
        is_blank = not text.strip()
        if is_blank and in_blank_run:
            # Second or later blank line of a run: skip it.
            continue
        kept.append(text)
        in_blank_run = is_blank
    return '\n'.join(kept)

def preprocess_code(code, preprocessors):
    """Return ``code`` unchanged; preprocessing is currently switched off.

    The original pipeline (apply each callable in ``preprocessors`` to the
    code in order, then strip the result) has been disabled, so this function
    is a pass-through and ``preprocessors`` is accepted but ignored.

    Args:
        code: Source text.
        preprocessors: Iterable of ``str -> str`` callables. Not used while
            preprocessing is disabled.

    Returns:
        The ``code`` argument, exactly as passed in.
    """
    # Pass-through on purpose: no preprocessor is applied and no stripping is done.
    return code

In [3]:
# Text-cleanup functions, listed in the order they are meant to be applied.
preprocessors = [
    remove_comments,
    remove_multiple_newlines,
]

In [None]:
# Checkpoint used for both the tokenizer and the causal language model.
model_name = 'ajn313/cl-verilog-1.0'

# Take the Hugging Face access token from the environment rather than writing
# it into the notebook, where it would be saved and shared with the file.
# If HF_TOKEN is not set this is None, which leaves authentication to the
# library defaults instead of sending a placeholder string as a bearer token.
hugging_face_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
    token=hugging_face_token,
)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Load the weights in half precision and move the model to the GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    token=hugging_face_token,
).to('cuda')

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Path of the Verilog source to analyse (placeholder value; point it at a real file).
verilog_file_path = "test file path"
# Full file text, decoded with read_file's default encoding.
verilog_code = read_file(verilog_file_path)
# Per-line view of the file; the cells below slice it with start/end.
verilog_code_lines = verilog_code.split('\n')

In [14]:
# Slice bounds into verilog_code_lines: 0-based, `start` included, `end` excluded.
start, end = 35, 41
# True: attribute every selected line as its own input.
# False: join the selected lines with newlines and attribute them as one input.
line_wise = True

In [15]:
# Ask PyTorch to release cached, currently unused GPU memory before the
# attribution cell below runs.
torch.cuda.empty_cache()

In [22]:
# SmoothGrad settings shared by every attribution run in this cell.
SMOOTHGRAD_NUM_STEPS = 1000
SMOOTHGRAD_STDEV = 0.001

# Lines of the file selected for attribution, and the same lines with
# surrounding whitespace removed (the form that is fed to the model).
verilog_codes = verilog_code_lines[start:end]
stripped_lines = [code_line.strip() for code_line in verilog_codes]

# line_wise: one model input per selected line.
# otherwise: a single input made of all selected lines joined by newlines.
if line_wise:
    snippets = stripped_lines
else:
    snippets = ["\n".join(stripped_lines)]

# One colorized HTML fragment per snippet, in input order. The list is rebuilt
# from scratch so the cell can be re-run safely.
coloder_string = []
for snippet in snippets:
    # A distinct loop name is used on purpose: the file text held in
    # `verilog_code` must not be overwritten by this cell.
    test_example = [snippet]
    batch_size = 1

    test_dataset = VerilogDataset(
        data_list=test_example,
        tokenizer=tokenizer,
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
    )

    # Token-level view of exactly the text being attributed. These values are
    # not passed to SmoothGradient; they are left in the namespace for
    # inspection after the cell has run.
    inputs = tokenizer(snippet, return_tensors="pt", padding=True).to('cuda')
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']
    tokens = [
        tokenizer.convert_ids_to_tokens(input_ids_)
        for input_ids_ in input_ids
    ]

    # A fresh interpreter is built for every snippet, as before.
    smooth_grad = SmoothGradient(
        model,
        tokenizer,
        show_progress=False,
        num_steps=SMOOTHGRAD_NUM_STEPS,
        stdev=SMOOTHGRAD_STDEV,
    )
    instances = smooth_grad.saliency_interpret(test_dataloader)
    # The dataloader holds a single example, so only the first instance is used.
    coloder_string.append(smooth_grad.colorize(instances[0]))

In [23]:
# Disabled alternative: Integrated Gradients attribution over `test_dataloader`.
# The whole cell is commented out and is kept for reference only.
# NOTE(review): `encoder="bert"` may not match the causal LM loaded in this
# notebook - confirm against IntegratedGradient before re-enabling.
# integrated_grad = IntegratedGradient(
#     model,
#     tokenizer, 
#     show_progress=False,
#     encoder="bert",
#     num_steps=100,
#     _min = 0.001,
#     _max = 10.0,
# )
# instances = integrated_grad.saliency_interpret(test_dataloader)

In [24]:
# Merge the per-snippet HTML fragments into one document, one fragment per row.
# The join is guarded so that re-running this cell is harmless: once
# coloder_string is already a string, joining it again would insert "<br>"
# between every single character.
if not isinstance(coloder_string, str):
    coloder_string = "<br>".join(coloder_string)
display(HTML(coloder_string))

In [25]:
# Render the joined markup again as this cell's output value. This expects
# coloder_string to be the single HTML string built by the previous cell.
HTML(coloder_string)

In [27]:
# Show the raw (unstripped) source lines that were selected for attribution.
print(*verilog_code_lines[start:end], sep="\n")

	assign idin  = { {(mwidth-dwidth){din[dwidth-1]}}, din};

	// generate multiplier structure
	always @(posedge clk)
	  if(ena)
	    mult_res <= #1 icoef * idin;
