# Inference With Custom Dataset

In [None]:
import torch
import intel_extension_for_pytorch as ipex
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

## Inference on Fine-tuned Model

In [None]:
import ipywidgets as widgets

# Question pre-filled in the text box; edit it in the rendered widget before
# running the inference cells, which read `input_prompt.value`.
default_question = 'Did you encounter an error message saying "Legacy-Install-Failure" when installing the Openvino-Dev pip package using Python 3.10?'

# Let the text box span most of the notebook cell width.
question_layout = widgets.Layout(width="90%")

input_prompt = widgets.Text(
    description='Question:',
    placeholder='Type something',
    value=default_question,
    layout=question_layout,
    disabled=False,
)

# A bare expression on the last line of a cell makes the notebook render the widget.
input_prompt

In [None]:
# Prompt template shared by both inference cells; `{prompt}` is filled in with
# the user's question through `str.format`.
CUSTOM_PROMPT = (
    "### Question: {prompt} "
    "### Answer:"
)

In [None]:
# Load the model stored under ./outputs/own-merged-llm with 4-bit loading
# enabled, move it to the "xpu" device, and answer the question from the widget.
device = "xpu"
model_path = "./outputs/own-merged-llm"
model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True, optimize_model=True, trust_remote_code=True)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
with torch.inference_mode():
    prompt = CUSTOM_PROMPT.format(prompt=input_prompt.value)
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    # NOTE(review): `temperature` presumably only takes effect when sampling is
    # enabled (do_sample=True); as written this likely decodes greedily - confirm.
    output = model.generate(input_ids, temperature=0.1, max_new_tokens=512)
    if device == "xpu":
        # Wait for queued XPU work to finish before reading the result back.
        torch.xpu.synchronize()
    output = output.cpu()
    output_str = tokenizer.decode(output[0], skip_special_tokens=True)
    print(output_str)
    # Keep only the completion. For a causal LM the generated sequence starts
    # with the prompt tokens, so slice them off by length instead of
    # string-replacing the prompt: the replace silently fails when decoding does
    # not reproduce the prompt text exactly, and it also removes any later
    # repetition of the prompt inside the answer.
    prompt_length = input_ids.shape[1]
    finetuned_result = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

## Inference on Original Model

In [None]:
# Release the fine-tuned model before loading the base model so both do not
# have to be resident at the same time.
try:
    del model, tokenizer
except NameError:
    # Already released (e.g. this cell is being re-run); nothing to free.
    pass
torch.xpu.empty_cache()

# Load the base Mistral-7B-Instruct model with the same settings and answer the
# same question, so the two results can be compared side by side.
device = "xpu"
model_path = "mistralai/Mistral-7B-Instruct-v0.1"
ori_model = AutoModelForCausalLM.from_pretrained(model_path, load_in_4bit=True, optimize_model=True, trust_remote_code=True)
ori_model = ori_model.to(device)
ori_tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
with torch.inference_mode():
    prompt = CUSTOM_PROMPT.format(prompt=input_prompt.value)
    input_ids = ori_tokenizer.encode(prompt, return_tensors="pt").to(device)
    # NOTE(review): `temperature` presumably only takes effect when sampling is
    # enabled (do_sample=True); as written this likely decodes greedily - confirm.
    output = ori_model.generate(input_ids, temperature=0.1, max_new_tokens=512)
    if device == "xpu":
        # Wait for queued XPU work to finish before reading the result back.
        torch.xpu.synchronize()
    output = output.cpu()
    output_str = ori_tokenizer.decode(output[0], skip_special_tokens=True)
    print(output_str)
    # Keep only the completion. For a causal LM the generated sequence starts
    # with the prompt tokens, so slice them off by length instead of
    # string-replacing the prompt: the replace silently fails when decoding does
    # not reproduce the prompt text exactly, and it also removes any later
    # repetition of the prompt inside the answer.
    prompt_length = input_ids.shape[1]
    original_result = ori_tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

## Original vs. Fine-tuned Results

In [None]:
# Show the two answers one after the other, separated by banner lines. The
# leading and trailing empty strings reproduce the blank lines that frame the
# report.
comparison_report = "\n".join([
    "",
    "==================================== Original Model =============================================",
    f"{original_result}",
    "===================================== Fine-Tuned Model ===========================================",
    f"{finetuned_result}",
    "=================================================================================================",
    "",
])
print(comparison_report)

## Notices & Disclaimers 

Intel technologies may require enabled hardware, software or service activation. 

No product or component can be absolutely secure.  

Your costs and results may vary.  

No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (0BSD), Open Source Initiative. No rights are granted to create modifications or derivatives of this document. 

© Intel Corporation.  Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries.  Other names and brands may be claimed as the property of others.  