#############################################################################################
## **AAI-520**                                                                                 #
## **Final Project - Group 6**                                                                 #
## **Chatbot for Movie Info utilizing the Cornell Movie Dialogs Corpus**        

This Jupyter notebook runs inference for the chatbot: it loads the trained
model and generates responses to user input.
The model was trained on a custom training set derived from the Cornell Movie Dialogs Corpus.

#############################################################################################

In [None]:
!pip install transformers
!pip install accelerate
!pip install peft

In [None]:
# Set git's global `credential.helper` to `store`, which makes git persist
# credentials in a plain-text file on disk.
# NOTE(review): presumably done so the Hugging Face login can save its token
# for git operations — confirm this is still required.
!git config --global credential.helper store

In [None]:
# Log in to the Hugging Face Hub from the command line (prompts for an
# access token).
# NOTE(review): presumably needed because the checkpoint loaded later is
# private or gated — confirm.
!huggingface-cli login

In [None]:
from transformers import AutoTokenizer
import transformers
import torch

# Hub id of the checkpoint to load. The stock Llama 2 chat model is kept as a
# commented-out alternative.
# model = "meta-llama/Llama-2-7b-chat-hf"
model = "guitarnoob/msaai520_test3"

tokenizer = AutoTokenizer.from_pretrained(model)

# Text-generation pipeline with half-precision weights; device_map="auto"
# leaves device placement to the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Free-form prompt (no chat template) used as a first sanity check.
prompt = 'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n'

# Sample from the 10 most likely tokens, return one sequence, stop at the
# tokenizer's EOS token, and cap the sequence at max_length=200.
generation_kwargs = {
    "do_sample": True,
    "top_k": 10,
    "num_return_sequences": 1,
    "eos_token_id": tokenizer.eos_token_id,
    "max_length": 200,
}

sequences = pipeline(prompt, **generation_kwargs)
for seq in sequences:
    generated_text = seq["generated_text"]
    print(f"Result: {generated_text}")


In [None]:
# Chat-formatted prompt: a system message wrapped in <<SYS>> tags, followed by
# the user's question, all inside an [INST] ... [/INST] span.
# NOTE(review): the literal "<s>" may end up duplicating the BOS token if the
# tokenizer also prepends one — confirm against how the training prompts were
# built before changing it.
instruction = """
<s>[INST] <<SYS>>
You are a knowledgeable movie bot who can only answer questions related to movies.

Answer the user with a concise answer.
<</SYS>>

When was 15 minutes released? [/INST]
"""

# Same sampling setup as a plain top-k run: one returned sequence, stop at the
# tokenizer's EOS token, max_length of 200.
sampling_options = dict(
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)

sequences = pipeline(instruction, **sampling_options)
for seq in sequences:
    generated_text = seq["generated_text"]
    print(f"Result: {generated_text}")

In [None]:
# !git clone https://huggingface.co/guitarnoob/msaai520_test1

In [None]:
# ## NEW CODE TEST

# from transformers import AutoTokenizer, AutoModelForCausalLM
# from peft import AutoPeftModelForCausalLM
# import torch

# model_id = "guitarnoob/msaai520_test3"

# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id)
# # model = AutoPeftModelForCausalLM.from_pretrained(model_id)

# if torch.cuda.is_available():
#     model = model.to('cuda')
# else:
#     print("CUDA not available. Running on CPU.")

In [None]:
# Alternative minimal prompt for a quick check:
# input_text = "Hello, how are you"
input_text = """
<s>[INST] <<SYS>>
You are a helpful, respectful and honest movie assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

What is the best action movie? [/INST]
"""

# `model` is the checkpoint id *string* in this notebook (the cell that would
# load a model object is commented out), so calling `model.generate` fails.
# Use the model instance that the text-generation pipeline already loaded.
causal_lm = pipeline.model

# Tokenize the prompt and move the ids to the device the model lives on;
# the pipeline was created with device_map="auto", so that may not be the CPU.
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(causal_lm.device)

# Bound the number of generated tokens explicitly instead of relying on a
# default length limit, which could be shorter than this prompt.
output = causal_lm.generate(input_ids=input_ids, max_new_tokens=200)

# Decode the first (only) returned sequence, special tokens included.
decoded_output = tokenizer.decode(output[0])
print(decoded_output)
