## Set up
### Import Packages and API keys 

In [None]:
# !pip install transformers datasets torch langchain-community faiss-cpu sentence-transformers
from getpass import getpass
from dotenv import load_dotenv
import os
from pathlib import Path

# Read settings from a .env file in the current working directory.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

huggingface_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

if not huggingface_api_token:
    # Not configured: prompt for the token without echoing it to the screen.
    huggingface_api_token = getpass("Enter your Hugging Face Hub API token: ")
    # Publish the typed token under the same variable name it was looked up by.
    # Otherwise it would exist only in this local name, and code that reads the
    # token from the environment would never see it.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_api_token

## Model Selection

In [None]:
from langchain_community.llms import HuggingFaceHub
from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
# model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b-instruct")

# Using the Falcon-7B-Instruct model from the open-source Hugging Face Hub (alternatives are left commented out)
# model_name = "mistralai/Mistral-7B-v0.1"
# model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# Hub repository to query; swap in the commented line to try the base model.
model_name = "tiiuae/falcon-7b-instruct"
# model_name = "tiiuae/falcon-7b"

# Client for the selected repository, with the generation settings passed
# through as model_kwargs.
llm = HuggingFaceHub(
    repo_id=model_name,
    model_kwargs={
        "temperature": 0.5,
        "max_length": 1024,
        "max_new_tokens": 200,
    },
)

## Template-based prompting with Langchain

In [None]:
# I will be using LangChain

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline

# Prompt skeleton: "{question}" is the single placeholder filled in per request,
# and the text ends with "Response:" followed by a newline.
template = """
Please answer the question.
Answer professionally, and where appropriate, in a Computer Science educational context.
Question: {question}
Response:
"""

# Wrap the raw string in a template object that declares its one input variable.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

# Chain that formats the prompt and sends it to the configured LLM.
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=False,
)


## Chat Interface

In [None]:
import gradio as gr
# def chat_interface(textbox, chat):
#     input_dict = {'question': textbox}
#     response = llm_chain.run(input_dict)

#     print("user:", textbox)
#     print("bot:", response)
#     return response

def chat_interface(textbox, chat):
    """Answer one chat message by running it through ``llm_chain``.

    Args:
        textbox: The user's message; passed to the chain as ``question``.
        chat: Second argument supplied by the chat UI (presumably the
            conversation history -- confirm against Gradio). Unused here.

    Returns:
        The part of the chain's ``'text'`` output that follows the first
        ``"Response:"`` marker (up to the next such marker, if any), with
        surrounding whitespace stripped. If the output contains no marker,
        the whole text is returned stripped instead of raising.
    """
    response_dict = llm_chain.invoke({'question': textbox})
    text = response_dict['text']  # Extract the text from the dictionary
    print(text)  # raw chain output, kept for debugging

    # The output may or may not include the prompt (and therefore the marker).
    # Indexing [1] unconditionally raised IndexError when it was absent.
    segments = text.split("Response:")
    if len(segments) < 2:
        return text.strip()
    return segments[1].strip()

# Assemble the chat UI around chat_interface, then start serving it.
demo = gr.ChatInterface(
    fn=chat_interface,
    # Components
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    # Page text and look
    title="Chatbot",
    description="Ask Chatbot any question",
    theme="soft",
    # Clickable sample questions
    examples=[
        "What does AI stand for?",
        "What is Software Engineering?",
        "What is Cybersecurity?",
    ],
    cache_examples=False,
    # Button configuration
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()

#I am a final year Computer Science student seeking to find a graduate role in __. What are practical skills required for a career in __?
#I am a beginner that wants to get into __, where should I start?


## Evaluation

In [None]:
# Load standardized test set
    # IT Consultant, Cloud Engineer...

# ROUGE? BLEU?




In [None]:
# MAYBE DO THIS FIRST? AND SEE THE DOWNSIDE, AND LEARN THAT IT IS NOT REQUIRED (doesn't solve hallucinations and timely context!)
# Fine-tune with input and output example data sets

# Compare with different models (one fine-tuned one just pre-trained)