# Radiantloom Support Assist 7B

In [None]:
# install required packages
!pip install upgrade pip
!pip install -q transformers peft accelerate bitsandbytes safetensors sentencepiece langchain gradio sentence-transformers chromadb

In [None]:
# # configure Langsmith - it is optional but recommended (get the API key from here - https://smith.langchain.com)
# import os
# os.environ["LANGCHAIN_PROJECT"] = "radiantloom-support-assist"
# os.environ["LANGCHAIN_API_KEY"] = "{ADD YOUR LANGSMITH API KEY}"
# os.environ["LANGCHAIN_ENDPOINT"] ="https://api.smith.langchain.com"
# os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [None]:
# import libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

In [None]:
# load the model with 4bit quantization
# name (or path) of the model; it is passed to both the model loader and the
# tokenizer initializer defined in this cell
model_name = "Radiantloom/radiantloom-support-assist-7b"

def load_quantized_model(model_name: str):
    """
    Load a causal language model with 4-bit NF4 quantization.

    The weights are quantized with bitsandbytes (4-bit, NF4, double
    quantization) and computation runs in bfloat16.

    :param model_name: Name or path of the model to be loaded.
    :return: Loaded quantized model.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # The 4-bit switch lives in bnb_config only. Passing load_in_4bit=True as a
    # separate keyword together with quantization_config is rejected by
    # from_pretrained with a ValueError.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
    )
    return model

def initialize_tokenizer(model_name: str):
    """
    Create the tokenizer for ``model_name`` and set its BOS token id to 1.

    :param model_name: Name or path of the model for tokenizer initialization.
    :return: Initialized tokenizer.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    # override the beginning-of-sequence token id on the loaded tokenizer
    setattr(tok, "bos_token_id", 1)
    return tok

# load model
# (4-bit quantized; see load_quantized_model above)
model = load_quantized_model(model_name)

# load the matching tokenizer for the same model name
tokenizer = initialize_tokenizer(model_name)

# define stop token ids
# NOTE(review): stop_token_ids is not referenced anywhere else in the visible
# part of this notebook — confirm whether it is still needed
stop_token_ids = [0]

In [None]:
# specify the prompt
# The prompt is laid out in turns delimited by <|im_start|>ROLE ... <|im_end|>:
# a system turn (instructions plus the <context> block), a user turn holding
# the question, and an opened assistant turn that the model is left to complete.
# A trailing backslash inside the literal joins that line with the next one,
# so the long context sentences are not broken by a newline.
prompt = """
<|im_start|>system
You work as a support assistant for AI Geek Labs, an AI research and development lab, handling inquiries and providing assistance through their website.

Use the following pieces of context to answer the user's questions. If you don't know the answer, don't make up a answer, just say you don't know.

<context>

Get AI-powered, Startup-fast: Build & Tune Your LLM MVPs with AI Geek Labs
Invent the future of your business with our cutting-edge AI research, development, and fine-tuning services. We're your AI playground.
Stop waiting, build! We turn your AI vision into reality, fast. From email assistants to code generation, we build MVPs with the latest AI frameworks \
like Langchain and OpenAI, helping small businesses and startups jumpstart their AI journey.
MVP: Turn your AI dream into a working prototype, lightning-fast. We design, develop, and deploy AI-powered MVPs using cutting-edge frameworks \
like Langchain, HuggingFace, and Llama-Index.
LLM Fine-tuning: Tap into the true potential of LLMs for your specific needs. We fine-tune models for email assistants, customer support chatbots, \
code generation, text-to-SQL, and more.
Open Source Radiance: We're passionate about sharing! Explore our Radiantloom series, featuring open-source LLM models fine-tuned by our experts, \
ready for you to build upon.

<context/><|im_end|>
<|im_start|>user
Question: Which AI frameworks does AI Geek Labs use to build MVPs for clients?<|im_end|>
<|im_start|>assistant
"""

In [None]:
# run inference
# tokenize the prompt and move it to the device the model lives on, so that
# generate() does not receive CPU inputs for a model placed on an accelerator
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

# do_sample=True is required for temperature / top_p / top_k to have any
# effect; without it generation is greedy and those arguments are ignored
output = model.generate(
    input_ids,
    max_length=4096,
    do_sample=True,
    temperature=0.1,
    repetition_penalty=1.1,
    top_p=0.7,
    top_k=50,
)

# full decoded text (prompt + completion), kept for inspection
output_text = tokenizer.decode(output[0], skip_special_tokens=True)

# For a causal LM the returned sequence starts with the prompt tokens, so the
# answer is everything after the first input_ids.shape[-1] tokens. Slicing by
# token count avoids searching the decoded text for "<|im_start|>assistant",
# which fails (find() == -1) whenever that marker is removed as a special
# token during decoding and would then cut the text at an arbitrary offset.
generated_ids = output[0][input_ids.shape[-1]:]
result = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

print(result)

# Langchain Implementation

In [None]:
# fixing unicode error in google colab
import locale

# Force the reported preferred encoding to UTF-8. The replacement keeps the
# optional `do_setlocale` parameter of the real function so that callers using
# locale.getpreferredencoding(False) keep working instead of raising TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

In [None]:
# on-disk location of the vector store and the embedding model (library default)
persist_dir = "db"
embedding = HuggingFaceEmbeddings()

# pages whose text is indexed for retrieval
websites = [
    "https://www.lodgify.com/airbnb-management-software/",
    "https://www.lodgify.com/vacation-rental-booking-system/",
    "https://www.lodgify.com/pricing/",
    "https://www.lodgify.com/property-management-software/",
    "https://www.lodgify.com/vacation-rental-website-builder/",
]

# splitter configuration: 1000-character chunks with 100 characters of overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# use the WebBaseLoader to create Document objects for each website,
# then cut them into chunks
web_loader = WebBaseLoader(websites)
documents = web_loader.load()
texts = text_splitter.split_documents(documents)

# embed and store the chunks; because a persist_directory is supplied,
# the embeddings are stored on disk
persist_directory = persist_dir
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)
vectordb.persist()

# retriever over the stored chunks, used by the QA chain
retriever = vectordb.as_retriever()

In [None]:
# build huggingface pipeline
# The result is stored in a variable named `pipeline`, which replaces the
# factory function imported from transformers under the same name. Importing
# the factory under a private alias keeps this cell re-runnable: a second
# execution still calls the factory rather than the pipeline object.
from transformers import pipeline as _build_pipeline

pipeline = _build_pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        use_cache=True,
        device_map="auto",
        max_new_tokens=2048,
        do_sample=True,
        temperature=0.1,
        top_k=10,
        top_p = 0.95,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # the EOS token id is reused as the padding token id
        pad_token_id=tokenizer.eos_token_id,
)

In [None]:
### defining the retrieval chain

# specify the llm
# (wraps the transformers text-generation pipeline built above for LangChain)
llm = HuggingFacePipeline(pipeline=pipeline)

# Prompt text with two placeholders, {context} and {question}; qabot() turns it
# into a PromptTemplate with exactly those input variables. The trailing
# backslash joins the two instruction sentences onto one line of the prompt.
template = """
<|im_start|>system
You work as a support assistant for Lodgify, a vacation rental property management software platform, handling inquiries and providing assistance through their website.
Use the following pieces of context to answer the user's questions. \
If you don't know the answer, don't make up an answer, just say you don't know.

<context>
    {context}
<context/><|im_end|>
<|im_start|>user
Question: {question}<|im_end|>
<|im_start|>assistant
"""

def qabot(query):
    """
    Answer ``query`` with a "stuff" RetrievalQA chain.

    A fresh chain is assembled on every call from the module-level ``llm``,
    ``retriever`` and ``template``; source documents are requested alongside
    the answer.

    :param query: The user's question.
    :return: Whatever the chain returns when called with ``query``.
    """
    qa_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": qa_prompt},
    )
    return chain(query)

In [None]:
# launch gradio UI for inference

import gradio as gr
import json

def support_assist(text):
    """
    Gradio callback: run the question through ``qabot`` and return the answer.

    :param text: Question typed by the user.
    :return: The value stored under ``'result'`` in the ``qabot`` response.
    """
    return qabot(text)['result']

# one "Question" textbox in, plain text out, answered by support_assist
pred = gr.Interface(
    fn=support_assist,
    inputs=[
        gr.Textbox(label="Question", lines=1, placeholder="What is Lodgify?"),
    ],
    outputs='text',
    title="Radiantloom Support Assist",
)

# start the UI with sharing enabled
pred.launch(share=True)