##### $Name:\,\color{blue}{Christopher\,J.\,Watson,\,Joseph\,Binny,\,Viktor\,Veselov}$
##### $School:\,\color{blue}{Marcos\,School\,of\,Engineering,\,University\,of\,San\,Diego}$
##### $Research:\,\color{blue}{MSAAI\,Machine\,Learning\,\,TA}$
##### $Date:\,\color{blue}{1/18/2024}$
##### $Revision:\,\color{blue}{1}$

In [1]:
#Basic Imports
from torch import cuda, bfloat16
import torch
from time import time
import joblib
import os
import gradio as gr

#Transformers Library
import transformers

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

#Pipelining-Langchain
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain

In [2]:
# Pick the compute device once: CUDA when torch reports it as available,
# otherwise the CPU. The chosen device is reused by later cells.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU not available, using CPU.")

# Register the chosen device as torch's default device.
torch.set_default_device(device)

Using GPU: NVIDIA GeForce RTX 4080


In [3]:
# File holding the "### Human / ### Assistant" question-answer lines.
ads500B_dataset = "current_questions.txt"

# Directory that both the dataset and the fine-tuned model are resolved against.
base_dir = './'

# Location of the fine-tuned model, relative to base_dir.
new_model = f"{base_dir}llama-7b-ADS-500B"

In [4]:
def read_dataset(file_path, encoding="cp1252"):
    """Read a text file and return its lines.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to
            "cp1252", the encoding this notebook's dataset is read with.

    Returns:
        A list with one string per line; line terminators are kept.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.readlines()

def show_set(showset):
    """Print every item of `showset`, one item per line."""
    for entry in showset:
        print(entry)

# Load the raw lines and normalise each one:
#   1. turn the ", ###" separator into a newline so every "###" turn starts
#      on its own line,
#   2. strip surrounding whitespace,
#   3. drop the final remaining character
#      (presumably a trailing delimiter in the raw file - TODO confirm).
dataset = [
    raw_line.replace(", ###", "\n###").strip()[:-1]
    for raw_line in read_dataset(base_dir + ads500B_dataset)
]

# Preview the first five cleaned entries.
show_set(dataset[0:5])

### Human: What are the key software languages taught in ADS 500B course?
### Assistant: The key software languages taught in ADS 500B course are Unix, SQL, R, and Python
### Human: What is the focus of this course?
### Assistant: This course is focused on practical aspects of exploratory data analysis, analytics, and basic machine learning techniques
### Human: What types of questions are included in the quizzes of this course?
### Assistant: The quizzes in this course consist of multiple choice and true and false questions
### Human: What should students leverage to help them through the assignments and quizzes?
### Assistant: Students should leverage the supplemental Unix material in Blackboard to help them through the assignments and quizzes
### Human: What is recommended for students to install in preparation for Module 2?
### Assistant: It is recommended for students to install the full graphical user interface version of Anaconda in preparation for Module 2


In [5]:
# Quantization settings passed as `quantization_config` when the model is
# loaded in a later cell: 4-bit loading, 'nf4' quant type, double
# quantization enabled, bfloat16 as the compute dtype.
quant_config = BitsAndBytesConfig(
    load_in_4bit =True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

In [6]:
# Load the tokenizer stored alongside the fine-tuned model at `new_model`.
# local_files_only=True restricts the lookup to files already on disk.
# NOTE(review): add_special_tokens=False is supplied at load time here;
# confirm the installed transformers version applies it to later
# `tokenizer(...)` calls, otherwise pass it per call.
tokenizer = AutoTokenizer.from_pretrained(
    new_model,
    local_files_only=True,
    add_special_tokens=False,
)

In [7]:
# Stop sequences: text that marks the start of a new turn or a code fence.
# Generation is halted by StopOnTokens once the output ends with one of them.
stop_list = ['\n### Human:', '\n```\n', '\n\nQuestion:','\n``` Human:']

# Encode each stop sequence WITHOUT special tokens, requested explicitly at
# call time. If a BOS/EOS id were added to the encoded sequence it could
# never match the tail of the generated ids, and the stop criterion would
# silently never fire.
stop_token_ids = [
    torch.LongTensor(tokenizer(x, add_special_tokens=False)['input_ids']).to(device)
    for x in stop_list
]

from transformers import StoppingCriteria, StoppingCriteriaList

# Custom stopping criterion: halt generation when the output ends with any
# of the encoded stop sequences in `stop_token_ids`.
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the first sequence ends with a stop sequence.

        Args:
            input_ids: Token ids generated so far; only row 0 is inspected.
            scores: Unused; accepted to satisfy the StoppingCriteria call
                signature.

        Returns:
            True if the tail of `input_ids[0]` equals one of the tensors in
            `stop_token_ids`, otherwise False.
        """
        generated = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # Guard: an empty stop sequence, or one longer than what has been
            # generated so far, cannot match. Comparing anyway would hand
            # torch.eq two tensors of different lengths.
            if stop_len == 0 or generated.shape[-1] < stop_len:
                continue
            if torch.eq(generated[-stop_len:], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [8]:
# Load the fine-tuned causal LM from `new_model`, applying the quantization
# settings in `quant_config`. device_map='auto' leaves device placement to
# the library.
model = AutoModelForCausalLM.from_pretrained(
    new_model,
    quantization_config=quant_config,
    device_map='auto',
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin C:\Users\chris\anaconda3\envs\LLMTA\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll
CUDA SETUP: CUDA runtime path found: C:\Users\chris\anaconda3\envs\LLMTA\bin\cudart64_12.dll
CUDA SETUP: Highest compute capability among GPUs detected: 8.9
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary C:\Users\chris\anaconda3\envs\LLMTA\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
# Build the text-generation pipeline around the already-loaded model and
# tokenizer. max_length=1000 is the generation length setting passed to the
# pipeline, and `stopping_criteria` is the stop-sequence check defined above.
# NOTE(review): torch_dtype and device_map are passed even though `model` is
# already instantiated - confirm they have any effect here.
prompt = "What are the key software languages taught in ADS 500B course?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,
    torch_dtype=torch.float16, device_map="auto", max_length=1000, stopping_criteria=stopping_criteria)

# Smoke test: run one question straight through the pipeline and print the text.
# NOTE(review): this wraps the question in "<s>[INST] ... [/INST]" whereas
# prompt_driver sends "### Human: ..." - confirm which format the fine-tuned
# model expects.
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

In [10]:
def extract_first_b(text):
    """Extract the first answer, and the question preceding it, from a response.

    The text is split on "###". Sections starting with "Human:" update the
    current question; the first section starting with "Assistant:" ends the
    search. Only the first line of each section is used.

    Args:
        text: The response from the model.

    Returns:
        A dict {'question': ..., 'answer': ...} for the first "Assistant:"
        section found. 'question' is None when no "Human:" section came
        before it. Returns None when there is no "Assistant:" section at all.
    """
    human_tag = "Human:"
    assistant_tag = "Assistant:"

    # Start as None so an answer with no preceding question does not raise
    # UnboundLocalError.
    question = None

    for section in text.split("###"):
        body = section.strip()
        first_line = body.split("\n")[0]
        if body.startswith(human_tag):
            # Slice by the tag's real length so the first character is kept
            # even when no space follows the colon.
            question = first_line[len(human_tag):].strip()
        elif body.startswith(assistant_tag):
            answer = first_line[len(assistant_tag):].strip()
            return {'question': question, 'answer': answer}

    return None

def extract_first_c(text):
    """Return the leading segment of a model response, whitespace-trimmed.

    Turn markers are rewritten into blank-line breaks (or removed), and
    everything from the first blank-line break onward is discarded.
    """
    # Applied in this exact order: "\n### Human" must be handled before the
    # bare "Human:" removal.
    for marker, substitute in (
        ("Question:", "\n\n"),
        ("\n### Human", "\n\n"),
        ("Human:", ""),
    ):
        text = text.replace(marker, substitute)

    leading, _, _ = text.partition("\n\n")
    return leading.strip()

In [11]:
%%time
# Embedding model used to vectorise documents and queries for the Chroma store.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Run the embedding model on the device selected earlier in the notebook.
model_kwargs = {"device": device}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

CPU times: total: 297 ms
Wall time: 835 ms


In [12]:
# Build the Chroma vector store on first run and reuse the persisted copy on
# later runs. One path variable drives the existence check, the build and
# the reload so they cannot drift apart.
store_path = "./chroma_db"
# Same file, and same cp1252 encoding, that read_dataset loads earlier.
source_path = base_dir + ads500B_dataset

if not os.path.exists(store_path):
    # First run: load the question file, split it into chunks of up to 1000
    # characters with a 20-character overlap, then embed and persist them.
    loader = TextLoader(source_path, encoding="cp1252")
    docs = loader.load()
    text_chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    chunk_stack = text_chunks.split_documents(docs)
    vectordb = Chroma.from_documents(
        documents=chunk_stack,
        embedding=embeddings,
        persist_directory=store_path,
    )
else:
    # Later runs: reopen the persisted store with the same embedding function.
    vectordb = Chroma(persist_directory=store_path, embedding_function=embeddings)

In [13]:
# Wire the pieces into a retrieval-QA chain: the Chroma store provides the
# retriever, the HF generation pipeline is wrapped as a langchain LLM, and
# the chain is built with chain_type="stuff". verbose=True produces the
# "Entering new RetrievalQA chain" trace seen in the cell output.
retriever = vectordb.as_retriever()
hfm = HuggingFacePipeline(pipeline=pipe)
qa = RetrievalQA.from_chain_type(
    llm=hfm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [14]:
def prompt_driver(system, prompt, pipe):
    """Send one user question through the chain and return the trimmed reply.

    Args:
        system: The system prompt. Accepted for interface compatibility but
            NOT sent: the query is built from `prompt` alone.
        prompt: The user question.
        pipe: Object exposing `invoke(str)` that returns a mapping with a
            'result' entry (this notebook passes the RetrievalQA chain `qa`).

    Returns:
        The leading segment of the reply, as cut by `extract_first_c`.
    """
    # The earlier version built a "<s>[INST] <<SYS>> ..." string from `system`
    # and overwrote it on the next line, so the system prompt never reached
    # the chain. That dead assignment is removed; the query sent is unchanged.
    load_str = f"### Human: {prompt}"
    result = pipe.invoke(load_str)
    return extract_first_c(result['result'])

# System prompt describing the assistant's role; handed to prompt_driver.
system = (
    "You are a helpful AI assistant that is an assistant teacher for a class called"
    " ADS 500B"
)

In [15]:
%%time
# End-to-end check: send a greeting through the retrieval-QA chain and show
# the trimmed reply as the cell's output.
prompt =  "Hello chatbot, I'm your beta tester"
result = prompt_driver(system, prompt, qa)
result



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
CPU times: total: 8.81 s
Wall time: 9.75 s


"### Assistant: Hello, I'm here to help you learn Python programming."

In [None]:
def chatbot_interface(message, history):
    """Gradio chat callback: answer `message` using the retrieval-QA chain.

    Args:
        message: The user's latest chat message.
        history: Prior chat turns supplied by gradio; not used.

    Returns:
        The chain's trimmed reply, or "I don't know" if answering failed.
    """
    # Local import keeps this cell self-contained.
    import logging

    system = (
        "You are a helpful AI assistant that is an assistant teacher for a class called"
        " ADS 500B"
    )

    try:
        return f"{prompt_driver(system, message, qa)}"
    except Exception:
        # Stay best-effort for the UI, but record the failure instead of
        # hiding it. Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        logging.getLogger(__name__).exception("chatbot_interface failed to produce an answer")
        return "I don't know"

# Build the gradio chat UI around chatbot_interface: an HTML title with the
# university logo, a set of clickable example questions, and a short
# description.
iface = gr.ChatInterface(
    fn=chatbot_interface,
    title="<div style='display: flex; align-items: center;'><img src='https://logos-download.com/wp-content/uploads/2021/01/University_of_San_Diego_Logo_full-1536x1536.png' alt='Your Image' style='margin-right: 10px; max-height: 100px'><h1 style='flex:1; margin-right: 100px; text-align: center;'>QuestionBot for ADS 500 powered by Llama 2</h1></div>",
    # NOTE(review): two examples look like they contain typos ("How does you
    # save you changes", and a stray "," after "installed?"). Confirm whether
    # they mirror the dataset wording before correcting them.
    examples= ["How is R-squared interpreted in the context of regression analysis?", "What are the key software languages taught in ADS 500B course?", "How does you save you changes and exit the file in the vi text editor?",
           "How can you install `scikit-learn` if it's not already installed?,", "What is the focus of Module 7 in the course?", "Why is the R-squared value important in evaluating a linear regression model?", "Who made you?"],
    description="This is a demo of a chatbot that stores reference text and attempts to give an answer from there.",
    )

# NOTE(review): share=True requests a shareable link for the app - confirm
# that exposing the demo outside this machine is intended.
iface.launch(share=True, debug=True)