In [None]:
!pip install accelerate==0.21.0 transformers==4.31.0 tokenizers==0.13.3
!pip install bitsandbytes==0.40.0 einops==0.6.1
!pip install xformers==0.0.22.post7
!pip install langchain==0.1.4
!pip install faiss-gpu==1.7.1.post3
!pip install sentence_transformers

Collecting accelerate==0.21.0
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers==4.31.0
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers==0.13.3
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate==0.21.0)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate==0.21.0)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-

In [None]:
import os
from getpass import getpass

from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Device string used for the status message below and for tensors created
# later in the notebook; falls back to CPU when no GPU is visible.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token.
# Never hardcode the token in the notebook: read it from the HF_TOKEN
# environment variable and only prompt (without echo) when it is not set.
hf_auth = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

# NOTE(review): trust_remote_code=True allows repository-provided Python code
# to run; presumably unnecessary for this model id - confirm and drop it.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda122.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 122
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda122.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


Model loaded on cuda:0


In [None]:
# Load the tokenizer that belongs to `model_id`. The credential is passed via
# `token=`, the same keyword the config/model loaders in this notebook use
# (`use_auth_token=` is the older, deprecated spelling).
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Text sequences after which generation should stop.
stop_list = ['\nHuman:', '\n```\n']

# Encode without special tokens: by default the tokenizer prepends the BOS id
# (the leading `1` in the printed ids), and a stop pattern that starts with BOS
# can never equal the tail of a sequence that is still being generated.
# NOTE(review): the ids may still start with a prefix-space token (29871 in the
# printed output); verify they match what the model actually emits.
stop_token_ids = [
    tokenizer(x, add_special_tokens=False)['input_ids'] for x in stop_list
]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [None]:
import torch

# Convert each stop sequence to an int64 tensor on `device`.
# torch.as_tensor accepts both plain lists and tensors, so re-running this cell
# (when `stop_token_ids` already holds tensors) keeps working.
stop_token_ids = [
    torch.as_tensor(x, dtype=torch.long, device=device) for x in stop_token_ids
]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [None]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stop generation once the sequence ends with one of `stop_token_ids`.

    Only the first sequence of the batch (`input_ids[0]`) is inspected.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the tail of `input_ids[0]` equals a stop sequence."""
        sequence = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # A sequence shorter than the stop pattern cannot end with it, and
            # comparing tensors of different lengths would raise (short prompts).
            if stop_len == 0 or sequence.shape[0] < stop_len:
                continue
            if torch.equal(sequence[-stop_len:], stop_ids.to(sequence.device)):
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [None]:
# Text-generation pipeline around the quantized model and its tokenizer.
generate_text = transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    # langchain expects the prompt to be part of the returned text
    return_full_text=True,
    # generation settings are passed here as well
    stopping_criteria=stopping_criteria,  # without this the model rambles during chat
    temperature=0.1,  # lower values make sampling less random (applies when sampling is enabled)
    max_new_tokens=512,  # upper bound on the number of generated tokens
    repetition_penalty=1.1,  # without this the output begins repeating
)

In [None]:
# Smoke test: call the raw pipeline once and print the first generation.
res = generate_text(
    "Explain me the difference between Data Lakehouse and Data Warehouse."
)
print(res[0]['generated_text'])

Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between data lakehouse and data warehouse is a common topic of discussion in the data engineering community, as both are designed to store large amounts of data but have different architectures and use cases. A data lakehouse is a centralized repository that stores all the raw data from various sources in its original form, while a data warehouse is a structured repository that organizes data into a specific schema for querying and analysis.

A data lakehouse is a centralized repository that stores all the raw data from various sources in its original form. This means that the data is not transformed or cleaned before being stored in the lakehouse, which allows for greater flexibility in how the data can be used later on. In contrast, a data warehouse is a structured repository that organizes data into a specific schema for querying and analysis. This means that the data is transformed and cleaned befor

In [None]:
from langchain.llms import HuggingFacePipeline

# Expose the Hugging Face pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
# (`invoke` replaces the deprecated `llm(prompt=...)` call style)
llm.invoke("Explain me the difference between Data Lakehouse and Data Warehouse.")

  warn_deprecated(


" Unterscheidung between data lakehouse and data warehouse is a common topic of discussion in the data engineering community, as both are designed to store large amounts of structured and unstructured data. A data warehouse is a centralized repository that stores data from various sources in a structured format, making it easier for organizations to analyze and make decisions based on that data. A data lakehouse, on the other hand, is a more flexible and scalable storage solution that allows for the ingestion and storage of vast amounts of structured and unstructured data, including raw data, logs, and social media feeds.\n\nIn this article, we will explore the key differences between these two data storage solutions and help you determine which one best fits your organization's needs. What is a Data Warehouse? A data warehouse is a centralized repository that stores data from various sources in a structured format. It is designed to support business intelligence (BI) activities by pro

In [None]:
from langchain.document_loaders import WebBaseLoader

# Pages that make up the knowledge base for retrieval.
web_links = [
    "https://www.uts.edu.my/about-university/",
    "https://en.wikipedia.org/wiki/University_of_Technology_Sarawak",
    "https://www.uts.edu.my/school/",
    "https://scm.uts.edu.my/programme/undergraduate-programme/bachelor-of-computer-science/",
]

# Fetch every page and turn it into LangChain documents.
loader = WebBaseLoader(web_links)
documents = loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the loaded pages into chunks of up to 1000 characters, with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
all_splits = text_splitter.split_documents(documents)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
# Run the embedding model on the GPU when one is available and fall back to
# the CPU otherwise (same availability check as the model-loading cell)
# instead of unconditionally requesting "cuda".
model_kwargs = {"device": "cuda" if cuda.is_available() else "cpu"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Retrieval chain: answers questions with `llm`, using chunks fetched from the
# vector store, and also returns the documents it used.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

In [None]:
# Start a new conversation with an empty history.
chat_history = []

query = "ucts is uts?"
# `invoke` replaces the deprecated `chain({...})` call style.
result = chain.invoke({"question": query, "chat_history": chat_history})

print(result['answer'])
print(chat_history)

  warn_deprecated(


 Yes, UCTS is now known as UTS after it was upgraded to a full-fledged university in 2021.
[]


In [None]:
# Feed the previous turn back in as (question, answer) so the follow-up
# question can be interpreted in context.
chat_history = [(query, result["answer"])]

query = "what is full name of ucts and uts?"
# `invoke` replaces the deprecated `chain({...})` call style.
result = chain.invoke({"question": query, "chat_history": chat_history})

print(result['answer'])
print(chat_history)

  According to the provided context, the full name of UCTS before it was upgraded to a university in 2021 was "University College of Technology Sarawak."
[('ucts is uts?', ' Yes, UCTS is now known as UTS after it was upgraded to a full-fledged university in 2021.')]


In [None]:
# Summarise the retrieved chunks (origin + short preview) instead of dumping
# the full Document reprs, which floods the cell output.
for doc in result['source_documents']:
    preview = doc.page_content[:200].replace('\n', ' ')
    print(f"{doc.metadata.get('source')} | {preview}")

[Document(page_content='^ "UCTS upgraded to full-fledged university, huge achievement for Sarawak". Borneo Post Online. 23 November 2021. Retrieved 26 September 2023.\n\n^ "UCTS appoints Taib chancellor at inaugural convocation". The Borneo Post. 9 October 2016. Retrieved 28 March 2019.\n\n^ "Yayasan S\'wak now wholly owns UCTS". The Borneo Post. 30 January 2021. Archived from the original on 1 February 2021. Retrieved 2 December 2021.\n\n^ Boon, Peter (14 May 2021). "Sibu\'s UCTS one of Malaysia\'s seven beautiful campuses, says high society magazine". The Borneo Post. Archived from the original on 14 May 2021. Retrieved 23 November 2021.\n\n^ "University Technology Sarawak to be launched tomorrow". Dayak Daily. 9 November 2021. Retrieved 14 November 2021.\n\n^ Hasnah Jusid (23 November 2021). "Taraf universiti penuh, UTS satu pencapaian besar untuk Sarawak (UTS as full-fledged university is a significant achievement for Sarawak)" (in Malay). Bernama. Retrieved 23 November 2021.', met

In [None]:
import locale


def getpreferredencoding(do_setlocale=True):
    """Always report UTF-8 as the preferred encoding.

    `do_setlocale` is accepted for signature compatibility with
    `locale.getpreferredencoding` and is ignored.
    """
    return "UTF-8"


# Override the standard-library lookup with the fixed UTF-8 answer.
# NOTE(review): presumably a Colab workaround for `!` shell commands failing
# under a non-UTF-8 locale - confirm it is still needed.
locale.getpreferredencoding = getpreferredencoding

In [None]:
from pathlib import Path

# Create the directory for the Flask template written by the next cell.
# Pure Python instead of `!mkdir templates -p`: placing the option after the
# operand is not portable across `mkdir` implementations.
Path("templates").mkdir(parents=True, exist_ok=True)

In [None]:
%%writefile templates/index.html

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>UTS Cafeteria GPT</title>
  <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css">
  <style>
    body {
        background-image:  url(https://lh5.googleusercontent.com/p/AF1QipPr5YfZjO-FRPh9Xqv83eJ19l6Kl3CdOsAJ6t4-=w141-h118-n-k-no-nu);
        background-color: rgba(0, 0, 0, 0.8);
        background-repeat: no-repeat;
        background-position: center;
        background-attachment: fixed;
        background-size: cover;
        background-blend-mode: darken;
      font-family: Arial, sans-serif;
    }

    .container {
      max-width: 600px;
      margin: 100px auto;
      background-color: #fff;
      border-radius: 8px;
      box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
      padding: 20px;
    }

    #chatContainer {
      height: 600px;
      overflow-y: auto;
      border: 1px solid #ccc;
      border-radius: 8px;
      padding: 10px;
      margin-bottom: 20px;
      background-color: #f9f9f9;
    }

    .messageContainer {
      margin-bottom: 10px;
      overflow: hidden;
    }

    .message {
      padding: 10px;
      border-radius: 8px;
      max-width: 80%;
      word-wrap: break-word;
    }

    .userMessage {
      background-color: #007bff;
      color: #fff;
      float: right;
    }

    .botMessage {
      background-color: #28a745;
      color: #fff;
      float: left;
    }

    .avatar {
      width: 30px;
      height: 30px;
      border-radius: 50%;
      margin-right: 10px;
      float: left;
    }

    .typingIndicator {
      display: inline-block;
      width: 10px;
      height: 10px;
      margin-right: 5px;
      background-color: #ccc;
      border-radius: 50%;
      animation: typingAnimation 1s infinite;
    }

    @keyframes typingAnimation {
      0% {
        background-color: #ccc;
      }
      50% {
        background-color: transparent;
      }
      100% {
        background-color: #ccc;
      }
    }

    /* Style for form input and button */
    .form-group {
      display: flex;
      align-items: center;
    }

    #question {
      flex: 1;
      margin-right: 10px;
      padding: 8px;
      border: 1px solid #ccc;
      border-radius: 5px;
    }

    #submitBtn {
      padding: 8px 20px;
      background-color: #007bff;
      color: #fff;
      border: none;
      border-radius: 5px;
      cursor: pointer;
    }
  </style>
</head>
<body>

<div class="container">
  <h1 class="text-center">Cafeteria GPT</h1>

  <div id="chatContainer"></div>

  <form id="chatForm" action="#" method="POST">
    <div class="form-group">
      <input type="text" id="question" name="question" placeholder="Type your message..." autocomplete="off">
      <button type="submit" id="submitBtn">Send</button>
    </div>
  </form>
</div>

<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<script>
  $(document).ready(function() {
    // Escape arbitrary text so it can be placed inside HTML markup safely.
    function escapeHtml(text) {
      return $("<div>").text(String(text == null ? "" : text)).html();
    }

    // Base class for a chat message.
    //   content - message text (or trusted markup when isHtml is true)
    //   isUser  - true for messages typed by the user, false for bot messages
    //   isHtml  - optional; when true, content is inserted as raw HTML
    function ChatMessage(content, isUser, isHtml) {
      this.content = content;
      this.isUser = isUser;
      this.isHtml = !!isHtml;
    }

    // Build the markup for one message. Text is escaped unless the message was
    // explicitly created as trusted HTML, so neither user input nor model
    // output can inject markup or scripts into the page.
    ChatMessage.prototype.displayMessage = function() {
      var messageClass = this.isUser ? "userMessage" : "botMessage";
      var body = this.isHtml ? this.content : escapeHtml(this.content);
      var messageContent = "<div class='message " + messageClass + "'>" + body + "</div>";
      var avatar = this.isUser ? "" : "<img src='https://cdn-icons-png.flaticon.com/512/4712/4712109.png' class='avatar'>";
      return "<div class='messageContainer'>" + avatar + messageContent + "</div>";
    };

    // Concrete class for messages written by the user.
    function UserMessage(content) {
      ChatMessage.call(this, content, true);
    }

    // Inherit the base-class methods through the prototype chain.
    UserMessage.prototype = Object.create(ChatMessage.prototype);
    UserMessage.prototype.constructor = UserMessage;

    // Concrete class for messages written by the bot.
    function BotMessage(content, isHtml) {
      ChatMessage.call(this, content, false, isHtml);
    }

    // Inherit the base-class methods through the prototype chain.
    BotMessage.prototype = Object.create(ChatMessage.prototype);
    BotMessage.prototype.constructor = BotMessage;

    // Chat UI controller: keeps the message list and renders it.
    function ChatInterface() {
      this.messages = [];
    }

    // Show the user's message, display a typing indicator and ask the backend
    // for the bot's answer.
    ChatInterface.prototype.sendMessage = function(message) {
      var text = message.trim();
      var userMessage = new UserMessage(text);
      this.messages.push(userMessage);
      this.displayMessages();
      this.showTypingIndicator();
      var that = this;
      $.ajax({
        url: '/get_response', // backend route that returns the bot's reply
        type: 'POST',
        data: { question: text },
        success: function(response) {
          that.botReply(response);
        },
        error: function(xhr, status, error) { //exception handling
          console.error("Error:", error);
          that.botReply("Sorry, Got Some Error, Please press f12 to check the error");
        }
      });
    };

    // Replace the typing indicator (the last message) with the bot's reply.
    ChatInterface.prototype.botReply = function(response) {
      var botMessage = new BotMessage(response);
      // Remove the typing indicator.
      this.messages.pop();
      this.messages.push(botMessage);
      this.displayMessages();
    };

    // Append an animated placeholder while waiting for the backend.
    ChatInterface.prototype.showTypingIndicator = function() {
      var typingIndicator = "<span class='typingIndicator'></span><span class='typingIndicator'></span><span class='typingIndicator'></span>";
      // Trusted, locally built markup: render as HTML instead of escaping it.
      var botMessage = new BotMessage(typingIndicator, true);
      this.messages.push(botMessage);
      this.displayMessages();
    };

    // Re-render the whole conversation and keep the newest message visible.
    ChatInterface.prototype.displayMessages = function() {
      var chatContainer = $("#chatContainer");
      chatContainer.empty();
      this.messages.forEach(function(message) {
        chatContainer.append(message.displayMessage());
      });
      scrollChatContainer();
    };

    // Scroll the chat container to the bottom.
    function scrollChatContainer() {
      var chatContainer = $("#chatContainer");
      chatContainer.scrollTop(chatContainer.prop("scrollHeight"));
    }

    // Create the chat interface instance.
    var chatInterface = new ChatInterface();

    // Initial greeting message.
    var initialGreeting = "Hello and Welcome to Cafeteria Chatbot! What can I do for you?";
    var botMessage = new BotMessage(initialGreeting);
    chatInterface.messages.push(botMessage);
    chatInterface.displayMessages();

    // Handle form submission: send non-blank input and clear the text box.
    $("#chatForm").submit(function(event) {
      event.preventDefault();
      var userInput = $("#question").val().trim();
      if (userInput) {
        chatInterface.sendMessage(userInput);
        $("#question").val("");
      }
    });

    // Constant responses for specific user inputs.
    // This logic used to live in an "input" event handler that referenced an
    // undefined variable (ReferenceError on every keystroke) and returned
    // strings that nothing consumed, so it is kept as a plain helper and is
    // not attached to any event.
    // NOTE(review): substring matching means 'hi' also matches words such as
    // "this"; tighten the matching before wiring this into sendMessage.
    function getConstantResponse(message) {
      var lowerCaseMessage = message.toLowerCase();
      if (lowerCaseMessage.includes('hello') || lowerCaseMessage.includes('hi')) {
        return 'Hello! How can I assist you?';
      } else if (lowerCaseMessage.includes('help')) {
        return 'Sure, I can help you. What do you need assistance with?';
      } else if (lowerCaseMessage.includes('bye') || lowerCaseMessage.includes('goodbye')) {
        return 'Goodbye! Have a great day!';
      } else {
        return "I'm sorry, I didn't understand that. Can you please rephrase?";
      }
    }

  });
</script>

</body>
</html>




Writing templates/index.html


In [None]:
import os
import threading

In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.6


In [None]:
# Terminate any ngrok process left over from an earlier run (presumably so the
# tunnel opened below starts from a clean state).
# Prints "ngrok: no process found" when nothing is running.
!killall ngrok

ngrok: no process found


In [None]:
from flask import Flask, render_template, request, jsonify
from pyngrok import ngrok
!ngrok version
os.environ["FLASK_DEBUG"] = "development"
# template_folder = '/gdrive/MyDrive/AIWeb/template'
app = Flask(__name__)
port = 5000
ngrok.set_auth_token("ngrok_token")
public_url = ngrok.connect(port).public_url
print("* ngrok tunnel \"{}\">\"http://127.0.0.1:{}\"".format(public_url, port))

app.config["BASE_URL"] = public_url
message = ""
question = ""
questionArray = []
chat_history = []
@app.route("/")
@app.route("/index", methods=['GET', 'POST'])
def index():
  global message
  global question
  # global questionArray
  if request.method == 'POST':
    # chat_history = [(query, result["answer"])]
    # question = request.form.get("question")
    getQuestion = request.form.get("question")
    result = chain({"question": getQuestion, "chat_history": chat_history})
    question = getQuestion
    message = result['answer']
    # questionArray.append()
    # print(result)
    # print(chat_history)
  return render_template('index.html', message=message, question=question)
@app.route('/get_response', methods=['POST'])
def get_response():
    user_question = request.form['question']
    result = chain({"question": user_question, "chat_history": chat_history})
    bot_response = result['answer']
    return bot_response
threading.Thread(target=app.run, kwargs={"use_reloader": False}).start()

ngrok version 3.9.0
pyngrok version 7.1.6
* ngrok tunnel "https://f157-34-105-79-248.ngrok-free.app">"http://127.0.0.1:5000"


In [None]:
# Mount Google Drive in the Colab runtime; the output confirms the mount point
# with "Mounted at /content/gdrive".
from google.colab import drive
drive.mount('/content/gdrive')

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


Mounted at /content/gdrive


In [None]:
from flask import Flask, render_template, request
from pyngrok import ngrok
!ngrok version
os.environ["FLASK_ENV"] = "development"
template_folder = '/gdrive/MyDrive/AIWeb/template'
app = Flask(__name__,template_folder=template_folder)
port = 5000
ngrok.set_auth_token("ngrok_token")
public_url = ngrok.connect(port).public_url
print("* ngrok tunnel \"{}\">\"http://127.0.0.1:{}\"".format(public_url, port))

app.config["BASE_URL"] = public_url

@app.route("/", methods=['GET', 'POST'])
@app.route("/index", methods=['GET', 'POST'])
def index():
  global message
  if request.method == 'POST':
    chat_history = [(question, result["answer"])]
    question = request.form.get("question")
    # query = request.form.get("question")
    result = chain({"question": question, "chat_history": chat_history})
    message = result['answer']
    print(result['answer'])
    # print(chat_history)
  return render_template('index.html', message=message)
threading.Thread(target=app.run, kwargs={"use_reloader": False}).start()

ngrok version 3.9.0
pyngrok version 7.1.6
* ngrok tunnel "https://2c8b-34-105-79-248.ngrok-free.app">"http://127.0.0.1:5000"


'FLASK_ENV' is deprecated and will not be used in Flask 2.3. Use 'FLASK_DEBUG' instead.