In [1]:
# !pip install ctransformers[cuda]==0.2.27 \
#     gradio \
#     chromadb \
#     langchain \
#     torch \
#     sentence-transformers

In [2]:
!pip install llama-cpp-python



In [3]:
import gradio as gr
import time
from ctransformers import AutoModelForCausalLM


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def load_llm():
    llm = AutoModelForCausalLM.from_pretrained(
        # "../models/llama-2-13b-chat.Q8_0.gguf",
        "../models/llama-2-7b-chat.Q8_0.gguf",
        model_type='llama',
        max_new_tokens = 2048,
        repetition_penalty = 1.0,
        # temperature = 0.75,
        gpu_layers=150,
        context_length=4096
    )
    return llm

In [5]:
def llm_function(message, chat_history):
    llm = load_llm()
    response = llm(
        message
    )
    output_texts = response
    return output_texts

In [6]:
title = "Llama2 7B Quantized GGUF"

examples = [
    'Plan a 4 day trip to Japan, include the travel times.',
    'Plan a 5 day trip to Mumbai'
]

In [7]:
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 32}
)

In [8]:
from langchain.document_loaders import TextLoader

loader = TextLoader("../data/destinations.txt", encoding='UTF-8')

data = loader.load()

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

In [9]:
from langchain.vectorstores import Chroma
from langchain.embeddings import GPT4AllEmbeddings

vectorstore = Chroma.from_documents(documents=all_splits, embedding=embed_model)

In [10]:
question = "What are places to visit in Mumbai?"
docs = vectorstore.similarity_search(question)
docs

[Document(page_content='India:Mumbai:Sanjay Gandhi National Park\n    Location: Northern suburbs of Mumbai.\n    Time to Spend: Half-day to Full-day.\n    Tags: Nature, Wildlife, Adventure.\n    Postal Code: 400066.', metadata={'source': '../data/destinations.txt'}),
 Document(page_content='India:Mumbai:Chhatrapati Shivaji Maharaj Vastu Sangrahalaya (Formerly the Prince of Wales Museum)\n    Location: Fort.\n    Time to Spend: 2-3 hours.\n    Tags: Cultural, Historical, Educational.\n    Postal Code: 400023.\n\nIndia:Mumbai:Marine Drive and Chowpatty Beach\n    Location: Along the coast, starting from Nariman Point to Malabar Hill.\n    Time to Spend: 1-2 hours.\n    Tags: Scenic Views, Relaxation, Local Cuisine.\n    Postal Code: Marine Drive 400020, Chowpatty 400007.', metadata={'source': '../data/destinations.txt'}),
 Document(page_content='India:Mumbai:Juhu Beach\n    Location: Juhu.\n    Time to Spend: 1-3 hours.\n    Tags: Beach, Local Cuisine, Sunset Views.\n    Postal Code: 400

In [11]:
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
n_gpu_layers = 32 
n_batch = 512
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [12]:
llm = LlamaCpp(
    model_path="../models/llama-2-7b-chat.Q8_0.gguf",
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=False,
    max_new_tokens=2048
)

                max_new_tokens was transferred to model_kwargs.
                Please confirm that max_new_tokens is what you intended.


In [13]:
from langchain.chains import RetrievalQA

rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm, chain_type='stuff',
    retriever=vectorstore.as_retriever()
)

In [14]:
rag_pipeline("Give me a travel itinerary for a 4 day trip to Mumbai.")

  warn_deprecated(


 Sure! Based on the information provided, here's a suggested 4-day travel itinerary for Mumbai:
Day 1: Nature and Wildlife
* Start your day at Sanjay Gandhi National Park in the northern suburbs of Mumbai. Spend half a day exploring the park's diverse flora and fauna, including leopards, elephants, and over 200 species of birds.
* Post lunch, head to Chhatrapati Shivaji Maharaj Vastu Sangrahalaya (Formerly the Prince of Wales Museum) in Fort to learn about Mumbai's rich cultural and historical heritage.
Day 2: Scenic Views and Local Cuisine
* Begin your day at Marine Drive and Chowpatty Beach, enjoying the scenic views of the Arabian Sea and taking a stroll along the promenade.
* For lunch, try some local street food at Chowpatty Beach or nearby Juhu Beach.
* In the evening, head to Elephanta Caves on Elephanta Island, accessible by ferry from Gateway of India. Explore

{'query': 'Give me a travel itinerary for a 4 day trip to Mumbai.',
 'result': " Sure! Based on the information provided, here's a suggested 4-day travel itinerary for Mumbai:\nDay 1: Nature and Wildlife\n* Start your day at Sanjay Gandhi National Park in the northern suburbs of Mumbai. Spend half a day exploring the park's diverse flora and fauna, including leopards, elephants, and over 200 species of birds.\n* Post lunch, head to Chhatrapati Shivaji Maharaj Vastu Sangrahalaya (Formerly the Prince of Wales Museum) in Fort to learn about Mumbai's rich cultural and historical heritage.\nDay 2: Scenic Views and Local Cuisine\n* Begin your day at Marine Drive and Chowpatty Beach, enjoying the scenic views of the Arabian Sea and taking a stroll along the promenade.\n* For lunch, try some local street food at Chowpatty Beach or nearby Juhu Beach.\n* In the evening, head to Elephanta Caves on Elephanta Island, accessible by ferry from Gateway of India. Explore"}

In [None]:
gr.ChatInterface(
    fn = llm_function,
    title=title,
    examples=examples
).launch()