In [1]:
# https://github.com/pinecone-io/examples/blob/master/learn/generation/llm-field-guide/llama-2/llama-2-13b-retrievalqa.ipynb
from torch import cuda
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoModelForQuestionAnswering
from langchain.llms import HuggingFacePipeline


# Checkpoint identifier handed to the tokenizer and pipeline loaders.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Use the currently selected CUDA device when CUDA is available, else the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

#AutoModelForQuestionAnswering

# tl_model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     trust_remote_code=True,
#     # quantization_config=bnb_config,
#     device_map='auto',
#     # use_auth_token=hf_auth
# )

# tl_model.eval()

In [2]:
# Load the tokenizer that belongs to the checkpoint named by `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Build the Hugging Face text-generation pipeline. Every generation setting is
# declared here, on the pipeline, so there is one source of truth for decoding.
langchain_tg = pipeline(
    task="text-generation",
    model=model_id,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # Deterministic (greedy) decoding. This replaces `temperature=0.0`, which is
    # not a valid sampling temperature (it must be strictly positive) and only
    # worked because sampling was disabled anyway.
    do_sample=False,
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1,  # without this output begins repeating
)

# Wrap the ready-made pipeline for LangChain. No `model_kwargs` are passed:
# the previous {"temperature": 0.7, "max_length": 512} contradicted the pipeline
# settings above and is not applied to an already-constructed pipeline, so it
# only obscured which decoding parameters were actually in effect.
basic_llm = HuggingFacePipeline(pipeline=langchain_tg)

In [4]:
# Smoke test of the bare LLM (no retrieval). Author's timing note: 8 minutes.
basic_llm("what is so special about Tinyllama?")



"\n\nTINYLLAMA: (smiling) Well, I'm a tiny little lamasaurus. And I have the power to transform into a small, furry animal!\n\nJACK: (impressed) That's amazing! Can you show me how it works?\n\nTINYLLAMA: (shows him) Sure thing! Whenever I feel happy or excited, I can transform into a rabbit or a squirrel. It's like having a whole new personality!\n\nJACK: (laughs) Wow, that's cool! But why did you choose to be a lamasaurus instead of a rabbit or a squirrel?\n\nTINYLLAMA: (pauses) Actually, there was a time when I wanted to be a rabbit. But then I realized that being a rabbit would mean living in a cage all day long. So I decided to become a lamasaurus instead. It's a bit more exciting and adventurous, don't you think?\n\nJACK: (nodding) Yeah, I guess it is. But what about your friends at school? Do they know about your powers?\n\nTINYLLAMA: (smiles) Not really. Most of my classmates are too busy worrying about their own problems to pay much attention to me. But I've been practicing my

In [5]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Sentence-transformers checkpoint used for the embedding model.
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Recompute the torch device string: current CUDA device if available, else CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# The same device is given both for loading the model and for encoding;
# texts are encoded in batches of 32.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

In [6]:
# Load the previously saved FAISS index from disk, pairing it with the same
# embedding model so queries are embedded consistently.
# NOTE(review): loading a saved index presumably deserializes pickled data;
# only load index folders from a trusted source — confirm for your version.
vstore = FAISS.load_local(
    folder_path="../Documents/vdb_faiss_index",
    embeddings=embed_model,
)


In [7]:
from langchain.chains import RetrievalQA

# Retrieval-augmented QA chain: `basic_llm` answers using documents fetched by
# the FAISS store's default retriever, with chain_type='stuff'.
rag_llm = RetrievalQA.from_chain_type(
    llm=basic_llm,
    chain_type='stuff',
    retriever=vstore.as_retriever(),
)
     

In [8]:
# Baseline (no retrieval): send the question straight to the bare LLM.
ans1 = basic_llm("what happen in Japan 2024?")
print(ans1)  # author's timing note: 4 minutes





1. The Tokyo Olympics will be held from July 23 to August 8, 2021.

2. The opening ceremony will take place on July 23 at 6:00 pm (JST).

3. The closing ceremony will take place on August 8 at 9:00 am (JST).

4. The duration of the games is expected to be around 17 days.

5. The games will feature a total of 33 sports and 33 disciplines.

6. The opening and closing ceremonies will be broadcast live on TV and online.

7. The athletes' village will be located in the city of Chiba, about 30 km northwest of Tokyo.

8. The venues for the games include the Olympic Stadium, the National Gymnasium, and other smaller venues.

9. The games will have a budget of approximately $10 billion.

10. The Tokyo Olympics are part of the International Olympic Committee's (IOC) Olympic Agenda 2020, which aims to modernize and improve the Olympic Games.


In [11]:
# Same question through the RetrievalQA chain; print only the answer text.
ans2 = rag_llm({"query": "what happen in Japan 2024?"})
print(ans2["result"])  # author's timing note: 8 minutes

 The Japanese Red Cross Society is sending relief teams to help those affected by the massive earthquake that hit Japan on 1 January 2024.


In [13]:
# Inspect which stored documents the vector store returns for this question.
vstore.similarity_search(query="what happen in Japan 2024?")

[Document(page_content='2024.01.12\n\nThe Japanese Red Cross Society would like to express our sincere condolences\nand sympathy toward the people affected by the massive earthquake which hit\nJapan on 1 January 2024. Our relief teams have been working around the clock\nto save lives, protect health and dignity of the affected people in Noto\nPeninsula.\n\n## 1\\. Situation', metadata={'id': 'de1e9727ae72-3', 'source': 'https://www.jrc.or.jp/english/relief/2024NotoPeninsulaEarthquake.html'}),
 Document(page_content='213. **^** "Japan govt. supplies quake-hit areas, probes damage to ships". NHK. 4 January 2024. Archived from the original on 5 January 2024. Retrieved 4 January 2024.', metadata={'id': 'f60ddb659058-425', 'source': 'https://en.wikipedia.org/wiki/2024_Noto_earthquake'}),
 Document(page_content='209. **^** "Transportation systems along Sea of Japan still affected by Monday\'s quake". NHK. 2 January 2024. Archived from the original on 2 January 2024. Retrieved 2 January 2024.

In [None]:
# https://en.wikipedia.org/wiki/List_of_earthquakes_in_Japan

In [14]:
# Baseline (no retrieval): ask the bare LLM what the Noto Earthquake is.
ans1 = basic_llm("what is Noto Earthquake?")
print(ans1)  # author's timing note: 1.5 minutes







In [15]:
# Same question through the RetrievalQA chain; print only the answer text.
ans2 = rag_llm({"query": "what is Noto Earthquake?"})
print(ans2["result"])  # author's timing note: 6 minutes



 The Noto Peninsula earthquake occurred on January 18, 2024,
with a magnitude of 6.9 and a depth of 10 km. It caused significant damage to
the region, with over 200 fatalities and more than 1,000 injuries.


In [18]:
# Baseline (no retrieval): ask the bare LLM for the years of the Noto Earthquake.
ans1 = basic_llm("List down all year when the Noto Earthquake happen")
print(ans1)  # author's timing note: 1.5 minutes



.


In [17]:
# Same question through the RetrievalQA chain; print only the answer text.
ans2 = rag_llm({"query": "List down all year when the Noto Earthquake happen"})
print(ans2["result"])  # author's timing note: 8 minutes



 The Noto Earthquake happened in 2024.


In [20]:
# Baseline (no retrieval): ask the bare LLM who issued the tsunami warning.
ans1 = basic_llm("who gives tsunami warning after Noto Earthquake in 2024?")
print(ans1)  # author's timing note: 1.5 minutes






In [19]:
# Same question through the RetrievalQA chain; print only the answer text.
ans2 = rag_llm({"query": "who gives tsunami warning after Noto Earthquake in 2024?"})
print(ans2["result"])  # author's timing note: 6 minutes



