In [1]:
import logging
import sys

In [2]:
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

In [3]:
# ログレベルの設定
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

In [4]:
# ドキュメントの読み込み
with open("../source.txt") as f:
    test_all = f.read()

In [5]:
# チャンクの分割
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,  # チャンクの最大文字数
    chunk_overlap=20,  # オーバーラップの最大文字数
)
texts = text_splitter.split_text(test_all)

In [6]:
# チャンクの確認
print(len(texts))
for text in texts:
    print(text[:10].replace("\n", "\\n"), ":", len(text))

10
Arsenal Fo : 297
Community  : 202
Arsenal wa : 289
in English : 298
Cups, incl : 215
In 1886, m : 296
fortunes o : 298
the longes : 256
In 2006, t : 297
media.[9]  : 99


In [7]:
# インデックスの作成
# index = FAISS.from_texts(
#     texts=texts,
#     embedding=HuggingFaceEmbeddings(
#         model_name="intfloat/multilingual-e5-large", cache_folder="./model"
#     ),
# )
# index.save_local("storage")

In [8]:
# インデックスの読み込み
index = FAISS.load_local(
    "storage",
    HuggingFaceEmbeddings(model_name="../model/intfloat_multilingual-e5-large"),
)


  from .autonotebook import tqdm as notebook_tqdm


INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: ./model/intfloat_multilingual-e5-large
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu
INFO:faiss.loader:Loading faiss.
INFO:faiss.loader:Successfully loaded faiss.


In [9]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Verbose is required to pass to the callback manager

In [10]:
# LLMの準備
# llm = OpenAI(temperature=0, verbose=True)
llm = LlamaCpp(
    model_path="../model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,
)

llama.cpp: loading model from ./model/llama-2-7b-chat.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 3917.73 MB (+  256.00 MB per state)
llama_new_context_with_model: kv self size  =  256.00 MB
AVX = 

In [11]:
prompt = """### Instruction: What is the height of Mount Fuji?
### Response:"""
llm(prompt)

 The height of Mount Fuji is 3,776 meters (12,421 feet) above sea level.




llama_print_timings:        load time =  8550.53 ms
llama_print_timings:      sample time =    24.80 ms /    30 runs   (    0.83 ms per token,  1209.77 tokens per second)
llama_print_timings: prompt eval time = 11645.87 ms /    19 tokens (  612.94 ms per token,     1.63 tokens per second)
llama_print_timings:        eval time =  9120.24 ms /    29 runs   (  314.49 ms per token,     3.18 tokens per second)
llama_print_timings:       total time = 20861.47 ms


' The height of Mount Fuji is 3,776 meters (12,421 feet) above sea level.\n\n'

In [12]:
# 質問応答チェーンの作成
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.as_retriever(search_kwargs={"k": 4}),
    verbose=True,
)


DEBUG:langchain.vectorstores.base:Embeddings is not implemented for FAISS


In [13]:
# 質問応答チェーンの実行
print("A1:", qa_chain.run("What team is Arsenal?"))
print("A2:", qa_chain.run("Where is Arsenal based?"))
print("A3:", qa_chain.run("What is Arsenal's motto?"))



[1m> Entering new RetrievalQA chain...[0m


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
Llama.generate: prefix-match hit


 Arsenal Football Club is a team in the Premier League.


llama_print_timings:        load time =  8550.53 ms
llama_print_timings:      sample time =    11.68 ms /    14 runs   (    0.83 ms per token,  1198.84 tokens per second)
llama_print_timings: prompt eval time = 112664.07 ms /   400 tokens (  281.66 ms per token,     3.55 tokens per second)
llama_print_timings:        eval time =  4202.81 ms /    14 runs   (  300.20 ms per token,     3.33 tokens per second)
llama_print_timings:       total time = 116987.74 ms



[1m> Finished chain.[0m
A1:  Arsenal Football Club is a team in the Premier League.


[1m> Entering new RetrievalQA chain...[0m


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.09it/s]
Llama.generate: prefix-match hit


 Holloway, North London


llama_print_timings:        load time =  8550.53 ms
llama_print_timings:      sample time =     5.90 ms /     7 runs   (    0.84 ms per token,  1186.64 tokens per second)
llama_print_timings: prompt eval time = 78574.01 ms /   278 tokens (  282.64 ms per token,     3.54 tokens per second)
llama_print_timings:        eval time =  1765.62 ms /     6 runs   (  294.27 ms per token,     3.40 tokens per second)
llama_print_timings:       total time = 80412.88 ms



[1m> Finished chain.[0m
A2:  Holloway, North London


[1m> Entering new RetrievalQA chain...[0m


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
Llama.generate: prefix-match hit


 According to the context, Arsenal's motto is "Victoria Concordia Crescit", which means "Victory Through Harmony" in Latin.
[1m> Finished chain.[0m
A3:  According to the context, Arsenal's motto is "Victoria Concordia Crescit", which means "Victory Through Harmony" in Latin.



llama_print_timings:        load time =  8550.53 ms
llama_print_timings:      sample time =    31.28 ms /    38 runs   (    0.82 ms per token,  1214.95 tokens per second)
llama_print_timings: prompt eval time = 87377.18 ms /   312 tokens (  280.06 ms per token,     3.57 tokens per second)
llama_print_timings:        eval time = 10879.43 ms /    37 runs   (  294.04 ms per token,     3.40 tokens per second)
llama_print_timings:       total time = 98427.91 ms
