In [None]:
%pip install langchain langchain-openai langchain-community faiss-cpu tiktoken

In [2]:
from langchain_openai import OpenAI 
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA
import os
from dotenv import load_dotenv

In [3]:
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")

In [4]:

loader = TextLoader("metallica.txt")

In [5]:
documents = loader.load()

In [6]:
documents

[Document(page_content='Metallica is an American heavy metal band. The band was formed in 1981 in Los Angeles by vocalist and guitarist James Hetfield and drummer Lars Ulrich, and has been based in San Francisco for most of its career.[1][2] The band\'s fast tempos, instrumentals and aggressive musicianship made them one of the founding "big four" bands of thrash metal, alongside Megadeth, Anthrax and Slayer. Metallica\'s current lineup comprises founding members and primary songwriters Hetfield and Ulrich, longtime lead guitarist Kirk Hammett and bassist Robert Trujillo. Guitarist Dave Mustaine, who formed Megadeth after being fired from Metallica, and bassists Ron McGovney, Cliff Burton and Jason Newsted are former members of the band.\n\nMetallica first found commercial success with the release of its third album, Master of Puppets (1986), which is cited as one of the heaviest metal albums and the band\'s best work. The band\'s next album, ...And Justice for All (1988), gave Metalli

In [7]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)

In [8]:
docs = text_splitter.split_documents(documents)

In [10]:
len(docs)
docs[311]

Document(page_content="External links\nMetallica\nat Wikipedia's sister projects\n\n    Media from Commons\n    Quotations from Wikiquote\n    Data from Wikidata\n\n    Official website\n    Metallica at AllMusic Edit this at Wikidata\n    Metallica at Curlie\n    Metallica discography at Discogs Edit this at Wikidata\n    Metallica discography at MusicBrainz\n\n    vte\n\nMetallica\nAwards for Metallica\nAuthority control databases Edit this at Wikidata\nCategories:", metadata={'source': 'metallica.txt'})

In [11]:
embedding = OpenAIEmbeddings()

In [15]:
%pip install faiss-cpu

Collecting faiss-cpu
  Using cached faiss_cpu-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Using cached faiss_cpu-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0
Note: you may need to restart the kernel to use updated packages.


In [16]:
import faiss

In [17]:
library = FAISS.from_documents(docs, embedding)

In [18]:
Query1 = "Who replaced Cliff Burton in Metallica?"

In [19]:
Query_Answer = library.similarity_search(Query1)

In [23]:
print(Query_Answer[0])

page_content="Burton's death left Metallica's future in doubt. The three remaining members decided Burton would want them to carry on, and with the Burton family's blessings, the band sought a replacement.[35] Roughly 40 people – including Hammett's childhood friend, Les Claypool of Primus; Troy Gregory of Prong; and Jason Newsted, formerly of Flotsam and Jetsam – auditioned for the band to fill Burton's spot. Newsted learned Metallica's entire setlist; after the audition, Metallica invited him to Tommy's" metadata={'source': 'metallica.txt'}


In [24]:
docs_and_scores = library.similarity_search_with_score(Query1)

In [25]:
docs_and_scores[0]

(Document(page_content="Burton's death left Metallica's future in doubt. The three remaining members decided Burton would want them to carry on, and with the Burton family's blessings, the band sought a replacement.[35] Roughly 40 people – including Hammett's childhood friend, Les Claypool of Primus; Troy Gregory of Prong; and Jason Newsted, formerly of Flotsam and Jetsam – auditioned for the band to fill Burton's spot. Newsted learned Metallica's entire setlist; after the audition, Metallica invited him to Tommy's", metadata={'source': 'metallica.txt'}),
 0.2178742)

In [26]:
docs_and_scores[1]

(Document(page_content="1984–1986: Ride the Lightning, Master of Puppets, and Burton's death\nCliff Burton (pictured in 1985) replaced Ron McGovney as the bassist in 1982 and played with the band until his death in 1986.", metadata={'source': 'metallica.txt'}),
 0.27124405)

In [27]:
docs_and_scores[2]

(Document(page_content="1986–1994: Newsted joins, ...And Justice for All and Metallica\nJason Newsted (pictured in 2013) joined Metallica soon after Cliff Burton's death in 1986.", metadata={'source': 'metallica.txt'}),
 0.27820975)

In [28]:
docs_and_scores[3]

(Document(page_content='McGovney to leave because they thought he "didn\'t contribute anything, he just followed."[16] Although Burton initially declined the offer, by the end of the year, he had accepted on the condition that the band move to El Cerrito in the San Francisco Bay Area.[16] Metallica\'s first live performance with Burton was at the nightclub The Stone in March 1983, and the first recording to feature Burton was the Megaforce demo (1983).[16]', metadata={'source': 'metallica.txt'}),
 0.27859023)

In [29]:
retriever = library.as_retriever()


In [30]:
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever)

In [31]:
retriever_query = 'What is the most hated Metallica Album'

In [32]:
results = qa.invoke(retriever_query)

In [33]:
print(results)

{'query': 'What is the most hated Metallica Album', 'result': ' According to the context given, the album that received the most criticism and mixed reactions from critics is St. Anger, released in 2003. However, it is not explicitly stated as the most hated album.'}


In [34]:
library.save_local("faiss_index_metallica")

In [47]:
metallica_saved = FAISS.load_local("faiss_index_metallica", embedding,allow_dangerous_deserialization = True)

In [49]:
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=metallica_saved.as_retriever())

In [52]:
retriever_query2 = "What Metallica album does fans hate the most?"

In [53]:
results = qa.invoke(retriever_query2)

In [54]:
print(results)

{'query': 'What Metallica album does fans hate the most?', 'result': ' Fans have expressed mixed reactions to Metallica\'s eighth studio album, St. Anger (2003), with some criticizing the "steely" sounding snare drum and absence of guitar solos. However, it still debuted at number one on the Billboard 200 chart.'}


In [58]:
%pip install PyPDF2

Note: you may need to restart the kernel to use updated packages.


In [59]:
import PyPDF2
from langchain_community.embeddings import OllamaEmbeddings

In [60]:
    pdf_file_name = "BuddhistMeditation.pdf"
    #pdf_stream = BytesIO(content)
    pdf = PyPDF2.PdfReader(pdf_file_name)
    pdf_text = ""
    for page in pdf.pages:
        pdf_text += page.extract_text()

In [61]:
pdf_text

'靜坐參禪-1 ⼀、靜坐的注意事項靜坐是一個定心的方法，靜坐要得力，各方面都要調。調身是調靜坐的姿勢，靜坐時參禪、念佛、持咒、觀想或是數息，則屬於調心。平時要調飲食，飲是流體，食是固體，飲食要均衡。水喝太多，身體會虛、濕氣重、容易酸痛。水喝太少，身體燥氣重，就容易躁動，不容易得定。飲食要清淡一點、少一點，吃東西口味重或是吃太多就容易昏沈。最好吃素，少造殺業，多培養慈悲心。還要調睡眠，睡太少，精神不容易集中。睡太多，人容易生起妄念。\t靜坐剛開始，姿勢調好之後，就接著調息，就是調呼吸的氣息。做三次吐納，用鼻子吸氣、用嘴巴吐氣，吸得慢、吐得慢。吸氣時觀想乾淨的空氣進到身體內，吐氣時觀想體內的髒空氣都從嘴巴吐出去。這樣的呼吸調整做三次就夠，不必多做，否則容易耗氣，不利身體健康。調息三次後，就開始專心靜坐調心，不管用數息、念佛、參禪、持咒或是修觀行，就是讓心定下來、平靜、寧靜，進而開悟契入空覺。\t靜坐主要有四種坐姿：雙盤、單盤、散盤、掛腿坐。雙盤就是先將左腳盤到右大腿胯部，右腳再盤到左大腿胯部，佛像都是這樣雙盤。如果做不到，也可以反過來：先把右腳放在左大腿胯部，再把左腳放在右大腿胯部。雙盤的重心比較穩固，所以能夠雙盤就盡量雙盤，不能雙盤就單盤。單盤就是左腳放在右大腿上面，或是反過來右腳放左大腳上面。散盤就是兩腳交叉坐。\t靜坐時不要直接坐在地板上，因為地板濕氣重，不利身體健康。如果沒有大椰墊，可以鋪舊棉被或是榻榻米墊，再坐在上面。雙盤就直接坐在墊子上面，臀部下面不必再墊小坐墊，但是也可以用毛巾或毯子墊一個薄的厚度在臀部下面，因為靜坐到某個程度，臀部的氣容易堵住，墊著薄坐墊能讓氣比較容易通。如果是單盤或散盤，就需要在臀部下面再墊一個小墊子，否則坐時肚子內縮，身體就無法坐直。但是臀部不要坐滿小墊子，只坐前緣。要注意單盤的一個問題，單盤時身體為了平第⾴1衡，很容易坐姿傾斜，慢慢脊椎也會傾斜，所以要盡量坐直。平常在辦公室或是穿裙子等不方便盤腿的情況下，可以採掛腿坐。掛腳坐就是坐在椅子上，上半身保持打坐的姿勢，兩腳打開與肩同寬。兩腳尖平行不打開，否則氣容易散掉。\t靜坐時身體保持放鬆，不要刻意挺腰、挺胸，否則火氣會上升。但也不能坐腰、含胸拔背，這樣內臟沒有活動空間。可以先整個上身往前彎，再慢慢抬起來。收下顎，舌頭抵住上顎，脖子靠著後領子。再來手結印放在胯部上，如果是右腿在上，就結三

In [63]:
    # Split the text into chunks
    texts = text_splitter.split_text(pdf_text)

    # Create a metadata for each chunk
    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]


In [64]:
texts

['靜坐參禪-1',
 '⼀、靜坐的注意事項靜坐是一個定心的方法，靜坐要得力，各方面都要調。調身是調靜坐的姿勢，靜坐時參禪、念佛、持咒、觀想或是數息，則屬於調心。平時要調飲食，飲是流體，食是固體，飲食要均衡。水喝太多，身體會虛、濕氣重、容易酸痛。水喝太少，身體燥氣重，就容易躁動，不容易得定。飲食要清淡一點、少一點，吃東西口味重或是吃太多就容易昏沈。最好吃素，少造殺業，多培養慈悲心。還要調睡眠，睡太少，精神不容易集中。睡太多，人容易生起妄念。\t靜坐剛開始，姿勢調好之後，就接著調息，就是調呼吸的氣息。做三次吐納，用鼻子吸氣、用嘴巴吐氣，吸得慢、吐得慢。吸氣時觀想乾淨的空氣進到身體內，吐氣時觀想體內的髒空氣都從嘴巴吐出去。這樣的呼吸調整做三次就夠，不必多做，否則容易耗氣，不利身體健康。調息三次後，就開始專心靜坐調心，不管用數息、念佛、參禪、持咒或是修觀行，就是讓心定下來、平靜、寧靜，進而開悟契入空覺。\t靜坐主要有四種坐姿：雙盤、單盤、散盤、掛腿坐。雙盤就是先將左腳盤到右大腿胯部，右腳再盤到左大腿胯部，佛像都是這樣雙盤。如果做不到，也可以反過來：先把右腳放在左大腿胯部，再把左腳放在右大腿胯部。雙盤的重心比較穩固，所以',
 '能夠雙盤就盡量雙盤，不能雙盤就單盤。單盤就是左腳放在右大腿上面，或是反過來右腳放左大腳上面。散盤就是兩腳交叉坐。\t靜坐時不要直接坐在地板上，因為地板濕氣重，不利身體健康。如果沒有大椰墊，可以鋪舊棉被或是榻榻米墊，再坐在上面。雙盤就直接坐在墊子上面，臀部下面不必再墊小坐墊，但是也可以用毛巾或毯子墊一個薄的厚度在臀部下面，因為靜坐到某個程度，臀部的氣容易堵住，墊著薄坐墊能讓氣比較容易通。如果是單盤或散盤，就需要在臀部下面再墊一個小墊子，否則坐時肚子內縮，身體就無法坐直。但是臀部不要坐滿小墊子，只坐前緣。要注意單盤的一個問題，單盤時身體為了平第⾴1衡，很容易坐姿傾斜，慢慢脊椎也會傾斜，所以要盡量坐直。平常在辦公室或是穿裙子等不方便盤腿的情況下，可以採掛腿坐。掛腳坐就是坐在椅子上，上半身保持打坐的姿勢，兩腳打開與肩同寬。兩腳尖平行不打開，否則氣容易散掉。\t靜坐時身體保持放鬆，不要刻意挺腰、挺胸，否則火氣會上升。但也不能坐腰、含胸拔背，這樣內臟沒有活動空間。可以先整個上身往前彎，再慢慢抬起來。收下顎，舌頭抵住上顎，脖子靠著後領子。再來手結印放在胯部上，如

In [66]:
len(texts)

41

In [72]:
texts[5]

'代表心不受控。可以頭稍微低一點去壓制頸部的動脈，心就會比較定。其實當妄念起來，只要不理它，專心在方法上面用功，妄念就會不見。妄念之所以還在，是因為心繼續起妄念。靜坐參禪如果妄念很多，可以先持咒、念佛或數息，等心比較定了再繼續參禪。要注意：昏沈妄想都會成習慣，所以不能放任不管。\t工欲善其事，必先利其器。盤腿是要練的，從散盤慢慢練成單盤、再慢慢變成雙盤，不能一直停在單盤、散盤。雖然有人天生腿腳比較好，一下子就可以雙盤，大部分人都是慢慢練來的。但是也不要躁進，否則容易受傷。靜坐時如果對身體有感覺，是由於色蘊不空，對身體的執著還在。有任何身體的感受，不要理會它，專心在方法上用功。心越專注，身體的感受越少。平常也可以拜佛或是多練習打坐，自然能夠坐得更久。雖然靜坐跟成道證果沒有直接關係，但是對於定心、靜心是有幫助的。\t⼆、「迴光返照、返聞⾃性」的參禪⽅便禪宗是用參禪起疑情來破執著、開悟，這裡教大眾用一個「迴光返照、返聞自性」的參禪方便來參問：「誰是我本來面目？」本來面目就第⾴3是父母未生之前的面目，就是清淨本心，就是念佛的是誰。禪宗講，每個人都有清淨本心，這個清淨本心就是自己父母未生之前的本來面目'

In [76]:
pdf_library = FAISS.from_texts(texts, embedding)

In [77]:
pdf_retriever = pdf_library.as_retriever()

In [78]:
pdf_retriever_query = '何謂參禪？ 請用中文回答'
pdf_retriever_query2 = '何謂修行？ 請用中文回答'

In [79]:
pdf_results = qa.invoke(pdf_retriever_query)

In [80]:
print(pdf_results)

{'query': '何謂參禪？ 請用中文回答', 'result': ' 參禪是指參與禪宗的修行，通常指的是坐禪冥想。透過冥想來觀照自己內心的想法，達到心靈的平靜和覺醒。禪宗的修行強調實踐和親身體驗，而非僅僅是理解概念。參禪是一種內省的修行方式，可以幫助人們更好地了解自己，平靜心境，提升覺悟。'}


In [1]:
from langchain.prompts import PromptTemplate


In [5]:
    prompt_template = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you dont know, don't try to make up an answer.
    
    {context}
    
    Question: {question}
    Answer in {language_choice}:
    """
    
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["language_choice"],
    )
    
    languageDefined= PROMPT.format(language_choice="French", context="{context}", question="{question}")

In [6]:
print(languageDefined)

Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you dont know, don't try to make up an answer.

{context}

Question: {question}
Answer in French:



In [7]:
type(languageDefined)

str

In [8]:
from langchain.chains import (
    ConversationalRetrievalChain,
)

In [None]:
    chain = ConversationalRetrievalChain.from_llm(
        #ChatOpenAI(api_key=openai_api_key, model="gpt-4"),
        ChatOllama(model=chat_model_name),
        chain_type="stuff",
        combine_docs_chain_kwargs=chain_type_kwargs,
        retriever=docsearch.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )

In [12]:
%pip install -qU langchain-openai
%pip install langchain-community

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
from dotenv import load_dotenv

In [5]:
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")

In [6]:
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [13]:
%pip install langchain

Note: you may need to restart the kernel to use updated packages.


In [22]:
!pip install "langchain[docarray]" 
%pip install --upgrade docarray langchain

Collecting docarray
  Downloading docarray-0.40.0-py3-none-any.whl.metadata (36 kB)
Collecting langchain
  Downloading langchain-0.1.14-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community<0.1,>=0.0.30 (from langchain)
  Downloading langchain_community-0.0.31-py3-none-any.whl.metadata (8.4 kB)
Collecting langchain-core<0.2.0,>=0.1.37 (from langchain)
  Downloading langchain_core-0.1.39-py3-none-any.whl.metadata (5.9 kB)
Downloading docarray-0.40.0-py3-none-any.whl (270 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.2/270.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hDownloading langchain-0.1.14-py3-none-any.whl (812 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.8/812.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading langchain_community-0.0.31-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m8.0 MB/s[0m eta [

In [28]:
# Requires:
# pip install langchain docarray tiktoken

from langchain_community.vectorstores import DocArrayInMemorySearch
import docarray
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import OpenAIEmbeddings

vectorstore = DocArrayInMemorySearch.from_texts(
    ["harrison worked at Kenosha", "bears like to eat honey"],
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser

chain.invoke("where did harrison work?")

'Harrison worked at Kenosha.'

In [29]:
retriever.invoke("where did harrison work?")

[Document(page_content='harrison worked at Kenosha'),
 Document(page_content='bears like to eat honey')]

In [30]:
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)

In [31]:
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser

In [32]:
chain.invoke("where did harrison work?")

'Harrison worked at Kenosha.'