In [11]:
from langchain_community.llms import Ollama

llm = Ollama(model="llama3")
llm.invoke("The first man on the moon was ...")

  llm = Ollama(model="llama3")


'Neil Armstrong! He stepped out of the lunar module Eagle and onto the moon\'s surface on July 20, 1969, during the Apollo 11 mission. Armstrong famously declared, "That\'s one small step for man, one giant leap for mankind," as he became the first person to set foot on the moon.'

In [10]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = Ollama(
    model="llama3", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
)
llm.invoke("The first man on the moon was ...")

  llm = Ollama(


Neil Armstrong! On July 20, 1969, Neil Armstrong and Edwin "Buzz" Aldrin became the first humans to set foot on the Moon as part of the Apollo 11 mission. Armstrong famously declared, "That's one small step for man, one giant leap for mankind," as he stepped off the lunar module Eagle onto the Moon's surface.

'Neil Armstrong! On July 20, 1969, Neil Armstrong and Edwin "Buzz" Aldrin became the first humans to set foot on the Moon as part of the Apollo 11 mission. Armstrong famously declared, "That\'s one small step for man, one giant leap for mankind," as he stepped off the lunar module Eagle onto the Moon\'s surface.'

In [16]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [18]:
from sentence_transformers import SentenceTransformer
from typing import List
    
class MyEmbeddings:
    def __init__(self, model):
        self.model = SentenceTransformer(model, trust_remote_code=True)
    
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.model.encode(t).tolist() for t in texts]
            
    def embed_query(self, query: str) -> List[float]:
        return self.model.encode([query]).tolist()

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
from langchain_ollama import OllamaEmbeddings
embed = OllamaEmbeddings(
    model="llama3"
)

In [24]:
input_text = "The meaning of life is 42"
vector = embed.embed_query(input_text)
print(vector[:3])

[0.014715631, -0.0037208814, -0.00076697685]


In [23]:
# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# https://stackoverflow.com/questions/77217193/langchain-how-to-use-a-custom-embedding-model-locally
vectorstore = Chroma.from_documents(documents=splits, embedding=OllamaEmbeddings(model="llama3"))


# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

InvalidDimensionException: Embedding dimension 4096 does not match collection dimensionality 1024

In [28]:
rag_chain.invoke("What is Task Decomposition?")

ValueError: Expected embeddings to be a list of floats or ints, a list of lists, a numpy array, or a list of numpy arrays, got [[[0.0034616945777088404, 0.0015415868256241083, 0.018468406051397324, 0.02181551605463028, -0.03861014544963837, 0.06722038984298706, -0.04830658808350563, -0.03027186170220375, -0.01336503867059946, 0.001540154218673706, 0.025010429322719574, -0.018276656046509743, -0.012959412299096584, 0.008468745276331902, -0.025917181745171547, 0.01732330396771431, 0.010883983224630356, -0.052047669887542725, 0.03249093517661095, -0.0048014381900429726, 0.021406253799796104, -0.008949948474764824, -0.010673483833670616, -0.03578928858041763, 0.011305581778287888, 0.0038023816887289286, 0.0023997249081730843, 0.020839978009462357, 0.04613739997148514, 0.014657679945230484, -0.021194541826844215, 0.023243065923452377, -0.029993586242198944, -0.022755635902285576, -0.015597694553434849, -0.03856169059872627, -0.001305036130361259, -0.010740541853010654, 0.021492280066013336, 0.007334032561630011, -0.05659337341785431, 0.028272001072764397, 0.046544428914785385, -0.04837215319275856, 0.025567365810275078, 0.06750980764627457, -0.07278160750865936, 0.005908491555601358, -0.012848378159105778, 0.06308130919933319, -0.020946871489286423, -0.00967562384903431, 0.034564025700092316, -0.013280174694955349, -0.023367058485746384, 0.05617493763566017, 0.068537637591362, -0.01787486858665943, 0.008327714167535305, 0.020351946353912354, 0.008256496861577034, 0.09225143492221832, -0.005347040481865406, 0.04899594560265541, -0.05659821256995201, 0.033489618450403214, 0.015553885139524937, -0.02623830921947956, 0.016716359183192253, 0.017756937071681023, 0.007199935149401426, -0.040279824286699295, 0.02779482863843441, 0.026686662808060646, -0.026762008666992188, -0.029900819063186646, -0.023958733305335045, -0.02275417186319828, 0.04964396357536316, 0.026093319058418274, -0.03749101608991623, 0.00816443469375372, -0.029134243726730347, 0.006516108755022287, 0.01545940525829792, -0.05627027526497841, 0.02460573799908161, 0.35834869742393494, -0.04261857271194458, -0.03217446431517601, -0.004108958877623081, -0.018920503556728363, 0.008798842318356037, 0.014170482754707336, -0.02630404382944107, 0.04717295616865158, 0.017272839322686195, 0.015789903700351715, 0.05645383894443512, -0.013943691737949848, -0.0003065862401854247, -0.014616467989981174, 0.04939218983054161, -0.008778552524745464, -0.06387610733509064, 0.034550394862890244, -0.055603522807359695, -0.061699770390987396, 0.015259850770235062, 0.0607450008392334, 0.04699864611029625, -0.028395842760801315, -0.01672656275331974, -0.014753902330994606, 0.02804715558886528, -0.018182389438152313, -0.023081596940755844, 0.009128309786319733, 0.005610505118966103, 0.014174384996294975, -0.013382985256612301, -0.03758285939693451, 0.007062382996082306, 0.0077585382387042046, -0.007597854360938072, 8.230493403971195e-05, 0.024898169562220573, -0.046151310205459595, 0.023076221346855164, -0.010778428055346012, -0.003275408875197172, -0.01591690629720688, 0.008206470869481564, -0.040695082396268845, 0.03804268315434456, 0.0004713350790552795, -0.030119750648736954, 0.001180352526716888, 0.03761453926563263, -0.037669651210308075, -0.015885617583990097, -0.02927529439330101, -0.02424309402704239, -0.01122581958770752, 0.04746280238032341, -0.018834836781024933, 0.02087290585041046, 0.01708110421895981, -0.001552184228785336, -0.022735595703125, -0.009271694347262383, -0.03610699251294136, -0.053293999284505844, -0.06694123148918152, -0.019357051700353622, -0.052827030420303345, -0.03297434374690056, -0.03247015178203583, -0.036394305527210236, -0.05914909392595291, -0.024447528645396233, -0.0005732110585086048, -0.011153681203722954, 0.01901889033615589, 0.009043528698384762, 0.001017740578390658, 0.057219069451093674, -0.025275448337197304, 0.03005552664399147, 0.029263649135828018, 0.001586480182595551, -0.0017891956958919764, 0.006008749362081289, -0.040345992892980576, -0.019008168950676918, -0.002294047735631466, 0.025475241243839264, 0.05035865306854248, 0.02923400141298771, 0.02119239792227745, 0.01497112587094307, -0.0031405924819409847, 0.04085350036621094, -0.034356944262981415, 0.013528730720281601, -0.03559362515807152, 0.01117167342454195, 0.019144361838698387, -0.014296884648501873, 0.0037085949443280697, 0.03710566461086273, -0.006627934984862804, 0.026860050857067108, -0.015650898218154907, 0.009854789823293686, -0.0890798419713974, 0.03487073630094528, -0.05781300365924835, -0.0030643839854747057, -0.0038734206464141607, -0.002781454473733902, 0.0013292250223457813, 0.035695839673280716, 0.02304409071803093, -0.02164488472044468, -0.005868051666766405, -0.012256253510713577, -0.01323278620839119, -0.015061913058161736, 0.006144982762634754, -0.0522364005446434, -0.010014551691710949, -0.027762791141867638, -0.003496696474030614, 0.04987754672765732, 0.036905687302351, 0.021798212081193924, -0.055122166872024536, -0.05521538481116295, 0.035040777176618576, 0.0529169999063015, -0.01914379559457302, -0.0023539087269455194, -0.04753763973712921, 0.03691449761390686, 0.015943750739097595, -0.0059417663142085075, -0.012478792108595371, -0.06630117446184158, 0.03907829895615578, -0.008221208117902279, 0.008001229725778103, 0.009265596978366375, 0.006481832824647427, 0.01783035695552826, -0.010621095076203346, 0.02138272486627102, -0.018074898049235344, -0.03781723976135254, -0.006444497033953667, 0.010777318850159645, 0.004693860188126564, -0.006718756165355444, -0.023741845041513443, -0.01519495714455843, 0.03947105631232262, -0.04362049326300621, -0.027742715552449226, 0.0031014701817184687, -0.026284581050276756, -0.01910703256726265, -0.040521446615457535, 0.027604101225733757, -0.05351419001817703, 0.003435874357819557, -0.05352328345179558, 0.037692632526159286, -0.03427863493561745, 0.005508545786142349, 0.018428578972816467, -0.01249223854392767, -0.027027400210499763, -0.0008619131403975189, 0.006027879659086466, -0.0047518243081867695, 0.012749003246426582, -0.0034576912876218557, -0.012818540446460247, -0.034044086933135986, 0.023026857525110245, -0.025129694491624832, 0.00028991600265726447, 0.061028677970170975, 0.022448543459177017, -0.03846392408013344, -0.03689015284180641, 0.006890234537422657, -0.009953946806490421, 0.016136569902300835, 0.002385292435064912, 0.026051323860883713, 0.03272198885679245, -0.014456341974437237, 0.009769030846655369, 0.028973817825317383, 0.01941845938563347, -0.03770027309656143, -0.01616646908223629, -0.005443525034934282, -0.030442172661423683, 0.0011105521116405725, -0.017561430111527443, -0.010618515312671661, 0.023469071835279465, 0.022374970838427544, 0.029452167451381683, -0.05169649422168732, 0.016773393377661705, 0.017324455082416534, -0.011001287028193474, -0.0281462874263525, 0.0020791839342564344, 0.0032098882365971804, -0.06626932322978973, 0.0098688043653965, -0.026454554870724678, 0.01156344823539257, 0.0010096614714711905, -0.045353055000305176, -0.036803439259529114, -0.04168153554201126, -0.03685416281223297, 0.03234991058707237, 0.008225101977586746, 0.06082930788397789, -0.0403708778321743, 0.004245224874466658, 0.030873337760567665, 0.0076123992912471294, 0.00882177334278822, -0.025739185512065887, -0.02287960797548294, -0.03312253952026367, 0.0029635231476277113, 0.001470934017561376, -0.005931748542934656, 0.01741175912320614, -0.011308657005429268, 0.01921858824789524, 0.025095529854297638, 0.060808755457401276, -0.020731108263134956, -0.03228629380464554, -0.008632928133010864, 0.005545154679566622, 0.022543005645275116, 0.0010565746342763305, 0.03470413759350777, -0.02218199521303177, 0.008314421400427818, 0.06280586123466492, -0.010530463419854641, -0.03249327465891838, -0.023677818477153778, 0.025981925427913666, 0.0009634184534661472, -0.018039200454950333, 0.001297835959121585, -0.00830937922000885, 0.003463948844000697, 0.032947756350040436, -0.020365474745631218, 0.08186853677034378, 0.007246889173984528, -0.00516683142632246, 0.021589308977127075, -0.034344784915447235, -0.025664258748292923, 0.030925404280424118, 0.012218141928315163, -0.015286186710000038, -0.0025925091467797756, -0.004235012922435999, -0.02840474434196949, -0.01468499843031168, 0.02544895000755787, 0.022431597113609314, 0.00865736324340105, 0.05815618112683296, 0.05095100775361061, 0.009956925176084042, 0.007565175648778677, 0.005454440601170063, -0.028710531070828438, 0.06121278926730156, -0.039300620555877686, -0.05949133634567261, -0.03435315191745758, 0.02385074645280838, -0.015970619395375252, 0.03032432496547699, -0.010655754245817661, 0.013787107542157173, 0.019525980576872826, 0.02529544197022915, 0.016077088192105293, -0.01379599142819643, -0.004647321067750454, -0.005954412743449211, 0.02117905765771866, -0.028158094733953476, 0.04081398621201515, -0.044030219316482544, 0.00857167411595583, -0.0005425849813036621, -0.01274847611784935, 0.0011957663809880614, -0.029385346919298172, -0.06313733756542206, 0.02791941724717617, 0.036775942891836166, -0.018952777609229088, 0.020529277622699738, 0.010257602669298649, -0.01054935809224844, -0.03817912936210632, 0.017129436135292053, -0.028385691344738007, 0.04151155427098274, -0.007131849881261587, -0.046985551714897156, -0.01597139611840248, 0.0073404028080403805, -0.00031516372109763324, -0.00537000922486186, 0.043967630714178085, 0.039729755371809006, -0.007485102396458387, 0.0052967369556427, -0.05300850793719292, 0.07484841346740723, -0.022281691431999207, -0.01489607710391283, -0.020887091755867004, -0.0036334798205643892, -0.027568064630031586, 0.021328426897525787, 0.0002761953219305724, -0.00621211901307106, -0.03035741113126278, -0.011654659174382687, 0.02438797429203987, -0.005284284241497517, 0.027379672974348068, 0.011296185664832592, -0.01669902913272381, -0.0496395118534565, -0.0452132448554039, -0.03443334624171257, 0.02480943314731121, -0.040217649191617966, -0.0022957483306527138, -0.0159166157245636, 0.041388627141714096, 0.016727490350604057, 0.04759171977639198, -0.01155402697622776, -0.0014438466168940067, -0.030311884358525276, -0.00018905398610513657, 0.05667062848806381, 0.046730887144804, 0.017320526763796806, -0.012970003299415112, 0.04995318129658699, -0.030879219993948936, -0.016462475061416626, -0.02662021666765213, -0.005839016754180193, 0.011445961892604828, 0.03733987361192703, -0.011270783841609955, -0.04681023582816124, -0.028832966461777687, -0.03438331559300423, 0.01172785833477974, -0.05855545029044151, -0.04075980558991432, 0.027161696925759315, 0.004882371053099632, 0.038025207817554474, 0.008094270713627338, 0.012944210320711136, 0.02051018550992012, 0.004328994546085596, -0.025964081287384033, 0.002723350189626217, -0.005579719785600901, -0.03705824539065361, 0.02541184239089489, -0.0013307677581906319, 0.024508880451321602, 0.0015213409205898643, 0.003536509582772851, 0.029731376096606255, -0.01821625977754593, 0.01084093376994133, -0.024569503962993622, 0.0009276246419176459, -0.004730938468128443, -0.014744741842150688, 0.008104715496301651, -0.007624829653650522, -0.019744275137782097, 0.03434056416153908, -0.02564084902405739, 0.004815900232642889, 0.036055516451597214, -0.04556833207607269, -0.03198375180363655, 0.01690254919230938, 0.008783153258264065, -0.0023565921001136303, 0.03741724416613579, 0.036190859973430634, -0.008437731303274632, -0.05615263059735298, 0.007907402701675892, 0.040450938045978546, 0.0026459121145308018, 0.029666591435670853, -0.025030797347426414, -0.05291697010397911, -0.04429179057478905, 0.0003323830896988511, 0.041320864111185074, 0.007702372502535582, 0.02268543653190136, 0.008303551003336906, 0.029749205335974693, 0.003947440069168806, -0.012421908788383007, 0.02107408456504345, -0.03348025679588318, 0.015205368399620056, -0.007900165393948555, 0.03566370904445648, -0.04461133852601051, -0.03561597317457199, -0.06891857087612152, -0.03236326575279236, 0.0022547657135874033, 0.04497417435050011, 0.04895288869738579, 0.0037395558319985867, -0.012660476379096508, 0.01069216150790453, -0.006715577561408281, -0.002441163407638669, 0.01502656377851963, 0.004259808454662561, -0.00552615849301219, 0.019458379596471786, -0.009226568043231964, 0.01896132156252861, 0.0016508512198925018, 0.025957003235816956, -0.020306462422013283, 0.03461887687444687, 0.021952763199806213, -0.01746423915028572, -0.029820535331964493, 0.0015054572140797973, -0.004640664905309677, 0.017439445480704308, -0.02735673449933529, 0.0007390046375803649, 0.012407002970576286, 0.022973012179136276, 0.053086891770362854, 0.004266027826815844, 0.012267986312508583, -0.0342070646584034, -0.024957692250609398, -0.0002384045219514519, -0.022846536710858345, -0.05000677704811096, 0.01886170729994774, -0.036609165370464325, 0.005366338882595301, -0.004699836950749159, 0.038515686988830566, 0.02978156879544258, 0.030573591589927673, -0.011221732012927532, -0.06529457867145538, -0.020443497225642204, -0.05054626241326332, -0.01499298308044672, 0.01692182570695877, 0.009053545072674751, -0.05519954860210419, -0.02011352963745594, -0.01863209903240204, -0.007261214777827263, 0.03453301265835762, 0.011993874795734882, 0.011737550608813763, -0.01739693060517311, 0.0911443680524826, 0.03790319710969925, -0.039769984781742096, -0.04899914562702179, -0.029764264822006226, -0.02596323750913143, 0.02665349282324314, -0.023895397782325745, 0.0005466703441925347, 0.009402742609381676, -0.02859782986342907, -0.05514489859342575, -0.02115434594452381, 0.020944874733686447, -0.0036851507611572742, -0.0200336966663599, -0.01903635449707508, -0.04638763889670372, 0.051453735679388046, -0.011432083323597908, -0.04333769530057907, -0.0424727238714695, -0.040304034948349, -0.016657136380672455, 0.0038338175509124994, -0.04696929454803467, 0.01937667466700077, 0.014101484790444374, -0.0033512646332383156, 0.015988970175385475, 0.03987642005085945, -0.0056791044771671295, -0.042896170169115067, 0.022553257644176483, -0.04739117622375488, -0.010848639532923698, -0.022353585809469223, -0.0003545112849678844, -0.03347406163811684, -0.002579196821898222, -0.02164369821548462, 0.026700690388679504, 0.05123269557952881, 0.02378908358514309, -0.01637396588921547, -0.0173930786550045, 0.01871628314256668, 0.011666406877338886, 0.016941240057349205, -0.0062090312130749226, -0.04509267210960388, 0.001893933629617095, 0.029193421825766563, -0.017556166276335716, -0.024431567639112473, -0.017680982127785683, 0.030715038999915123, -0.0052899643778800964, 0.02801709808409214, -0.03387979045510292, -0.023940496146678925, 0.005096322391182184, -0.011237668804824352, -0.04118649661540985, 0.028571249917149544, -0.002994344336912036, -0.008101945742964745, 0.01802849769592285, -0.014484652318060398, -0.03253050148487091, 0.031737856566905975, 0.029101215302944183, -0.0031476353760808706, 0.019199121743440628, 0.06482964754104614, -0.009231245145201683, -0.06400497257709503, 0.03566385433077812, 0.030822468921542168, -0.018800945952534676, -0.015081999823451042, 0.024510588496923447, -0.0035464284010231495, 0.000364877370884642, -0.03401034325361252, -0.03734453395009041, -0.010161173529922962, -0.03267893195152283, 0.0018280057702213526, -0.010993088595569134, 0.0010971386218443513, 0.0429845005273819, -0.0495513416826725, 0.016069607809185982, 0.02947300858795643, 0.058613263070583344, 0.004701070953160524, 0.01840958371758461, 0.03226591646671295, 0.019168948754668236, 0.0017898364458233118, 0.039536189287900925, -0.023912565782666206, -0.0020543280988931656, 0.056807804852724075, 0.008811770007014275, 0.031286220997571945, -0.03319571167230606, -0.048161882907152176, -0.013455756939947605, 0.06362663954496384, 0.005538516212254763, -0.006241787225008011, 0.010546382516622543, -0.008360925130546093, 0.041325364261865616, -0.049346357583999634, 0.003204773645848036, -0.01337212510406971, 0.053300585597753525, 0.044159144163131714, -0.0054451776668429375, -0.05321784317493439, 0.012363149784505367, 0.042123112827539444, 0.005912222899496555, -0.012755231000483036, 0.005457318387925625, 0.006166375707834959, 0.009062900207936764, 0.06533288955688477, 0.004222619812935591, 0.00702159758657217, -0.008303249254822731, -0.02472514472901821, -0.00737977446988225, 0.022598639130592346, 0.038927383720874786, 0.012037333101034164, 0.012014228850603104, 0.040617190301418304, 0.0363527275621891, -0.007800258696079254, -0.007943622767925262, -0.0570087768137455, -0.028484134003520012, 0.05794519931077957, -0.025854011997580528, -0.05228397250175476, -0.0656319186091423, 0.003013954497873783, 0.002052118768915534, 0.011402974836528301, 0.019956223666667938, -0.00936519168317318, 0.0013065326493233442, -0.009918240830302238, 0.03616080433130264, 0.03939999267458916, -0.08678343892097473, 0.00849747471511364, -0.02278195694088936, 0.00129248620942235, -0.020785927772521973, -0.033539269119501114, -0.004944089334458113, 0.049150675535202026, -0.011248603463172913, 0.020358052104711533, 0.021208779886364937, -0.013904456980526447, -0.013620990328490734, -0.0223873071372509, 0.008988679386675358, 0.0010486772516742349, 0.006483697332441807, -0.04726441204547882, -0.02429869957268238, -0.028060002252459526, 0.03590419143438339, -0.0076019843108952045, -0.012981053441762924, 0.029283178970217705, -0.022061394527554512, 0.008437550626695156, -0.023141490295529366, -0.007656550966203213, 0.0230167955160141, -0.001052389619871974, 0.0012621561763808131, 0.025668034330010414, 0.003350531216710806, 0.0014146751491352916, -0.038691796362400055, 0.04300730302929878, 0.006075464189052582, -0.025158902630209923, 0.02800079621374607, 0.019587622955441475, -0.01698276959359646, 0.009955520741641521, 0.0003850382345262915, 0.019035108387470245, -0.0027707635890692472, -0.01651890017092228, 0.00020980366389267147, -0.011105082929134369, -0.02599325217306614, -0.020011164247989655, -0.04216105118393898, 0.004392881877720356, 0.005204190034419298, -0.00409420020878315, 0.01790650747716427, 0.014927176758646965, 0.016989344730973244, -0.03370166942477226, -0.0458231158554554, 0.006292362231761217, -0.006469886749982834, 0.018700309097766876, -0.033643390983343124, 0.002598144579678774, 0.00941932387650013, -0.00586364371702075, -0.007571952883154154, -0.01889878325164318, 0.04955766350030899, 0.012263205833733082, -0.0755818784236908, -0.011733491905033588, 0.043258778750896454, -0.013175074011087418, -0.059364210814237595, -0.03476269543170929, 0.06303597241640091, 0.039042118936777115, 0.03188707306981087, -0.01993551477789879, -0.021451633423566818, 0.03735959529876709, -0.00885835848748684, 0.02606712467968464, 0.00013236734957899898, 0.01712755300104618, 0.00528830848634243, 0.02901030145585537, -0.0032307785004377365, 0.024264303967356682, 0.0006783701246604323, -0.014054100960493088, 0.03777948021888733, 0.00869035068899393, 0.015078497119247913, -0.02428070269525051, -0.0030402722768485546, 0.030954018235206604, -0.02246762625873089, 0.007972517050802708, 0.02467910759150982, -0.015968695282936096, -0.04056045785546303, -0.02221573330461979, 0.00984408613294363, 0.03518495336174965, -0.010253814980387688, -0.06506281346082687, 0.023622017353773117, -0.004829320125281811, -0.006052817218005657, -0.05080762878060341, -0.07938174158334732, -0.04690618813037872, -0.02909410372376442, 0.027205193415284157, 0.04819810763001442, 0.010632144287228584, -0.04908064007759094, 0.05178585276007652, -0.0033127088099718094, -2.3516671717516147e-05, -0.0713091716170311, 0.015355682000517845, 0.045955169945955276, -0.037577904760837555, 0.023343242704868317, 0.0053481487557291985, -0.022547179833054543, 0.02095130831003189, -0.008230683393776417, -0.005310258362442255, -0.04065823182463646, -0.0014466219581663609, 0.012830358929932117, -0.010867995209991932, -0.058291565626859665, -0.0053076655603945255, -0.01079685240983963, 0.020732002332806587, -0.03289363533258438, 0.014400794170796871, 0.009034150280058384, -0.01641123928129673, -0.003958506975322962, 0.04030700773000717, -0.05043153837323189, -0.008530823513865471, 0.01064834464341402, -0.05532441660761833, -0.018583906814455986, 0.02091410756111145, -0.008819900453090668, -0.04115108773112297, 0.03481145203113556, -0.042013052850961685, 0.028666628524661064, -0.03234441950917244, -0.013014431111514568, 0.002421681536361575, -0.038368482142686844, 0.0017728321254253387, -0.016833139583468437, 0.040900446474552155, 0.025439849123358727, 0.06742728501558304, -0.028197698295116425, 0.011660267598927021, -0.006516147870570421, 0.031021300703287125, -0.010888981632888317, -0.043951261788606644, 0.004962387960404158, -0.05065905675292015, -0.006893993355333805, 0.030268829315900803, -0.022963304072618484, -0.03107449598610401, -0.012830360792577267, 0.041972577571868896, -0.010126767680048943, -0.012110444717109203, -0.04988789185881615, -0.008858977816998959, -0.0373859703540802, -0.0008807436097413301, -0.027268871665000916, 0.04801854491233826, 0.04080534726381302, 0.01606239750981331, 0.00034879505983553827, 0.033599644899368286, -0.015279504470527172, -0.026285771280527115, 0.008778206072747707, 0.0462041012942791, 0.048440732061862946, 0.009785027243196964, -0.011522720567882061, 0.036235395818948746, -0.008345741778612137, 0.026283789426088333, -0.03597809001803398, 0.014036250300705433, 0.019765937700867653, -0.04090142995119095, 0.023161834105849266, 0.037211108952760696, -0.007346062920987606, 0.04354625195264816, 0.008502448908984661, 0.028333419933915138, 0.020505042746663094, -0.04226696118712425, 0.01340771559625864, -0.03773699328303337, -0.03885430097579956, -0.010884908027946949, -0.03089321032166481, -0.017973942682147026, 0.06187426298856735, -0.011021302081644535, -0.020985517650842667, -0.04166233167052269, -0.0445975586771965, -0.004873992409557104, -0.014563697390258312, 0.010862573981285095, -0.03640858456492424, -0.0036265263333916664, 0.046833496540784836, 0.023333093151450157, -0.014952999539673328, 0.008159195072948933, -0.01262173056602478, 0.013383347541093826, 0.03808555006980896, 0.04018186032772064, 0.04317162185907364, -0.0346568338572979, 0.0074606831185519695, 0.050989780575037, -0.03939469903707504, 0.013329176232218742, -0.026150522753596306, -0.00018525196355767548, 0.01186135970056057, 0.013230242766439915, 0.0216831024736166, 0.00579712213948369, 0.032065268605947495, -0.009003416635096073, 0.014125235378742218, -0.010022953152656555, -0.0245481226593256, -0.006678412668406963, -0.02633616141974926, -0.03568033128976822, 0.04135245084762573, 0.03853712975978851, -0.010157142765820026, -0.016184642910957336, 0.0231179166585207, -0.024164998903870583, -0.013336471281945705, 0.028067955747246742, -0.008587958291172981, 0.007762477733194828, -0.04304571449756622, 0.018566487357020378, -0.021918727084994316, 0.022042324766516685, -0.007243278436362743, 0.0050511350855231285, 0.025674404576420784, 0.03323099762201309]]] in query.

# Local Llama3 Rag with Ollama Setup

In [22]:
import fitz  # PyMuPDF

def pdf_to_text(pdf_path):
    # Open the PDF file
    document = fitz.open(pdf_path)
    text = ""

    # Iterate through each page
    for page_num in range(len(document)):
        page = document.load_page(page_num)  # Load the page
        text += page.get_text()  # Extract text from the page

    return text

# Example usage
pdf_path = "/Users/aryanbarik/Downloads/nstarx-corpus/February2025TalbotsCatalog.pdf"  # Replace with your PDF file path
text = pdf_to_text(pdf_path)
print(text)

FEBRUARY 2025
look good
ok good
feel good
look good
feel good
Longer, brighter days are just ahead,  
and our new collection is here and ready  
to make this your feel-good February. 
Gorgeous colors. Tweeds and cozy textures.  
Great-fitting pants to freshen up your 
rotation. Beautiful matching sets with endless 
versatility. It’s pulled-together perfection for 
all the sunshine around the bend.
TALBOTS © 2025
Front Cover:
A. Floral Medallion Ruffle Top
Band neck. Long sleeve. At hip. M251/2"; 
P241/2"; W281/2"; WP271/2". Fully lined. 
Polyester. Machine wash. Mulberry wine/
rose sorbet. A01511
M XS-XL 	 P P-XL 	
$99.50
W X-3X 	
WP X-3X 	
$109
INDIGO
B. Ribbed Midi Skirt  Pull on. M301/2"; 
P281/2"; W293/4"; WP273/4". Viscose. 
Machine wash. Mulberry wine. B03511 
M XS-XL 	 P P-XL 	
$119
W X-3X 	
WP X-3X 	
$139
INDIGO
A. Slim Johnny Collar Pullover V-neck. 
Long sleeve. At hip. Viscose/LYCRA®. 
Machine wash. Mulberry wine. A03511  
M XS-XL	
P P-XL 	
$99.50
W X-3X 	
WP X-3X 	
$109
C. 

In [23]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Initialize the embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Create a FAISS index
dimension = 384  # Dimension of the embeddings
index = faiss.IndexFlatL2(dimension)

# Example documents
documents = [text]

# Generate embeddings for the documents
document_embeddings = embedding_model.encode(documents)

# Add embeddings to the FAISS index
index.add(np.array(document_embeddings))

  from .autonotebook import tqdm as notebook_tqdm


In [24]:
import ollama

def retrieve_relevant_documents(query, top_k=2):
    # Generate query embedding
    query_embedding = embedding_model.encode([query])
    
    # Search the FAISS index for the most relevant documents
    distances, indices = index.search(np.array(query_embedding), top_k)
    
    # Retrieve the relevant documents
    relevant_documents = [documents[i] for i in indices[0]]
    return relevant_documents

def generate_response(query):
    # Retrieve relevant documents
    relevant_docs = retrieve_relevant_documents(query)
    
    # Combine the query and relevant documents into a prompt
    context = "\n".join(relevant_docs)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    
    # Generate response using Llama3 through Ollama
    response = ollama.generate(model='llama3', prompt=prompt)
    
    return response['response']

# Example query
query = "Give a summary of the document."
response = generate_response(query)
print(response)

The document appears to be a Talbots clothing catalog for February 2025. It features a variety of clothing items, including tops, bottoms, dresses, outerwear, and accessories, in various colors and patterns. The catalog includes descriptions of each item, prices, and sizes available. There are also sections on new arrivals, customer service information, and ways to shop or contact Talbots.


In [25]:
queries = ['Give a summary of the document.', 
'When was the catalog published?' ,
'What is the description of the Slim Johnny Collar Pullover?' ,
'What are the prices for the Slim Johnny Collar Pullover?',
'What is the description of the Collarless Tweed Jacket?',
'What are the prices for the Collarless Tweed Jacket?',
'What are the prices for the Ribbed Midi Skirt?',
'What is the price for the Faceted Stones Necklace?',
'What are the prices of the Floral Medallion Ruffle Top?',
'What are the prices of the Jasmine Stripe Ribbed Crewneck Tee?',
'What is the description of the Jasmine Stripe Ribbed Crewneck Tee?']


In [26]:
responses = []

for query in queries:
    responses.append(generate_response(query))
    
responses

['The document appears to be a Talbots clothing catalog for February 2025. It features a variety of clothing items, including tops, bottoms, dresses, and outerwear, in various styles, colors, and sizes. The catalog includes descriptions of each item, prices, and ordering information. It also includes sections on new arrivals, customer service, and social media links.',
 'According to the information provided in the catalog, it was published in FEBRUARY 2025.',
 'There is no mention of a "Slim Johnny Collar Pullover" in this catalog. The closest match would be the "Ribbed Johnny Collar Pullover" mentioned on page 76, which has the description: "Long sleeve. At hip. Cotton/polyester. Machine wash."',
 "According to the catalog, the prices for the Ribbed Johnny Collar Pullover (A76511) are:\n\n* Men's XS-XL: $89.50\n* Women's X-3X: $99.50\n\nPlease note that these prices only apply to this specific style and size. Prices may vary depending on other factors, such as location or promotions.