In [1]:
# Attach Google Drive to the Colab runtime at the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Make the project folder on Drive the working directory, so the relative
# paths used by later cells (requirements.txt, the src package, data/pdfs/...)
# are resolved against it.
%cd /content/drive/MyDrive/LangChain-VectorStore

/content/drive/MyDrive/LangChain-VectorStore


In [3]:
# List the current directory as a quick check that the project files are visible.
%ls

LICENSE  notebooks/  README.md  requirements.txt  src/


In [4]:
!pip install -r requirements.txt

Collecting faiss-cpu (from -r requirements.txt (line 6))
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting PyPDF2 (from -r requirements.txt (line 7))
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 27.5/27.5 MB 58.5 MB/s eta 0:00:00
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 15.3 MB/s eta 0:00:00
Installing collected packages: PyPDF2, faiss-cpu
Successfully installed PyPDF2-3.0.1 faiss-cpu-1.9.0.post1


## Example1: Process PDF, Chunk, Vectorize, and Build Index

In [10]:
from src.pdf_processing import chunk_text, extract_text_from_pdf
from src.vector_store import (
    build_faiss_index,
    query_vector_store,
    text_to_vectors,
)

# Ingest: pull the text out of the PDF, split it into chunks, and convert
# the chunks to vectors.
pdf_path = "data/pdfs/MERS.pdf"
text = extract_text_from_pdf(pdf_path)
chunks = chunk_text(text)
vectors = text_to_vectors(chunks)

# Index: build the FAISS index from the vectors and report how many entries
# it holds (no query is issued in this cell).
index = build_faiss_index(vectors)
print(f"Number of vectors in the index: {index.ntotal}")


Number of vectors in the index: 20


In [6]:
# Display the distance array produced by the similarity query.
# NOTE(review): in this notebook `distances` is first assigned by the
# query_vector_store(...) call in the Example2 cell further down, so on a
# fresh top-to-bottom run this cell would raise NameError unless the name is
# defined elsewhere. Run the query cell first.
distances

array([[0.7743592 , 0.9810858 , 0.98144263, 1.0074117 , 1.0537744 ]],
      dtype=float32)

In [7]:
# Display the text returned by extract_text_from_pdf for the PDF.
# NOTE(review): this echoes the whole string; a slice such as text[:500]
# would keep the saved output short.
text

"Contents lists available at ScienceDirect\nAntiviral Research\njournal homepage: www.elsevier.com/locate/antiviral\nMERS: Progress on the global response, remaining challenges and the way\nforward\nFAO-OIE-WHO MERS Technical Working Group∗(Ryan Aguannoa, Ahmed ElIdrissia,\nAmgad A. Elkholyb, Peter Ben Embarekb, Emma Gardnera, Rebecca Grantc, Heba Mahrousa,\nMamunur Rahman Malikb, Gounalan Pavaded, Sophie VonDobschuetza, Lidewij Wiersmaa,\nMaria D. Van Kerkhoveb))\naFood and Agriculture Organization of the United Nations, Italy\nbHealth Emergencies Programme, World Health Organization, Switzerland\ncCenter for Global Health, Institut Pasteur, France\ndWorld Organization for Animal Health, France\nARTICLE INFO\nKeywords:\nMERS-CoVResearchAnimal-human interfaceDromedary camels\nZoonosis\nVaccineABSTRACT\nThis article summarizes progress in research on Middle East Respiratory Syndrome (MERS) since a FAO-OIE-\nWHO Global Technical Meeting held at WHO Headquarters in Geneva on 25 –27 Septem

In [8]:
# Display the chunks produced by chunk_text(text).
# NOTE(review): this echoes every chunk; a slice such as chunks[:3] would
# keep the saved output short.
chunks

['Contents lists available at ScienceDirect Antiviral Research journal homepage: www.elsevier.com/locate/antiviral MERS: Progress on the global response, remaining challenges and the way forward FAO-OIE-WHO MERS Technical Working Group∗(Ryan Aguannoa, Ahmed ElIdrissia, Amgad A. Elkholyb, Peter Ben Embarekb, Emma Gardnera, Rebecca Grantc, Heba Mahrousa, Mamunur Rahman Malikb, Gounalan Pavaded, Sophie VonDobschuetza, Lidewij Wiersmaa, Maria D. Van Kerkhoveb)) aFood and Agriculture Organization of the United Nations, Italy bHealth Emergencies Programme, World Health Organization, Switzerland cCenter for Global Health, Institut Pasteur, France dWorld Organization for Animal Health, France ARTICLE INFO Keywords: MERS-CoVResearchAnimal-human interfaceDromedary camels Zoonosis VaccineABSTRACT This article summarizes progress in research on Middle East Respiratory Syndrome (MERS) since a FAO-OIE- WHO Global Technical Meeting held at WHO Headquarters in Geneva on 25 –27 September 2017. The meet

In [9]:
# Display the array returned by text_to_vectors(chunks); the repr below
# shows it as a float32 array (elided with "...").
vectors

array([[-0.0124561 ,  0.03324837,  0.01444573, ..., -0.11330806,
        -0.02787357,  0.07245143],
       [ 0.00438296,  0.08811328, -0.03757674, ..., -0.05256693,
         0.01694409,  0.06886669],
       [-0.03205243,  0.07444865,  0.05682194, ..., -0.01268619,
         0.04622333,  0.02322259],
       ...,
       [-0.02579407,  0.07377002, -0.03573692, ..., -0.02163483,
         0.02143645,  0.10362157],
       [-0.00511002,  0.05486447,  0.02700833, ..., -0.12491824,
         0.02665311,  0.03074878],
       [ 0.00925221,  0.04045442, -0.01566657, ..., -0.03281787,
        -0.09474514,  0.04823146]], dtype=float32)

## Example2: Query

In [11]:
from src.vector_store import query_vector_store

# Query the index built earlier in the notebook for the k closest chunks.
query = "Epidemiology of outbreaks in MERS"
distances, indices = query_vector_store(query, index, k=5)

# Pair every returned chunk id with its distance (row 0 = the single query).
# Negative ids are dropped: a FAISS search pads the result with -1 when fewer
# than k neighbours exist, and chunks[-1] would silently print the last chunk.
# NOTE(review): assumes query_vector_store returns the raw FAISS search
# output (distances, indices) - confirm against src/vector_store.
hits = [(idx, dist) for idx, dist in zip(indices[0], distances[0]) if idx >= 0]

# Output the results
print(f"Top {len(hits)} results for the query:")
for idx, dist in hits:
    print(f"Chunk {idx} with distance {dist}: {chunks[idx]}")

Top 5 results for the query:
Chunk 1 with distance 0.7534449100494385: August 2018, more than 2249 human cases from 27 countries have been reported to the World Health Organization (WHO) (World Health Organization, 2017a ). The FAO, OIE and WHO Tripartite have regularly brought together aﬀected member states, public health and animal o ﬃcials, and aca- demics to discuss what is known and unknown about the zoonotic origin of MERS-CoV ( World Health Organization, 2016 ;FAO, 2016, 2014 ;WHO Regional o ﬃce for the Eastern Mediterranean, 2013a ). The purposes of these meetings and workshops have been to advocate for more surveillance and research on MERS-CoV in animals and humans, to share information about how MERS-CoV is transmitted between animals, from animals to humans and between humans, to describe the diseases it causes, and to develop policies and guidelines for detection, https://doi.org/10.1016/j.antiviral.2018.09.002 Received 30 August 2018; Accepted 4 September 2018∗Correspondi

## Example3: Add New Data