In [15]:
from langchain_community.document_loaders import TextLoader, WebBaseLoader, PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
import bs4
from langchain_community.llms import Ollama

In [2]:
import os
import warnings

from dotenv import load_dotenv

# Pull variables defined in a local .env file into the process environment.
load_dotenv()

# Mirror the project's OPENAI_KEY variable into OPENAI_API_KEY.
# os.environ only accepts strings, so assigning the None that os.getenv returns
# for a missing variable would raise TypeError and stop the notebook here.
openai_key = os.getenv("OPENAI_KEY")
if openai_key is None:
    warnings.warn(
        "OPENAI_KEY is not set (checked the environment and .env); "
        "OPENAI_API_KEY was left unchanged."
    )
else:
    os.environ["OPENAI_API_KEY"] = openai_key

#### Loading a text file

In [2]:
# Load the local text file "lll.txt" through LangChain's TextLoader.
txt_loader = TextLoader(file_path="lll.txt")
txt_doc = txt_loader.load()

# Bare expression: the notebook renders the loaded documents as the cell output.
txt_doc

[Document(page_content='Large Language Models (LLMs) are deep learning models trained on massive datasets to generate human-like text. They use Transformer architectures with self-attention mechanisms to process and generate contextually relevant text. Retrieval-Augmented Generation (RAG) enhances LLMs by providing external knowledge retrieval from databases, improving accuracy and factual consistency.\n', metadata={'source': 'lll.txt'})]

#### Let's load information about my country, Bangladesh

In [3]:
# Build the BeautifulSoup filter first so the loader call stays readable.
# It is handed to the parser as `parse_only`, keyed on these CSS class names.
wiki_strainer = bs4.SoupStrainer(
    class_=("client-js", "skin--responsive", "mw-page-container-inner")
)

web_loader = WebBaseLoader(
    web_path="https://en.wikipedia.org/wiki/Bangladesh",
    bs_kwargs={"parse_only": wiki_strainer},
)
web_doc = web_loader.load()

# Bare expression: the notebook renders the loaded documents as the cell output.
web_doc

[Document(page_content='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nEtymology\n\n\n\n\n\n\n\n\n2\nHistory\n\n\n\n\n\n\n\n\n3\nGeography\n\n\n\n\nToggle Geography subsection\n\n\n\n\n\n3.1\nClimate\n\n\n\n\n\n\n3.1.1\nClimate change\n\n\n\n\n\n\n\n\n\n\n3.2\nBiodiversity\n\n\n\n\n\n\n\n\n\n\n4\nGovernment and politics\n\n\n\n\nToggle Government and politics subsection\n\n\n\n\n\n4.1\nAdministrative divisions\n\n\n\n\n\n\n\n\n4.2\nForeign relations\n\n\n\n\n\n\n\n\n4.3\nMilitary\n\n\n\n\n\n\n\n\n4.4\nCivil society\n\n\n\n\n\n\n\n\n4.5\nHuman rights\n\n\n\n\n\n\n\n\n4.6\nCorruption\n\n\n\n\n\n\n\n\n\n\n5\nEconomy\n\n\n\n\nToggle Economy subsection\n\n\n\n\n\n5.1\nEnergy\n\n\n\n\n\n\n\n\n\n\n6\nDemographics\n\n\n\n\nToggle Demographics subsection\n\n\n\n\n\n6.1\nUrban centres\n\n\n\n\n\n\n\n\n6.2\nLanguage\n\n\n\n\n\n\n\n\n6.3\nReligion\n\n\n\n\n\n\n\n\n6.4\nEducation\n\n\n\n\n\n\n\n\n6.5\nHealth\n\n\n\n\n\n\n\n\n\n\n7\nCulture\n\n\n\n\n

#### Loading my research paper

In [3]:
# Load the research paper PDF from the working directory with PyPDFLoader.
pdf_loader = PyPDFLoader(
    file_path="A-comperative-analysis-on-fake-news-detection-methods.pdf"
)
pdf = pdf_loader.load()

# Bare expression: the notebook renders the loaded documents as the cell output.
pdf

[Document(page_content='A Comparative Analysis on Fake News Detection Methods\nMd. Badhon Miah\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\nbadhon1512@gmail.com\nTawhid Khondakar\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\ntawhid.khondakar18@gmail.com\nAtanu Saha\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\natanu.saha415@gmail.com\nArrafi Ahsan Somudro\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\narrafeeahsan@gmail.com\nDr. Dip Nandi\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\ndip.nandi@aiub.edu\nABSTRACT\nIn present era, believing a news is as easier that it is also hard to\nbelieve. It is highly possible that people who came across a news\ndon’t justify it before believing. Majority of people believe a news on\nthe basis of its spreading. But believing a news which is fake impacts\nthe world including politics, economics, education, health, business,\ncommunications and man

### Let's make the vector store

In [10]:
# Shell escape: ask the Ollama CLI to download the `all-minilm` model, which the
# OllamaEmbeddings(model="all-minilm") cell in this notebook refers to by name.
!ollama pull all-minilm


[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest 
pulling 797b70c4edf8...   0% ▕                ▏    0 B/ 45 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 797b70c4edf8...   0% ▕                ▏    0 B/ 45 MB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 797b70c4edf8...   0% ▕                ▏    0 B/ 45 MB                  [?25h[?25l[2K

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded PDF documents into smaller overlapping chunks
# (chunk_size=1000, chunk_overlap=200) for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(pdf)

# Preview only the first five chunks to keep the cell output short.
documents[:5]

[Document(page_content='A Comparative Analysis on Fake News Detection Methods\nMd. Badhon Miah\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\nbadhon1512@gmail.com\nTawhid Khondakar\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\ntawhid.khondakar18@gmail.com\nAtanu Saha\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\natanu.saha415@gmail.com\nArrafi Ahsan Somudro\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\narrafeeahsan@gmail.com\nDr. Dip Nandi\nAmerican International\nUniversity-Bangladesh\nDhaka, Bangladesh\ndip.nandi@aiub.edu\nABSTRACT\nIn present era, believing a news is as easier that it is also hard to\nbelieve. It is highly possible that people who came across a news\ndon’t justify it before believing. Majority of people believe a news on\nthe basis of its spreading. But believing a news which is fake impacts\nthe world including politics, economics, education, health, business,\ncommunications and man

In [9]:
# Embed the chunks with the `all-minilm` Ollama model and build a FAISS
# vector store from them.
embedding_function = OllamaEmbeddings(model="all-minilm")
db = FAISS.from_documents(
    documents=documents,
    embedding=embedding_function,
)


In [10]:
# Run a similarity search against the vector store and print the text of the
# first returned chunk.
query = "Who wrote the paper -A Comparative Analysis on Fake News Detection Methods?"
retireved_results = db.similarity_search(query)

top_match = retireved_results[0]
print(top_match.page_content)

the world including politics, economics, education, health, business,
communications and many more domains of life. To bounce back
from these impacts, it is necessary to know in depth of fake news
and its properties. Thus, in this paper we discuss the real definitions
of fake news, how a fake news is made, and its impact on life
and the science behind fake news. As currently people are more
interested to research on fake news, we discuss and analyze existing
detecting methodologies, dividing each into certain criteria. Finally,
we conclude by giving a table which contains comparative analysis
of detecting methods and suggested some future work.
KEYWORDS
Fake news, Fake News Detection Method, Social Media, Satire,
Hoaxes
ACM Reference Format:
Md. Badhon Miah, Tawhid Khondakar, Atanu Saha, Arrafi Ahsan Somudro,
and Dr. Dip Nandi. 2018. A Comparative Analysis on Fake News Detection
Methods. In Proceedings of ACM Conference (Conference’17). ACM, New
