In [1]:
import os
from pathlib import Path

from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into the
# environment; the return value is bound to `_` and not used afterwards.
_ = load_dotenv(find_dotenv())

# OpenRouter is used in place of OpenAI.
# Mandatory: indexing os.environ raises KeyError when the key is not set.
openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
# Optional: falls back to the default model when the variable is not set.
openrouter_model = os.getenv("OPENROUTER_MODEL", "openai/gpt-3.5-turbo")

In [2]:
from langchain_community.document_loaders import TextLoader

# Build the path with pathlib instead of a "Data\..." literal: the backslash
# form contains the invalid escape sequence "\I" and only resolves on Windows.
loader = TextLoader(Path("Data") / "IBM-Watson.txt")

# load() returns a list of Document objects.
loaded_data = loader.load()

In [3]:
# Display the list returned by loader.load().
loaded_data

[Document(metadata={'source': 'Data\\IBM-Watson.txt'}, page_content="Detailed Overview of IBM WatsonIBM Watson is more than just a question-answering system; it's a suite of AI-powered tools and services designed to help businesses understand, analyze, and act on data. Here's a closer look:Core TechnologyAt its core, Watson leverages a combination of AI techniques, including:Natural Language Processing (NLP): This enables Watson to understand and process human language in its various forms (text, speech, etc.). Watson uses NLP to extract meaning, intent, and context from input.Machine Learning (ML): Watson employs various ML algorithms to learn from data, identify patterns, and improve its performance over time. This allows Watson to automate tasks, make predictions, and provide insights.Deep Learning: A subset of ML, deep learning uses artificial neural networks to analyze complex data, such as images, audio, and text. Watson uses deep learning for tasks like image recognition, speech

In [4]:
# Raw text of the first (index 0) loaded document.
loaded_data[0].page_content

"Detailed Overview of IBM WatsonIBM Watson is more than just a question-answering system; it's a suite of AI-powered tools and services designed to help businesses understand, analyze, and act on data. Here's a closer look:Core TechnologyAt its core, Watson leverages a combination of AI techniques, including:Natural Language Processing (NLP): This enables Watson to understand and process human language in its various forms (text, speech, etc.). Watson uses NLP to extract meaning, intent, and context from input.Machine Learning (ML): Watson employs various ML algorithms to learn from data, identify patterns, and improve its performance over time. This allows Watson to automate tasks, make predictions, and provide insights.Deep Learning: A subset of ML, deep learning uses artificial neural networks to analyze complex data, such as images, audio, and text. Watson uses deep learning for tasks like image recognition, speech recognition, and natural language understanding.Knowledge Represent

# CharacterTextSplitter

In [5]:
from langchain_text_splitters import CharacterTextSplitter

# Split on blank lines ("\n\n", matched as a literal string rather than a
# regex), with chunk_size=1000 measured by len() and chunk_overlap=200.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separator="\n\n",
    is_separator_regex=False,
    length_function=len,
)

In [6]:
# create_documents() is given a list of strings, so the first document's text
# is wrapped in a one-element list.
texts = text_splitter.create_documents([loaded_data[0].page_content])

In [7]:
# Display the documents produced by the character splitter.
texts

[Document(metadata={}, page_content="Detailed Overview of IBM WatsonIBM Watson is more than just a question-answering system; it's a suite of AI-powered tools and services designed to help businesses understand, analyze, and act on data. Here's a closer look:Core TechnologyAt its core, Watson leverages a combination of AI techniques, including:Natural Language Processing (NLP): This enables Watson to understand and process human language in its various forms (text, speech, etc.). Watson uses NLP to extract meaning, intent, and context from input.Machine Learning (ML): Watson employs various ML algorithms to learn from data, identify patterns, and improve its performance over time. This allows Watson to automate tasks, make predictions, and provide insights.Deep Learning: A subset of ML, deep learning uses artificial neural networks to analyze complex data, such as images, audio, and text. Watson uses deep learning for tasks like image recognition, speech recognition, and natural langua

In [8]:
# Number of chunks produced by the character splitter.
len(texts)

1

In [9]:
# First chunk produced by the character splitter.
texts[0]


Document(metadata={}, page_content="Detailed Overview of IBM WatsonIBM Watson is more than just a question-answering system; it's a suite of AI-powered tools and services designed to help businesses understand, analyze, and act on data. Here's a closer look:Core TechnologyAt its core, Watson leverages a combination of AI techniques, including:Natural Language Processing (NLP): This enables Watson to understand and process human language in its various forms (text, speech, etc.). Watson uses NLP to extract meaning, intent, and context from input.Machine Learning (ML): Watson employs various ML algorithms to learn from data, identify patterns, and improve its performance over time. This allows Watson to automate tasks, make predictions, and provide insights.Deep Learning: A subset of ML, deep learning uses artificial neural networks to analyze complex data, such as images, audio, and text. Watson uses deep learning for tasks like image recognition, speech recognition, and natural languag

# RecursiveCharacterTextSplitter

In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Recursive splitter configured with chunk_size=26 and chunk_overlap=4.
recursive_splitter = RecursiveCharacterTextSplitter(chunk_overlap=4, chunk_size=26)

# split_text() is applied to the first document's raw text; the result is a
# list of string chunks.
text = recursive_splitter.split_text(loaded_data[0].page_content)

In [11]:
# Display the list of string chunks from the recursive splitter.
text

['Detailed Overview of IBM',
 'IBM WatsonIBM Watson is',
 'is more than just a',
 'a question-answering',
 "system; it's a suite of",
 'of AI-powered tools and',
 'and services designed to',
 'to help businesses',
 'understand, analyze, and',
 "and act on data. Here's a",
 'a closer look:Core',
 'TechnologyAt its core,',
 'Watson leverages a',
 'a combination of AI',
 'AI techniques,',
 'including:Natural',
 'Language Processing',
 '(NLP): This enables',
 'Watson to understand and',
 'and process human',
 'language in its various',
 'forms (text, speech,',
 'etc.). Watson uses NLP to',
 'to extract meaning,',
 'intent, and context from',
 'input.Machine Learning',
 '(ML): Watson employs',
 'various ML algorithms to',
 'to learn from data,',
 'identify patterns, and',
 'and improve its',
 'its performance over',
 'time. This allows Watson',
 'to automate tasks, make',
 'predictions, and provide',
 'insights.Deep Learning: A',
 'A subset of ML, deep',
 'learning uses artificial',
 'neura

In [12]:
# Number of chunks produced by the recursive splitter.
len(text)

202

# Embeddings
* Transform small chunks of text into numbers (vectors) that can be easily stored and searched by vector databases.

In [13]:
# Install sentence-transformers into the kernel's environment.
# NOTE(review): presumably the backend HuggingFaceEmbeddings needs — confirm.
%pip install sentence-transformers

Note: you may need to restart the kernel to use updated packages.


In [14]:
# HuggingFaceEmbeddings needs the sentence-transformers package installed by the preceding cell.
from langchain_community.embeddings import HuggingFaceEmbeddings

In [15]:
# Use HuggingFace embeddings
# NOTE(review): the warning echoed in this cell's output looks like LangChain's
# deprecation notice for this import location — confirm and consider migrating.
embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [16]:
# Short sample sentences to embed.
chunks_of_text = [
    "Hi there!",
    "Hello!",
    "What's your name?",
    "Bond, James Bond",
    "Hello Bond!",
]

In [17]:
# Embed every sample sentence with the HuggingFace model.
embeddings = embeddings_model.embed_documents(chunks_of_text)

In [18]:
# Display all embedding vectors (long output).
embeddings

[[-0.09151118993759155,
  0.025147823616862297,
  0.002047148533165455,
  0.005181029438972473,
  -0.015277055092155933,
  -0.052715789526700974,
  0.016557469964027405,
  0.04239160567522049,
  -0.029215682297945023,
  0.006825136952102184,
  -0.027684316039085388,
  0.007252806797623634,
  -0.010559403337538242,
  -0.04191433638334274,
  0.03230910003185272,
  0.03700307011604309,
  -0.05315244942903519,
  -0.009931979700922966,
  -0.10471195727586746,
  -0.03906850144267082,
  0.03552791476249695,
  0.07590001821517944,
  -0.10407807677984238,
  0.04460621997714043,
  -0.026902098208665848,
  -0.03198998421430588,
  -0.006866851355880499,
  0.064168781042099,
  -0.07118319720029831,
  -0.01758386380970478,
  0.012554174289107323,
  0.12845943868160248,
  -0.02276206575334072,
  0.05372180417180061,
  0.02916974574327469,
  0.0488780252635479,
  -0.0469440221786499,
  -0.07912519574165344,
  0.04516483098268509,
  0.02378946542739868,
  -0.011888730339705944,
  0.01180628128349781,
 

In [19]:
# Length (dimensionality) of the first embedding vector.
len(embeddings[0])

384

In [20]:
# Peek at the first five components of the first vector.
print(embeddings[0][:5])

[-0.09151118993759155, 0.025147823616862297, 0.002047148533165455, 0.005181029438972473, -0.015277055092155933]


# Vector Stores (a.k.a. Vector Databases)
* Store embeddings in a database built for very fast similarity search.

In [21]:
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

# Load the document and split it into chunks; embedding and storing the chunks
# happens in a later cell.
# The path is built with pathlib instead of a 'Data\...' literal, which
# contains the invalid escape sequence "\D" and only resolves on Windows.
loaded_document = TextLoader(Path("Data") / "Deep learning.txt").load()

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# split_documents() takes the loaded Document list and returns the chunks.
chunks_of_text = text_splitter.split_documents(loaded_document)



Created a chunk of size 1434, which is longer than the specified 1000
Created a chunk of size 1827, which is longer than the specified 1000
Created a chunk of size 2147, which is longer than the specified 1000


In [22]:
# Number of chunks produced by the split.
len(chunks_of_text)

9

In [23]:
# Embed with a HuggingFace model (rather than OpenAI) and index the chunks in a
# Chroma vector store.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = Chroma.from_documents(documents=chunks_of_text, embedding=embeddings)

In [24]:
# Display the Chroma vector store object.
vector_db

<langchain_chroma.vectorstores.Chroma at 0x232c63c1390>

In [25]:
question = "What is Deep learning?"

# Run a similarity search for the question and print the text of the first
# returned document.
response = vector_db.similarity_search(query=question)

print(response[0].page_content)

Deep learning is a subfield of machine learning inspired by the structure and function of the human brain's neural networks. It utilizes artificial neural networks with multiple layers (hence, "deep") to analyze and extract complex patterns from large datasets. This enables computers to learn intricate representations of data and perform tasks with a high level of accuracy, often surpassing traditional machine learning approaches, especially in areas like image and speech recognition, and natural language processing.

Here's a breakdown of key aspects:

How Deep Learning Works:


# Vector Store as Retriever
* Wrap the vector store in a retriever that returns the chunks whose embeddings are most similar to your question.

In [26]:
from langchain_community.document_loaders import TextLoader

# Build the path with pathlib instead of a 'Data\...' literal: the backslash
# form contains the invalid escape sequence "\D" and only resolves on Windows.
loader = TextLoader(Path("Data") / "Deep learning.txt")

In [27]:
%pip install faiss-gpu

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement faiss-gpu (from versions: none)
ERROR: No matching distribution found for faiss-gpu


In [28]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Read the file through the loader created earlier and split it into chunks.
loaded_document = loader.load()
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
chunks_of_text = text_splitter.split_documents(loaded_document)

# Embed with a HuggingFace model (rather than OpenAI) and build a FAISS index
# from the chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(documents=chunks_of_text, embedding=embeddings)

Created a chunk of size 1434, which is longer than the specified 1000
Created a chunk of size 1827, which is longer than the specified 1000
Created a chunk of size 2147, which is longer than the specified 1000


In [29]:
# Display the FAISS vector store object.
vector_db

<langchain_community.vectorstores.faiss.FAISS at 0x2328268dab0>

In [30]:
# Expose the vector store as a retriever; {"k": 3} is forwarded as search kwargs.
retriever = vector_db.as_retriever(search_kwargs={"k": 3})

In [31]:
# Query the retriever and display the documents it returns.
response = retriever.invoke("Why deep learning is used?")
response

[Document(id='9f004a66-2934-4d56-bf84-cece8e9ad24e', metadata={'source': 'Data\\Deep learning.txt'}, page_content='Deep learning is a subfield of machine learning inspired by the structure and function of the human brain\'s neural networks. It utilizes artificial neural networks with multiple layers (hence, "deep") to analyze and extract complex patterns from large datasets. This enables computers to learn intricate representations of data and perform tasks with a high level of accuracy, often surpassing traditional machine learning approaches, especially in areas like image and speech recognition, and natural language processing.\n\nHere\'s a breakdown of key aspects:\n\nHow Deep Learning Works:'),
 Document(id='1852049c-5ebd-4d2a-9875-b9216ba1fd90', metadata={'source': 'Data\\Deep learning.txt'}, page_content='Advantages of Deep Learning:\n\n Automatic Feature Extraction: Reduces the need for manual feature engineering, simplifying the development process.\n Handles Complex Data: Can

In [32]:
# Number of documents returned by the retriever.
len(response)

3