In [None]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv, find_dotenv

# Load variables from the .env file located by find_dotenv() into the environment.
_ = load_dotenv(find_dotenv())

# Read the Groq credential from the environment (None when the variable is unset).
groq_api_key = os.environ.get("GROQ_API_KEY")

# Chat model handle used by the following cells.
# NOTE(review): groq_api_key is not passed to ChatGroq — presumably the client
# reads GROQ_API_KEY from the environment itself; confirm.
# NOTE(review): confirm "llama3-70b-8192" is still a model id that Groq serves.
llmModel = ChatGroq(model="llama3-70b-8192")



In [None]:
# Conversation as (role, text) pairs: a system instruction followed by the user's question.
messages = [("system", "You are an helpful assistant."), ("human", "tell me about dswithbappy")]

# Send the conversation to the model and print only the text of its reply.
response = llmModel.invoke(messages)
print(response.content)

# Data Loaders

In [None]:
pip install langchain_community

In [None]:
from langchain_community.document_loaders import TextLoader

# Load data.txt (a relative path) into a list of documents.
loader = TextLoader(file_path="data.txt")
load = loader.load()

# Show the text of the first document returned by the loader.
print(load[0].page_content)


In [None]:
# Pdf data

from langchain_community.document_loaders import PyPDFLoader

# NOTE(review): 'filename' looks like a placeholder — point it at a real PDF path.
loader = PyPDFLoader(file_path="filename")

# Load the PDF and split it into documents in a single call.
loaded_data = loader.load_and_split()

In [None]:
# CSV loader

from langchain_community.document_loaders import CSVLoader

# NOTE(review): 'filename' looks like a placeholder — point it at a real CSV path.
loader = CSVLoader(file_path="filename")

# Keep the loaded documents; evaluate `loaded_data` in a cell to inspect them.
loaded_data = loader.load()

In [None]:
from langchain_community.document_loaders import UnstructuredHTMLLoader

# NOTE(review): 'filename' looks like a placeholder — point it at a real HTML file.
loader = UnstructuredHTMLLoader(file_path="filename")

# Load the HTML file into documents.
loaded_data = loader.load()

In [None]:
from langchain_community.document_loaders import WikipediaLoader

# Query Wikipedia for "Tesla", capped at one document.
loader = WikipediaLoader(
    query="Tesla",
    load_max_docs=1,
)

# Unlike the other loader cells, loaded_data here is the page_content of the
# first result, not a list of documents.
loaded_data = loader.load()[0].page_content

# -----RAG Implementation-----

# 1. Splitters

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_groq import ChatGroq

# Load variables from the .env file located by find_dotenv() into the environment.
_ = load_dotenv(find_dotenv())

# Read the Groq credential from the environment (None when the variable is unset).
groq_api_key = os.environ.get("GROQ_API_KEY")

# Chat model handle for the RAG section.
# NOTE(review): groq_api_key is not passed to ChatGroq — presumably the client
# reads GROQ_API_KEY from the environment itself; confirm.
llm = ChatGroq(model="llama3-70b-8192")

from langchain_community.document_loaders import TextLoader

# Load data.txt (a relative path) into a list of documents.
loader = TextLoader(file_path="data.txt")
loaded = loader.load()

# Show the text of the first loaded document.
print(loaded[0].page_content)


# Text Splitters 

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_groq import ChatGroq

# Load variables from the .env file located by find_dotenv() into the environment.
_ = load_dotenv(find_dotenv())

# Read the Groq credential from the environment (None when the variable is unset).
groq_api_key = os.environ.get("GROQ_API_KEY")

# Chat model handle; this cell repeats the setup so it can run on its own.
# NOTE(review): groq_api_key is not passed to ChatGroq — presumably the client
# reads GROQ_API_KEY from the environment itself; confirm.
llm = ChatGroq(model="llama3-70b-8192")

from langchain_community.document_loaders import TextLoader

# Load data.txt (a relative path); `loaded` is the list of documents to be split below.
loader = TextLoader(file_path="data.txt")
loaded = loader.load()


from langchain_text_splitters import CharacterTextSplitter

# Split on blank lines ("\n\n"), targeting chunks of about 1000 characters
# (measured with len) with 200 characters of overlap between neighbours.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)

# create_documents() expects a list of texts. Passing the bare string would
# iterate it character by character and yield one document per character,
# so the page content is wrapped in a list.
chunks = text_splitter.create_documents([loaded[0].page_content])


In [None]:
# Print the full list of chunk documents.
print(chunks)
# As the last expression of the cell, the chunk count is shown as the cell output.
len(chunks)

# Recursive Character Text Splitter


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter 

# Very small chunks: chunk_size 26 with an overlap of 4.
# NOTE: despite its name, `chunkss` is the splitter object, not the chunks.
chunkss = RecursiveCharacterTextSplitter(chunk_size=26, chunk_overlap=4)

# `text` holds the split result for the first loaded document; leaving it as
# the last expression displays it as the cell output.
text = chunkss.split_text(loaded[0].page_content)
text

# Embeddings
- Embeddings transform small chunks of text into numeric vectors that vector databases can easily store and search.

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding client created with default settings.
# NOTE(review): presumably reads OpenAI credentials from the environment
# (e.g. OPENAI_API_KEY) — confirm.
embeddings_model = OpenAIEmbeddings()

In [None]:
# Sample sentences used as input for the embedding call.
chunks_of_text = [
    "Hi there!",
    "Hello!",
    "What's your name?",
    "Bond, James Bond",
    "Hello Bond!",
]

In [None]:
# Embed every entry of chunks_of_text and keep the result in `embeddings`.
embeddings = embeddings_model.embed_documents(texts=chunks_of_text)


In [None]:

from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

# Read data.txt, then split it with chunk_size 1000 and no overlap.
# The embedding and vector-store steps happen in later cells.
loaded_document = TextLoader(file_path="data.txt").load()
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)

# NOTE: this rebinds chunks_of_text (a list of plain strings in an earlier cell)
# to the documents produced by the splitter.
chunks_of_text = text_splitter.split_documents(documents=loaded_document)

In [None]:
# Number of chunks produced by the splitter (shown as the cell output).
len(chunks_of_text)


In [None]:
# Embed the chunks with a fresh OpenAIEmbeddings instance and build a Chroma store from them.
vector_db = Chroma.from_documents(
    documents=chunks_of_text,
    embedding=OpenAIEmbeddings(),
)


In [None]:
question = "What did the president say about the John Lewis Voting Rights Act?"

# Run a similarity search for the question and print the text of the first hit.
# NOTE: `response` now holds the search results, replacing any earlier value of that name.
response = vector_db.similarity_search(query=question)
print(response[0].page_content)

# FAISS

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Build the loader here rather than reusing whichever `loader` an earlier cell
# left in the kernel, so this cell does not depend on hidden state.
loaded_document = TextLoader('data.txt').load()

# Split with chunk_size 1000 and no overlap.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

chunks_of_text = text_splitter.split_documents(loaded_document)

embeddings = OpenAIEmbeddings()

# Embed the chunks and build a FAISS store from them; this rebinds vector_db.
vector_db = FAISS.from_documents(chunks_of_text, embeddings)