In [8]:
!pip install ipykernel
!python -m ipykernel install --user --name=llmapp


Installed kernelspec llmapp in C:\Users\Albin\AppData\Roaming\jupyter\kernels\llmapp


In [9]:
# Smoke test: prints "OK" to confirm the kernel executes cells.
print("OK")

OK


In [14]:
# %%
import os

from dotenv import load_dotenv
from git import Repo
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.memory import ConversationSummaryMemory
from langchain.text_splitter import Language
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# ✅ Gemini imports
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

In [15]:
# Show the current working directory; the relative paths used in later cells
# ("test_repo/", "./db") resolve against it.
%pwd

'c:\\Users\\Albin\\Desktop\\LLM_PROJECT\\research'

In [16]:
# Create the clone target directory. Using os.makedirs with exist_ok=True keeps
# the cell idempotent (no error output on re-run) and independent of the shell.
os.makedirs("test_repo", exist_ok=True)

In [17]:
# %%
# Fetch the repository whose source code will be indexed.
repo_path = "test_repo/"
repo_url = "https://github.com/entbappy/End-to-end-Medical-Chatbot-Generative-AI"

# Cloning into a directory that already holds a checkout fails, which breaks
# "Restart & Run All". Reuse an existing clone and only hit the network when
# there is none yet.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(repo_url, to_path=repo_path)

In [18]:
# %%
# Parser configured for Python source files.
python_parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)

# Walk the cloned repository recursively, keeping only files ending in ".py".
loader = GenericLoader.from_filesystem(
    repo_path, glob="**/*", suffixes=[".py"], parser=python_parser
)

In [19]:
# Run the loader over the repository and keep the resulting Document objects.
documents = loader.load()

In [20]:
# Inspect the loaded documents (each carries `metadata` and `page_content`).
# NOTE(review): this displays the full list; consider `documents[:1]` to keep the output short.
documents

[Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='from flask import Flask, render_template, jsonify, request\nfrom src.helper import download_hugging_face_embeddings\nfrom langchain_pinecone import PineconeVectorStore\nfrom langchain_openai import OpenAI\nfrom langchain.chains import create_retrieval_chain\nfrom langchain.chains.combine_documents import create_stuff_documents_chain\nfrom langchain_core.prompts import ChatPromptTemplate\nfrom dotenv import load_dotenv\nfrom src.prompt import *\nimport os\n\napp = Flask(__name__)\n\nload_dotenv()\n\nPINECONE_API_KEY=os.environ.get(\'PINECONE_API_KEY\')\nOPENAI_API_KEY=os.environ.get(\'OPENAI_API_KEY\')\n\nos.environ["PINECONE_API_KEY"] = PINECONE_API_KEY\nos.environ["OPENAI_API_KEY"] = OPENAI_API_KEY\n\nembeddings = download_hugging_face_embeddings()\n\n\nindex_name = "medicalbot"\n\n# Embed each chunk and upsert the embeddings into your Pinecone index.\ndocsearch = PineconeVectorS

In [21]:
# Number of documents the loader produced.
len(documents)

7

In [22]:
# %%
# Chunking parameters passed to the splitter: target chunk size and the overlap
# kept between neighbouring chunks.
splitter_options = {"chunk_size": 500, "chunk_overlap": 20}

# Splitter configured for Python source.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, **splitter_options
)

In [24]:
# Split the loaded documents into chunks; these chunks are what gets embedded below.
texts = documents_splitter.split_documents(documents)
# Number of chunks produced by the split.
len(texts)

13

In [31]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The project keeps the key under GEMINI_API_KEY, while the Gemini clients created
# in later cells are given no explicit key, so it is re-exported as GOOGLE_API_KEY.
# Assigning None to os.environ raises an opaque "str expected, not NoneType"
# TypeError, so fail early with an actionable message when the key is missing.
gemini_api_key = os.getenv("GEMINI_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; add it to your .env file or environment."
    )
os.environ["GOOGLE_API_KEY"] = gemini_api_key


In [32]:
# Embedding model used to vectorise each code chunk.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed all chunks and index them in a Chroma store backed by the ./db directory.
# With `persist_directory` set, recent Chroma versions write to disk automatically;
# the explicit `vectordb.persist()` call is deprecated and only triggered a
# deprecation warning, so it has been dropped.
# NOTE(review): re-running this cell against an existing ./db presumably adds the
# same chunks again — confirm, and clear ./db first if duplicates matter.
vectordb = Chroma.from_documents(texts, embedding=embeddings, persist_directory="./db")

  vectordb.persist()


In [37]:
# %%
# Gemini chat model used both for answering and for summarising the conversation.
# No key is passed here — presumably picked up from GOOGLE_API_KEY set earlier; confirm.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")


In [38]:
# Conversation memory that uses the same LLM to summarise prior turns; the summary
# is exposed to the chain under the "chat_history" key as message objects.
memory = ConversationSummaryMemory(
    llm=llm,
    return_messages=True,
    memory_key="chat_history",
)

In [39]:
# Retriever over the Chroma store using the "mmr" search type, fetching 8 chunks per query.
code_retriever = vectordb.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 8},
)

# Conversational QA chain: retrieved chunks plus the summarised chat history are
# handed to the LLM to answer questions about the indexed repository.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=code_retriever, memory=memory)


In [40]:
# Ask the chain about a function defined in the indexed repository.
question = "what is download_hugging_face_embeddings function?"
# Calling the chain object directly (`qa(question)`) goes through the deprecated
# `Chain.__call__`; `invoke` is the supported entry point and returns the same
# result dict (the answer is under the "answer" key).
result = qa.invoke({"question": question})

In [41]:
# Print only the generated answer text from the chain's result dict.
print(result['answer'])

The `download_hugging_face_embeddings` function downloads sentence embeddings from Hugging Face using the `sentence-transformers/all-MiniLM-L6-v2` model.  The function returns these embeddings.


In [42]:
# %%
# Follow-up question; the chain's memory carries the summary of the previous turn.
question = "what is load_pdf_file function?"
# Use `invoke` instead of the deprecated direct call `qa(question)`; the returned
# dict still exposes the generated text under "answer".
result = qa.invoke({"question": question})
print(result['answer'])

The `load_pdf_file` function uses the `DirectoryLoader` from the Langchain library to load PDF files from a specified directory.  It returns a list of `Document` objects, each representing a loaded PDF file.
