In [15]:
import os
from pathlib import Path
from langchain_community.document_loaders import (
    TextLoader,
    NotebookLoader,
    PyPDFLoader
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

from langchain_community.vectorstores import Chroma

In [16]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings


In [17]:
os.environ["GOOGLE_API_KEY"] = "AIzaSyDfOnww_Cb2nzsccLYYRRBxy3SSWG-Cwd8"


In [18]:
SOURCE_CODE_DIRECTORY = "all_my_code"
# Path to the local vector database
PERSIST_DIRECTORY = "chroma_db"
# Chunking parameters
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

In [19]:

def load_documents(directory):
    """Loads all .py and .ipynb files from the specified directory."""
    documents = []
    for item_path in Path(directory).rglob('*'):
        if item_path.is_file():
            
            if item_path.suffix == '.pdf':
                print(f"Loading PDF: {item_path.name}")
                loader = PyPDFLoader(str(item_path))
                documents.extend(loader.load())
            
            if item_path.suffix == '.py':
                loader = TextLoader(str(item_path), encoding="utf-8")
                documents.extend(loader.load())
            elif item_path.suffix == '.ipynb':
                # NotebookLoader handles the JSON structure of .ipynb files
                loader = NotebookLoader(
                    str(item_path),
                    include_outputs=False, # Don't include cell outputs
                    max_output_length=20,
                    remove_newline=True,
                )
                documents.extend(loader.load())
    return documents


def chunk_documents(documents):
    """
    Chunks documents using a splitter that is aware of code syntax.
    This is better than a simple character splitter.
    """
    python_splitter = RecursiveCharacterTextSplitter.from_language(
        language="python", chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    chunks = python_splitter.split_documents(documents)
    return chunks

In [20]:
documents = load_documents(SOURCE_CODE_DIRECTORY)

Loading PDF: 1-s2.0-S0168365924004504-main (1).pdf


In [22]:
documents

[Document(metadata={'producer': 'Acrobat Distiller 8.1.0 (Windows)', 'creator': 'Elsevier', 'creationdate': '2024-09-06T18:30:07+00:00', 'crossmarkdomains[1]': 'elsevier.com', 'crossmarkmajorversiondate': '2010-04-23', 'creationdate--text': '6th September 2024', 'elsevierwebpdfspecifications': '7.0', 'robots': 'noindex', 'moddate': '2024-09-06T19:36:02+00:00', 'author': 'Gi-Su Na', 'doi': '10.1016/j.jconrel.2024.07.019', 'keywords': 'Microfluidics,Dual vortex,3D-printing,Uniform nanoparticles,mRNA delivery,Consistent production', 'title': 'Full-cycle study on developing a novel structured micromixer and evaluating the nanoparticle products as mRNA delivery carriers', 'subject': 'Journal of Controlled Release, 373 (2024) 161-171. doi:10.1016/j.jconrel.2024.07.019', 'crossmarkdomains[2]': 'sciencedirect.com', 'crossmarkdomainexclusive': 'true', 'source': 'all_my_code\\1-s2.0-S0168365924004504-main (1).pdf', 'total_pages': 11, 'page': 0, 'page_label': '161'}, page_content='Journal of Cont

In [23]:
chunks = chunk_documents(documents)

In [24]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

gemini key = "AIzaSyDfOnww_Cb2nzsccLYYRRBxy3SSWG-Cwd8"

In [25]:
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=PERSIST_DIRECTORY
)

In [26]:
# ## CHANGED ##: Import Google's chat and embedding classes
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [27]:
# --- 1. DEFINE CONSTANTS & INITIALIZE ---
# ## CHANGED ##: Point to the new Gemini-powered database
PERSIST_DIRECTORY = "chroma_db"
# ## CHANGED ##: Use a Gemini Pro model for generation
LLM_MODEL = "gemini-1.5-flash-latest"

# --- 2. SETUP THE RAG CHAIN ---

# ## CHANGED ##: Load the vector store using Google's embedding function
embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vectorstore = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)

# ## CHANGED ##: Initialize the Gemini LLM
llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.1, convert_system_message_to_human=True)
# Note: `convert_system_message_to_human=True` is sometimes needed for Gemini models
# when using prompt templates that have a system message.

# Create a retriever to fetch relevant documents
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# This prompt template is generic and works well with Gemini too.
prompt_template = """
You are an expert researcg assistant and lab  mentor.
Your task is to answer questions about the lab's database which is either code or other paper related information.
the questions might be related to codes, or materials, or scientific. you need to answer the questions as concisely as possible.
Use the following retrieved context to answer the question.
If you don't know the answer from the context, just say that you don't know.
Be concise and provide code snippets from the context if they are relevant.

CONTEXT:
{context}

QUESTION:
{input}

ANSWER:
"""
prompt = ChatPromptTemplate.from_template(prompt_template)

# This chain takes a question and the retrieved documents and generates an answer.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# This is the final chain that combines the retriever and the question-answer chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [28]:
response = rag_chain.invoke({"input": "i need python function to convert the flowrate to the speed value of the pump"})



In [29]:
response

{'input': 'i need python function to convert the flowrate to the speed value of the pump',
 'context': [Document(metadata={'source': 'all_my_code\\main_integrated.py'}, page_content='def speed(flowrate):   #ml/min\n    ulpers = flowrate*(100/6)\n    return round((48000/12500)*ulpers)\n\n#initialize the pump (one pump)\ndef initializer(pump):\n    controller.pumps[pump].initialize(valve_position="E")\n    \n    \n#initialize all the pumps   \ndef init_all(washer = False ):\n    aprocess1 = threading.Thread(target=initializer,args=("dox",))\n    aprocess2 = threading.Thread(target=initializer,args=("lip",))\n    aprocess4 = threading.Thread(target=initializer,args=("inter",))\n    aprocess1.start()\n    aprocess2.start()\n    aprocess4.start()\n\n\ndef init_all1(washer = False ):\n    aprocess1 = threading.Thread(target=initializer,args=("dox",))\n    aprocess2 = threading.Thread(target=initializer,args=("lip",))\n    aprocess1.start()\n    aprocess2.start()'),
  Document(metadata={'sour

In [30]:
import gradio as gr # Import Gradio instead of Streamlit


In [31]:
def get_bot_response(message, history):
    """
    This is the core function that connects the RAG chain to the Gradio UI.
    """
    print("User Query:", message)
    
    # Invoke the RAG chain with the user's message
    response = rag_chain.invoke({"input": message})
    
    # The actual answer is in the 'answer' key
    answer = response["answer"]
    
    print("Bot Answer:", answer)
    return answer

In [32]:
# 2. Create and launch the Gradio Chat Interface
# This single line of code creates the entire chat UI
demo = gr.ChatInterface(
    fn=get_bot_response, # The function to call when the user sends a message
    title="🔬 Local Lab Mentor",
    description="Ask me anything about the documents and code in our lab's knowledge base!",
    theme="soft", # Optional: adds a nice theme
    examples=[
        ["What is the purpose of the data_analysis.py script?"],
        ["Summarize our paper on nanoparticle synthesis."],
        ["What is the concentration of Tris-HCl in the lysis buffer?"]
    ]
)

In [33]:
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://379ebd09e2d674b076.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




User Query: i need python function to convert the flowrate to the speed value of the pump




Bot Answer: ```python
def speed(flowrate):   #ml/min
    ulpers = flowrate*(100/6)
    return round((48000/12500)*ulpers)
```
User Query: flowrate to speed 




Bot Answer: 12500 (for 12.5 mL syringe;  changeable)
User Query: python code to convert flowrate to speed




Bot Answer: ```python
def speed(flowrate):   #ml/min
    ulpers = flowrate*(100/6)
    return round((48000/12500)*ulpers)
```
User Query: what if my syringe is 10 mL? show me the code




Bot Answer: There is no code provided in the context.  The context only describes a relationship between syringe volume and a pump speed parameter (12500), stating that this value is adjustable.  To calculate a new pump speed for a 10 mL syringe, further information is needed on the relationship between syringe volume and pump speed.
User Query: so in the code the 12500 is written. and this is for the 12.5 mL syringe. if i want to make it for 10 mL, how the code will be?




Bot Answer: The provided text only states that 12500 is used for a 12.5 mL syringe and can be changed.  It doesn't provide the formula or code used to calculate this value.  Therefore, I cannot determine the code for a 10 mL syringe without more information.
User Query: how about instead of 12500, just saying 10000




Bot Answer: Changing the value from 12500 to 10000 would alter the calculated flow rate.  The original value likely represents a specific relationship between pump speed and syringe volume (12.5 mL).  A reduction to 10000 would decrease the flow rate.  Further context is needed to determine the impact on the experiment.
User Query: how about bayesian optimization code? show me the section




Bot Answer: The provided text imports `skopt.optimizer`  but doesn't show any Bayesian optimization code using it.  There's no code demonstrating the use of `Optimizer` or other Bayesian optimization functions.
User Query: so how to make optimizer to make optimization ?




Bot Answer: The provided code uses `Optimizer(bounds,base_estimator=gpr,n_initial_points=3,acq_func="EI",random_state=np.random.randint(1200))` to perform optimization.  The `ask()` method suggests points, `flow()` evaluates them, and `tell()` updates the optimizer.  The loop continues until the difference (`dif`) falls below a threshold (`gap`).


In [None]:
demo.close()