In [1]:
import os
import time
import streamlit as st

from langchain import hub
from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.llms import Ollama
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, WebBaseLoader
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.output_parsers import PydanticOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# PDF to index; change this to the name of your own document.
FILEPATH = "ml.pdf"

# Name of the local Ollama model used to generate answers.
LOCAL_MODEL = "llama3"

# Name of the Ollama embedding model. According to Nomic's announcement,
# nomic-embed-text supports a longer context length and outperforms OpenAI's
# text-embedding-ada-002 and text-embedding-3-small:
# https://www.nomic.ai/blog/posts/nomic-embed-text-v1
EMBEDDING = "nomic-embed-text"

In [5]:
# Read the PDF at FILEPATH into LangChain documents.
loader = PyPDFLoader(FILEPATH)
data = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=1500,
# chunk_overlap=100). This is text chunking, not tokenization; the overlap
# keeps some shared context between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1500,
)
all_splits = text_splitter.split_documents(data)

In [7]:
# Embed every chunk and store the vectors in a persistent Chroma collection.
# The collection is written under this directory so the embeddings remain
# available for future use, even after the script or kernel stops.
persist_directory = 'data'

# Deterministic ids (source file + chunk position). Without explicit ids,
# every run of this cell inserts all chunks again under freshly generated
# ids, so the persisted collection accumulates duplicates and the retriever
# starts returning the same passage several times. With stable ids a re-run
# overwrites the existing entries instead.
chunk_ids = [f"{FILEPATH}:{position}" for position in range(len(all_splits))]

vectorstore = Chroma.from_documents(
    documents=all_splits,
    ids=chunk_ids,
    collection_name="rag_chroma",
    embedding=OllamaEmbeddings(model=EMBEDDING),
    persist_directory=persist_directory,
)


  embedding=OllamaEmbeddings(model=EMBEDDING),


In [6]:
# # Testing: baseline answer from the local model without retrieval (no RAG), kept for comparison
# model_local = ChatOllama(model="llama3")

# before_rag = "What is {topic}"
# b4_rag_prompt = ChatPromptTemplate.from_template(before_rag)
# b4_rag_chain = b4_rag_prompt | model_local | StrOutputParser()
# print(b4_rag_chain.invoke({"topic":"Ollama"}))

In [9]:
# Answer-generating model served by the Ollama instance at `base_url`.
llm = Ollama(
    base_url="http://localhost:11434",
    model=LOCAL_MODEL,
    verbose=True,
    # Stream generated tokens to stdout as they arrive. Handlers are passed
    # through `callbacks`; the `callback_manager=CallbackManager([...])` form
    # is deprecated and emits a warning on construction.
    callbacks=[StreamingStdOutCallbackHandler()],
)

# Retriever over the Chroma collection, using its default search settings.
retriever = vectorstore.as_retriever()

  llm = Ollama(base_url="http://localhost:11434",
  llm = Ollama(base_url="http://localhost:11434",


In [11]:
# Prompt layout: retrieved context, then the running conversation history,
# then the user's question, ending with the cue for the model's reply.
# Spelled out line by line so the leading/trailing whitespace is explicit.
template = (
    " \n"
    "    Context: {context}\n"
    "    History: {history}\n"
    "\n"
    "    User: {question}\n"
    "    Chatbot:\n"
    "    "
)

# Plain-string prompt; the three placeholders above are filled in at run time.
prompt = PromptTemplate(
    template=template,
    input_variables=["history", "context", "question"],
)

# Conversation memory: stores each question/answer pair and exposes the
# transcript to the prompt under the "history" variable.
memory = ConversationBufferMemory(
    memory_key="history",
    # `prompt` is a plain-string PromptTemplate, so the history must be
    # supplied as formatted text. With return_messages=True the {history}
    # placeholder receives a list of message objects and the prompt ends up
    # containing its Python repr instead of a readable transcript.
    return_messages=False,
    # Only the user's question is recorded as the human turn (not the
    # retrieved documents that are passed alongside it).
    input_key="question",
)


  memory = ConversationBufferMemory(


In [13]:
# Options forwarded to the inner "stuff" documents chain: the custom prompt,
# the conversation memory, and verbose logging of the formatted prompt.
stuff_chain_options = {
    "verbose": True,
    "prompt": prompt,
    "memory": memory,
}

# Retrieval-QA chain: fetch chunks with `retriever`, stuff them into the
# prompt's {context}, and let `llm` produce the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
    chain_type_kwargs=stuff_chain_options,
)

In [15]:
# Question to ask about the PDF; edit this string to ask something else.
query = "What is classification"

# Optional: uncomment to nudge the model toward short, PDF-only answers.
# query += ". Only from this pdf. Keep it short"

# Older call style, superseded by qa_chain.invoke(...) in the next cell.
# response = qa_chain(query)

In [17]:
# Run the retrieval-QA chain on the question. Ending the cell with the bare
# name makes the notebook display the returned dict ('query' and 'result').
response = qa_chain.invoke({"query": query})
response



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m 
    Context: GB RAM.  
The Implementation results are shown in Figure 6. Initially, the input images are preprocessed, then f eatures are 
extracted using pretrained CNN. Finally, classification is performed using SVM classifier.

Nawal Soliman ALKolifi ALEnezi  / Procedia Computer Science 163 (2019) 85–92 87
 
 
Author name / Procedia Computer Science 00 (2019) 000–000 3 
 
 
 
4. Methodology 
 
In this section, the methodology of the proposed system for detection , extraction and classification of skin 
diseases images is described. The system will help significantly in the detection of melanoma, Eczema and Psoriasis. 
The whole architecture can be divided into several modules comprising of preprocessing, feature ext raction, and 
classification. The block diagram of the system is shown in Fig 2.  
 
 
 

{'query': 'What is classification',
 'result': "You're asking what classification is!\n\nIn the context of machine learning and computer science, classification refers to the process of assigning a class label or category to an input instance based on its features or characteristics.\n\nIn simpler terms, imagine you have a bunch of different types of fruits (e.g., apples, bananas, oranges), and you want to create a system that can automatically identify which fruit is which based on its color, shape, size, and other characteristics. That's basically what classification is!\n\nIn this specific context, the authors are using machine learning techniques, such as Support Vector Machines (SVMs), to classify skin disease images into one of three categories: melanoma, eczema, or psoriasis.\n\nWould you like me to explain more about SVMs or image processing in general?"}