In [64]:
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
# from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
# from langchain_core.output_parsers import StrOutputParser 
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.chat_models import init_chat_model
from dotenv import load_dotenv
import os

# Load document and chunk it into smaller "sub documents"

In [65]:
# Location of the source article (relative path).
file_path = "article/LLMs in medicine_accepted.pdf"

# Build the PDF loader and read the file; the loaded result is bound to `doc`,
# which the chunking cell passes to `split_documents`.
loader = PyPDFLoader(file_path=file_path)
doc = loader.load()

Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 156 0 (offset 0)
Ignoring wrong pointing object 158 0 (offset 0)
Ignoring wrong pointing object 164 0 (offset 0)
Ignoring wrong pointing object 170 0 (offset 0)
Ignoring wrong pointing object 172 0 (offset 0)
Ignoring wrong pointing object 184 0 (offset 0)


In [66]:
# Splitter built through the tiktoken-based constructor: chunk size 1000, no overlap.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_overlap=0, chunk_size=1000)

# Break the loaded PDF content into smaller sub-documents and report how many were produced.
split_docs = text_splitter.split_documents(doc)
print(f"Generated {len(split_docs)} documents.")

Generated 46 documents.


In [None]:
# Preview the four chunks at indices 3-6; the same slice is what the final cell summarizes.
split_docs[3:7]

# Initialize the LLM with Groq

In [None]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is absent. The previous
# self-assignment (`os.environ[k] = os.environ.get(k)`) did nothing when the key
# existed and raised an opaque TypeError when it did not.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it or add it to your .env file "
        "before initializing the Groq chat model."
    )

# Chat model served by Groq; the provider is selected via `model_provider`.
llm = init_chat_model("llama3-8b-8192", model_provider="groq")

In [67]:
from langchain_openai import ChatOpenAI

# NOTE(review): this rebinds `llm`, so whichever of the Groq cell and this cell
# runs last decides the model used by the chains below.
# The endpoint looks like an OpenAI-compatible Ollama server on the local
# network (port 11434, "/v1" path, placeholder API key) - confirm before sharing.
llm = ChatOpenAI(
    base_url="http://192.168.1.115:11434/v1",
    api_key="ollama",
    model_name="gemma2:2b",
    temperature=0.1,
)

# Map

In [68]:
# Map step: each chunk is substituted for {docs} and summarized on its own.
map_template = "Write a concise summary of the following: {docs}."
map_prompt = ChatPromptTemplate.from_messages([("human", map_template)])

# Wrap prompt + model in an LLMChain so it can be handed to MapReduceDocumentsChain.
map_chain = LLMChain(prompt=map_prompt, llm=llm)

# Reduce

In [69]:
# Reduce step: the per-chunk summaries are substituted for {docs} and merged
# into one consolidated summary.
reduce_template = """
The following is a set of summaries:
{docs}
Take these and distill it into a final, consolidated summary
of the main themes.
"""
reduce_prompt = ChatPromptTemplate.from_messages([("human", reduce_template)])

# Same model as the map step, paired with the reduce prompt.
reduce_chain = LLMChain(prompt=reduce_prompt, llm=llm)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain

In [70]:
# Feeds the documents into `reduce_chain` through its "docs" prompt variable.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="docs",
    llm_chain=reduce_chain,
)

# Combines and iteratively reduces the mapped documents

In [71]:
# The same stuff-chain serves both as the final combine step and as the
# intermediate collapse step; documents are grouped under a 1000-token budget.
reduce_documents_chain = ReduceDocumentsChain(
    token_max=1000,  # maximum number of tokens to group documents into
    collapse_documents_chain=combine_documents_chain,  # used when documents exceed the budget
    combine_documents_chain=combine_documents_chain,  # final chain that is called
)

# Combining documents by mapping a chain over them, then combining results

In [72]:
# Full pipeline: run `map_chain` over every document, then hand the results to
# `reduce_documents_chain`.
map_reduce_chain = MapReduceDocumentsChain(
    document_variable_name="docs",  # prompt variable in the map chain that receives each document
    llm_chain=map_chain,  # map step
    reduce_documents_chain=reduce_documents_chain,  # reduce step
    return_intermediate_steps=False,  # do not include the per-document map results in the output
)

In [73]:
%%time
result = []
result = map_reduce_chain.invoke(split_docs[3:7]) # squares = [x**2 for x in range(10)] | for doc in docs
print(result["output_text"])



## Large Language Models: Revolutionizing Healthcare & Beyond

This text explores the transformative potential of **Large Language Models (LLMs)** in various fields, particularly healthcare.  Here's a breakdown of key takeaways:

**1. LLMs are powerful tools:** 
* They excel at understanding and generating human language, enabling tasks like text completion, generation, and even conversational interaction (e.g., ChatGPT).
* Recent advancements allow for impressive performance with minimal fine-tuning, showcasing their adaptability to new tasks.

**2. LLMs in healthcare:**
* **Applications:** Clinical decision support, education, research are just a few examples of how LLMs are being used in medicine. 
* **Challenges:**  Current technology requires close supervision due to limitations in model validation and lack of standardized benchmarks.

**3. The evolution of LLMs:**
* **GPT models:** A series of increasingly powerful models (GPT-2, GPT-3, GPT-4) demonstrate the continuous advanceme