In [None]:
# Cookbook for multi-modal (text + tables + images) RAG 
# (https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
# (https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) 
# + Option_3: Retrieve image summary but pass raw image to LLM for synthesis

# pdf2image needs Poppler for Windows,
# Go to: https://github.com/oschwartz10612/poppler-windows/releases
# Download the latest Release-24.08.0.zip file.
# make "C:\Program Files\poppler\"poppler-24.08.0"
# Add Poppler to PATH: System Variables ---> New ---> "C:\Program Files\poppler\poppler-24.08.0\Library\bin"
# download and install "Tesseract" (https://github.com/UB-Mannheim/tesseract/wiki)
# Add Tesseract to PATH: System Variables ---> New ---> "C:\Program Files\Tesseract-OCR"




In [2]:
path = "D:/4-IntoCode/16_LangChain/AgilProjekt_multiModel/"  # use / instead of \


In [3]:
from typing import Any

from pydantic import BaseModel
from unstructured.partition.pdf import partition_pdf

# Get elements
raw_pdf_elements = partition_pdf(
    filename=path + "AKAP1.pdf",
    # Using pdf format to find embedded image blocks
    extract_images_in_pdf=True,
    # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles
    # Titles are any sub-section of the document
    infer_table_structure=True,
    # Post processing to aggregate text once we have the title
    chunking_strategy="by_title",
   
)

In [4]:
# Create a dictionary to store counts of each type
category_counts = {}

for element in raw_pdf_elements:
    category = str(type(element))
    if category in category_counts:
        category_counts[category] += 1
    else:
        category_counts[category] = 1

# Unique_categories will have unique elements
unique_categories = set(category_counts.keys())
category_counts


{"<class 'unstructured.documents.elements.CompositeElement'>": 197,
 "<class 'unstructured.documents.elements.TableChunk'>": 3}

In [6]:
class Element(BaseModel):
    type: str
    text: Any


# Categorize by type
categorized_elements = []
for element in raw_pdf_elements:
    if "unstructured.documents.elements.Table" in str(type(element)):
        categorized_elements.append(Element(type="table", text=str(element)))
    elif "unstructured.documents.elements.CompositeElement" in str(type(element)):
        categorized_elements.append(Element(type="text", text=str(element)))

# Tables
table_elements = [e for e in categorized_elements if e.type == "table"]
print(len(table_elements))

# Text
text_elements = [e for e in categorized_elements if e.type == "text"]
print(len(text_elements))

3
197


In [7]:
# Multi-vector retriever
# Text and Table summaries


from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


In [16]:
# Prompt
prompt_text = """You are an assistant tasked with summarizing tables and text. \
Give a concise and essential summary of the table or text. 
Each summary should not longer than 10 sentences. Please keep it as short as possible. \
Table or text chunk: {element} """
prompt = ChatPromptTemplate.from_template(prompt_text)


# Summary chain
import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ") # use Google Gemini instead of OpenAI

from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemma-3-27b-it", temperature=0)    # use "gemma-3-27b-it" instead of gemini-2.0-flash or 1.5

summarize_chain = {"element": lambda x: x} | prompt | model | StrOutputParser()

In [17]:
# Apply to text
texts = [i.text for i in text_elements]

# to avoid 429 error, we can use batch processing
import time

summaries = []
for text in texts:
    try:
        result = summarize_chain.invoke(text)
        summaries.append(result)
        time.sleep(5)  # deal with 1 request per 5 seconds
        break
    except Exception as e:
        print(f"Error: {e}")
        time.sleep(30)  # if error, wait 30 seconds before retrying
for text in texts:
    result = summarize_chain.invoke(text)
    print(result) # deal it one by one

text_summaries = summarize_chain.batch(texts[:10], {"max_concurrency": 1}) 

This article from the *Journal of Molecular and Cellular Cardiology* focuses on A-kinase anchoring protein 1 (AKAP1) and its connection to cardiovascular diseases. It is authored by Wenwen Marin from the Institute for Translational Medicine, Medical Faculty of Qingdao University, China. The research investigates the role of AKAP1, suggesting it plays a part in the development or progression of various heart conditions. Further details regarding the specific diseases and mechanisms studied are not provided in this excerpt, but the article likely delves into the molecular and cellular aspects of AKAP1's function within the cardiovascular system. This suggests a focus on understanding the protein's impact at a fundamental biological level.
This abstract focuses on A-kinase anchoring proteins (AKAPs). AKAPs are significant signaling molecules, particularly within mitochondria. Research suggests a connection between AKAP function and the development of cardiovascular diseases. The keywords 

In [None]:
# Text summary Result: (after running 10 minutes, and stopped by ResourceExhausted error)
'''This article from the *Journal of Molecular and Cellular Cardiology* focuses on A-kinase anchoring protein 1 (AKAP1) and its connection to cardiovascular diseases. It is authored by Wenwen Marin from the Institute for Translational Medicine, Medical Faculty of Qingdao University, China. The research investigates the role of AKAP1, suggesting it plays a part in the development or progression of various heart conditions. Further details regarding the specific diseases and mechanisms studied are not provided in this excerpt, but the article likely delves into the molecular and cellular aspects of AKAP1's function within the cardiovascular system. This suggests a focus on understanding the protein's impact at a fundamental biological level.
This abstract focuses on A-kinase anchoring proteins (AKAPs). AKAPs are significant signaling molecules, particularly within mitochondria. Research suggests a connection between AKAP function and the development of cardiovascular diseases. The keywords indicate the study likely investigates how AKAPs influence signaling pathways related to heart health. Understanding these mechanisms could offer new insights into preventing or treating cardiovascular issues. The research likely explores the role of AKAPs in mitochondrial signaling within the cardiovascular system.
A-kinase anchoring proteins (AKAPs) regulate PKA signaling networks, and AKAP1 specifically protects the heart. This review focuses on AKAP1, detailing its different forms, structural features, and how it interacts with other proteins. It explores the connection between AKAP1 and cardiac hypertrophy – an enlargement of the heart. The review aims to provide a comprehensive understanding of AKAP1’s role in heart health at a molecular level, from its sequence to its function. Ultimately, it seeks to clarify how AKAP1 contributes to protecting against heart disease.
This text highlights a connection between hypoxia (low oxygen), myocardial infarction (heart attack), and dysfunction of endothelial cells – cells lining blood vessels. The research suggests that a protein called AKAP1 may be a potential target for new cardiovascular therapies. Essentially, hypoxia and heart attacks seem to negatively impact endothelial cell function, and AKAP1 could offer a way to counteract these effects. This implies AKAP1 plays a role in maintaining healthy heart function, particularly under stressful conditions like oxygen deprivation. Further investigation into AKAP1 could lead to treatments aimed at protecting the heart and blood vessels.
A-kinase anchoring proteins (AKAPs) function like conductors of an orchestra in cellular signaling. They organize signaling molecules—effectors, enzymes, receptors, and mRNA—in specific locations within the cell. This precise placement optimizes signaling events and helps maintain cellular homeostasis. Essentially, AKAPs don't directly activate signals, but rather ensure the right components are in the right place at the right time for effective communication. They are critical for coordinated and efficient signal transduction, much like a conductor is for a symphony.
AKAPs are scaffold proteins that interact with protein kinase A (PKA). They share a common feature: binding to PKA regulatory subunits through an amphipathic alpha-helix. However, AKAPs exhibit structural diversity. Crucially, AKAPs also possess domains that anchor them – and the PKA complexes they bind – to specific locations within cells (subcellular organelles). This anchoring function creates “transducesome” complexes. Essentially, AKAPs localize PKA signaling to precise sites within the cell.
AKAPs (A-kinase anchoring proteins) are crucial for regulating cAMP signal transduction pathways. They strategically position key enzymes and factors – including GPCRs, adenylyl cyclase, and various phosphatases – near their downstream targets. This proximity enables precise control of signaling processes. AKAPs interact with cellular structures like the plasma membrane, cytoskeleton, and mitochondria. They are essential for efficient and accurate cAMP-dependent signaling within the cell. Essentially, AKAPs act as scaffolding proteins, organizing the components needed for effective signal transduction.
AKAPs are crucial components of cAMP signaling, enhancing its efficiency and integrating it with other cellular pathways. They form complexes with various proteins including ion channels, kinases (PKD, ERK1/2, GSK3B), and small GTP-binding proteins. This complex formation allows AKAPs to regulate diverse cellular events beyond just cAMP signaling. Consequently, AKAPs play a key role in coordinating signaling crosstalk within the cell. Their multi-functional nature highlights their importance in cellular communication and regulation.
Please provide the table or text chunk you would like me to summarize! You've started a sentence ("AKAPs are present widely in organs and cell types, as well as in the...") but haven't finished it. 

I need the complete content to give you a concise summary. Just paste it here, and I'll do my best to provide a summary within the 10-sentence limit.
This text provides a list of abbreviations commonly used in a biomedical context, specifically relating to cardiovascular research. It defines terms related to proteins (Ago2, AKAP, Drp1), signaling molecules (cAMP, CaN), enzymes (AC, cb5r, COX), and diseases (CAD, CVD). Several abbreviations relate to cardiac function, such as B-AR and E-C coupling.  The list also includes abbreviations for cell types (ECs) and a specific enzyme (eNOS). Understanding these abbreviations is crucial for interpreting research in this field. Finally, it defines a structural element (D/D domain).
This text provides a list of abbreviations commonly used in biological and medical research. It covers terms related to signaling pathways (e.g., Epac, GPCR, cAMP), cellular structures (e.g., ER, IMM), enzymes (e.g., endothelial NO synthase, HDACS), and disease processes (e.g., I/R injury, HIV-1 RT). Several abbreviations relate to gene regulation and cellular response to stress (e.g., HIF-1a, Fis1). The list also includes organizational references like HGNC. Essentially, this is a key for understanding specialized terminology within the field.
This text provides a list of abbreviations commonly used in the study of cardiac injury and related molecular processes. It focuses on proteins and domains involved in signaling pathways, mitochondrial function, and cellular responses to stress. Key areas include protein kinase anchoring proteins (AKAPs, including mitochondrial versions - MitoAKAPs), factors regulating gene expression (MEF2, NFATc), and proteins involved in calcium handling (NCX3) and oxidative stress (MnSOD). The abbreviations also cover conditions like myocardial infarction (MI) and left ventricular hypertrophy (LVH), and proteins like lipoprotein lipase (LPL) and c-myc binding protein (MYCBP). Understanding these abbreviations is crucial for interpreting research in cardiovascular biology and disease.
This text provides a list of abbreviations commonly used in biological and biochemical research. It covers terms related to cellular signaling (nuclear factor of activated T cells, PKA, PP1), muscle cell studies (NVMs, PLN), and molecular biology techniques (NMR, RNAi, RISC). Several enzyme families are represented (PDE4, PHDs, PTPD1). The list also includes abbreviations for reactive species (NO, ROS) and protein domains (RING finger domain, RIIBD). Finally, it defines less common terms like RIAD. This glossary aims to standardize terminology within scientific communication.
This text provides a list of abbreviations commonly used in a research context, likely related to cellular biology and physiology. It covers terms pertaining to **reactive oxygen species**, **calcium regulation** (SERCA2, SR), **gene silencing** (shRNA), and **proteins involved in cellular processes** (Siah2, Star, VHL). The abbreviations also include cell types studied – **smooth muscle cells** (SMCs, VSMCs) – and a **growth factor** (VEGF). Understanding these abbreviations is crucial for interpreting research findings in fields like cardiovascular biology, cell stress, and cancer. Essentially, it's a key for deciphering specialized language within a scientific paper or discussion.
This information provides publication details for a research article by W. Marin. The corresponding author is Wenwen Marin, affiliated with the Institute for Translational Medicine at Qingdao University in China, and can be contacted at Wenwen.Marin@gmx.de. The article was initially received on August 11, 2019, revised by November 8, 2019, and accepted on November 22, 2019. It became available online on November 26, 2019, and is published under the journal identifier YJMEC with a DOI of 10.1016/j-yjmec.2019.11.154. Copyright for the article is held by Elsevier Ltd. (2019).
The cardiovascular system—comprising the heart, blood, and blood vessels—is the body’s transport network. It delivers oxygen, carbon dioxide, nutrients, and hormones throughout the body. Maintaining its homeostatic functions is crucial for overall health. Disruptions to this system can lead to a wide range of diseases. These include conditions like hypertension, atherosclerosis, and coronary artery disease. More severe consequences can be heart failure, stroke, atrial fibrillation, and even sudden cardiac death. Therefore, cardiovascular health is essential for preventing acute, chronic, and potentially fatal illnesses.
Cardiovascular diseases (CVD) are the leading cause of death globally and are increasing, particularly in developing countries, despite being largely preventable. Current therapeutic options specifically targeting the *causes* of CVD, like hypertension and heart failure, are limited. Recent research highlights the importance of A-kinase anchoring proteins (AKAP) which are expressed in the heart and play key roles in the cardiovascular system. Further investigation into AKAP proteins may offer new avenues for CVD treatment.
AKAPs (A-kinase anchoring proteins) are implicated in both normal heart function and cardiovascular disease (CVD). They play a role in regulating various physiological processes within the heart. However, their involvement also extends to pathological conditions contributing to CVD development. This dual role suggests AKAPs could be valuable targets for new CVD therapies. Research indicates manipulating AKAP activity may offer a therapeutic avenue for treating heart conditions. Further investigation into AKAPs is warranted to explore their full potential in CVD treatment.
AKAPs (A-kinase anchoring proteins) play a role in regulating cardiovascular function. These proteins don't possess catalytic activity themselves, but they spatially organize protein kinases, notably Protein Kinase A (PKA), near their substrates. This localization is crucial for efficient and specific signaling within heart cells. AKAPs influence various cardiovascular processes including heart rate, contractility, and vascular tone. Different AKAP isoforms are expressed in distinct cardiac compartments, suggesting specialized roles. Disruptions in AKAP expression or function are linked to cardiovascular diseases like heart failure and arrhythmias. Research indicates AKAPs are potential therapeutic targets for these conditions. Further investigation is ongoing to fully understand the complex interplay between AKAPs and cardiovascular health. 
AKAPs (A-kinase anchoring proteins) are key regulators in heart and blood vessel cells. They control the location and timing of cAMP signaling, impacting crucial heart functions. These functions include calcium cycling, heart muscle contraction, and the length of electrical signals in the heart. AKAPs also play a role in heart disease processes like irregular heartbeats, enlarged heart muscle, and heart failure. Furthermore, they are involved in how the heart adapts to low oxygen levels. Essentially, AKAPs coordinate signaling to maintain normal heart function and respond to stress. 
cAMP acts as a key messenger in heart cells, activating PKA to regulate both contraction and relaxation. This process influences critical functions like heart rate via HCN channels. Additionally, cAMP activates Epac proteins involved in calcium management and cardiac growth (hypertrophy). Rac1 is identified as another downstream effector of cAMP, suggesting a complex regulatory network within the heart. These pathways demonstrate cAMP's central role in cardiac function and adaptation.
Here's a concise summary of the provided text:
...
These two research articles explore distinct aspects of cardiac and vascular function. Article 110, by Khalilimeybodi et al., investigates the role of calcium-dependent signaling in heart enlargement due to beta-adrenergic stimulation. Specifically, it focuses on the calcineurin/NFAT pathway. Article 111, from Lugus et al., examines mitofusins—proteins essential for mitochondrial function—and their impact on angiogenesis (new blood vessel formation) in endothelial cells. This study reveals mitofusins modulate various signaling pathways within these cells. Both studies contribute to understanding cellular mechanisms related to cardiovascular health, though through different focuses: hypertrophy versus angiogenesis.
These citations detail research into cellular processes related to disease, particularly cancer and vascular health. Rinaldi *et al.* (2017) found that mitochondrial AKAP1 supports the mTOR pathway and contributes to tumor growth, suggesting a potential therapeutic target. Eelen *et al.* (2015) reviewed endothelial cell metabolism in both healthy and diseased blood vessels, highlighting its importance in vascular function. Both studies contribute to understanding the metabolic underpinnings of complex diseases and potential avenues for intervention. The research spans investigations into mitochondrial function, signaling pathways, and the role of specific cell types in disease progression.
These citations detail research articles published in molecular and biochemical journals. [114] Meyer *et al.* (2011) investigated how phosphorylation regulates the stability and downregulation of vascular endothelial growth factor receptor 2. Their work, published in *Molecular and Cell Biology*, focuses on a PEST motif’s role in this process. [115] Cao *et al.* (2019) explored the function of calcium homeostasis within mitochondria in cardiac muscle cells, as reported in *Archives of Biochemistry and Biophysics*. This research examines the role of mitochondrial calcium in heart function.
These three research articles explore key aspects of calcium signaling and mitochondrial function in heart cells. Eisner et al. (2017) focuses on the fundamental process of excitation-contraction coupling, linking calcium to heart muscle contraction. De la Fuente & Sheu (2019) highlight the crucial communication between the sarcoplasmic reticulum and mitochondria, with calcium playing a central role. Finally, Yu & Pekkurnaz (2018) investigate how mitochondrial dynamics – fusion and fission – are regulated to maintain cellular energy balance. Collectively, these studies emphasize the interconnectedness of calcium handling, mitochondrial health, and cardiac function. They provide insights into the molecular mechanisms governing a healthy heart and potential targets for treating cardiac disease.'''

In [18]:
# Apply to tables
tables = [i.text for i in table_elements]

# to avoid 429 error, we can use batch processing
import time

summaries = []
for table in tables:
    try:
        result = summarize_chain.invoke(table)
        summaries.append(result)
        time.sleep(5)  # deal with 1 request per 5 seconds
    except Exception as e:
        print(f"Error: {e}")
        time.sleep(30)  # if error, wait 30 seconds before retrying
for table in tables:
    result = summarize_chain.invoke(table)
    print(result) # deal it one by one

table_summaries = summarize_chain.batch(tables, {"max_concurrency": 1})



This data presents the expression levels of various AKAP (A-Kinase Anchoring Protein) family members across different detection methods. Several AKAPs (D-AKAP1, akap2, akaps, akapa, akaps, akape, akap7, AkAPs, akAP9, AKAP10, AKAP11, AKAP12, AKAP13, AKAP14) were analyzed. Antibody detection shows many AKAPs are not detected or have low expression. HPA-RNA-seq data provides relative expression values, ranging from 0.0 to 75.0, indicating varying levels of mRNA presence for these proteins.  Notably, AKAP79 and Gravin (AKAP12) show relatively higher RNA-seq expression (47 and 75.0 respectively). The data suggests discrepancies between protein (antibody) and mRNA (RNA-seq) detection for some AKAPs.
This data appears to represent metrics from a GTEx RNA-seq experiment, likely related to gene expression analysis. The sample is "Peart" with an ID of 255. Key values include a score of 14, percentages of 75, 60, 28, 27, and 35, and a value of 13.4.  Additional data points relate to "FANTOMS CAGE

In [None]:
# Table summary Result:
'''This data presents the expression levels of various AKAP (A-Kinase Anchoring Protein) family members across different detection methods. Several AKAPs (D-AKAP1, akap2, akaps, akapa, akaps, akape, akap7, AkAPs, akAP9, AKAP10, AKAP11, AKAP12, AKAP13, AKAP14) were analyzed. Antibody detection shows many AKAPs are not detected or have low expression. HPA-RNA-seq data provides relative expression values, ranging from 0.0 to 75.0, indicating varying levels of mRNA presence for these proteins.  Notably, AKAP79 and Gravin (AKAP12) show relatively higher RNA-seq expression (47 and 75.0 respectively). The data suggests discrepancies between protein (antibody) and mRNA (RNA-seq) detection for some AKAPs.
This data appears to represent metrics from a GTEx RNA-seq experiment, likely related to gene expression analysis. The sample is "Peart" with an ID of 255. Key values include a score of 14, percentages of 75, 60, 28, 27, and 35, and a value of 13.4.  Additional data points relate to "FANTOMS CAGE" with values like 377, 53, 14.9, 44.6, 21.0, 13.7, and 46.7.  The presence of "0.0" in multiple fields suggests missing or zero values for certain measurements. This likely represents a snapshot of quality control or analysis results for this specific sample.
This text describes functional domains within a protein structure. It identifies several key motifs including a tubulin binding motif (residues 10-25), a leucine zipper motif (310-44), and a mitochondrial targeting sequence. Additionally, it notes binding sites for PP1, PKA RIl, and a Tudor domain characterized by a hydrophobic alpha-helix.  The Tudor domain also contains an amphipathic alpha-helix. Residue numbers are provided to pinpoint the location of each domain within the protein sequence. These domains suggest diverse functions related to cellular transport, protein interaction, and mitochondrial localization.'''

In [19]:
# Add to vectorstore
# Use Multi Vector Retriever with summaries:
# InMemoryStore stores the raw text, tables
# vectorstore stores the embedded summaries

import uuid

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_chroma import Chroma
from langchain_core.documents import Document

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# The vectorstore to use to index the child chunks
vectorstore = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

# The storage layer for the parent documents
store = InMemoryStore()
id_key = "doc_id"

# The retriever (empty to start)
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=store,
    id_key=id_key,
)

# Add texts
doc_ids = [str(uuid.uuid4()) for _ in texts]
summary_texts = [
    Document(page_content=s, metadata={id_key: doc_ids[i]})
    for i, s in enumerate(text_summaries)
]
retriever.vectorstore.add_documents(summary_texts)
retriever.docstore.mset(list(zip(doc_ids, texts)))

# Add tables
table_ids = [str(uuid.uuid4()) for _ in tables]
summary_tables = [
    Document(page_content=s, metadata={id_key: table_ids[i]})
    for i, s in enumerate(table_summaries)
]
retriever.vectorstore.add_documents(summary_tables)
retriever.docstore.mset(list(zip(table_ids, tables)))

In [27]:
# Run RAG pipeline.
from langchain_core.runnables import RunnablePassthrough

# Prompt template
template = """Answer the question based only on the following context, which can include text and tables:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# LLM
model = ChatGoogleGenerativeAI(model="gemma-3-27b-it", temperature=0)

# RAG pipeline
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [None]:
retriever.invoke("AKAPs")

# Result:
'''["1. Introduction\n\nAs each brilliant symphony is made by not only an orchestra with extraordinary musicians but also a professional conductor who can fi- nely direct the orchestra's performance, A-kinase anchoring proteins (AKAPs) are acting as such critical “conductor” in signal transduction units, placing the binding effectors into specific subcellular locations and optimizing signaling events on co-localized enzymes, receptors and mRNA molecules, to maintain the cell homeostasis.",
 'AKAPs are a group of scaffold proteins with diverse structures but a common ability to bind the regulatory subunit dimers of protein kinase A (PKA) [1] via an amphipathic a-helix [2,3]. Additionally, AKAPs also contain unique lipid- or protein-binding domains for anchoring the whole “transducesome” complexes to specific subcellular organelles,',
 'such as the plasma membrane, cytoskeleton, mitochondria, Golgi ap- paratus, nuclear envelope, Centrosomes, or vesicular structures, being close to the downstream substrates and critical for precise regulation [3-8]. AKAPs assemble the key enzymes and factors that the cyclic adenosine monophosphate (cAMP) signal transduction pathways work with, such as G-protein-coupled receptor (GPCR), adenylyl cyclase (AC), protein phosphatase, phosphodiesterase (PDE), exchange proteins directly activated by',
 'cAMP (Epac) and PKA. Except for enhancing the efficiency of cAMP signaling, AKAPs also regulate other events by forming this multi-functional complex with ion channels, PKD or other kinases, small GTP-binding proteins, ERK1/2 or GSK3B, etc. Thus, AKAPs play an essential role in integrating the signaling cross talk [4,9].']'''

["1. Introduction\n\nAs each brilliant symphony is made by not only an orchestra with extraordinary musicians but also a professional conductor who can fi- nely direct the orchestra's performance, A-kinase anchoring proteins (AKAPs) are acting as such critical “conductor” in signal transduction units, placing the binding effectors into specific subcellular locations and optimizing signaling events on co-localized enzymes, receptors and mRNA molecules, to maintain the cell homeostasis.",
 'AKAPs are a group of scaffold proteins with diverse structures but a common ability to bind the regulatory subunit dimers of protein kinase A (PKA) [1] via an amphipathic a-helix [2,3]. Additionally, AKAPs also contain unique lipid- or protein-binding domains for anchoring the whole “transducesome” complexes to specific subcellular organelles,',
 'such as the plasma membrane, cytoskeleton, mitochondria, Golgi ap- paratus, nuclear envelope, Centrosomes, or vesicular structures, being close to the downs

In [None]:
chain.invoke("AKAPs")

# Result:
''''Based on the provided text, AKAPs are:\n\n*   **Scaffold proteins** with diverse structures.\n*   Capable of **binding the regulatory subunit dimers of protein kinase A (PKA)** via an amphipathic alpha-helix.\n*   Contain **lipid- or protein-binding domains** to anchor "transducesome" complexes to subcellular organelles (plasma membrane, cytoskeleton, mitochondria, Golgi apparatus, nuclear envelope, Centrosomes, or vesicular structures).\n*   **Assemble key enzymes and factors** involved in cAMP signal transduction pathways (GPCR, AC, protein phosphatase, PDE, Epac, and PKA).\n*   **Regulate other events** by forming complexes with ion channels, PKD or other kinases, small GTP-binding proteins, ERK1/2 or GSK3B.\n*   Play an **essential role in integrating signaling crosstalk**.\n*   Act as a **"conductor" in signal transduction units**, optimizing signaling events and maintaining cell homeostasis.''''

'Based on the provided text, AKAPs are:\n\n*   **Scaffold proteins** with diverse structures.\n*   Capable of **binding the regulatory subunit dimers of protein kinase A (PKA)** via an amphipathic alpha-helix.\n*   Contain **lipid- or protein-binding domains** to anchor "transducesome" complexes to subcellular organelles (plasma membrane, cytoskeleton, mitochondria, Golgi apparatus, nuclear envelope, Centrosomes, or vesicular structures).\n*   **Assemble key enzymes and factors** involved in cAMP signal transduction pathways (GPCR, AC, protein phosphatase, PDE, Epac, and PKA).\n*   **Regulate other events** by forming complexes with ion channels, PKD or other kinases, small GTP-binding proteins, ERK1/2 or GSK3B.\n*   Play an **essential role in integrating signaling crosstalk**.\n*   Act as a **"conductor" in signal transduction units**, optimizing signaling events and maintaining cell homeostasis.'

In [None]:
chain.invoke("Table 1. The protein and mRNA expression levels of AKAPs in the human heart muscle")

# Result:
''''The provided text includes a table listing AKAP proteins and some information about their expression. However, it **does not explicitly state "Table 1. The protein and mRNA expression levels of AKAPs in the human heart muscle"**. \n\nThe table lists:\n\n*   **AKAP proteins:** akap1 (D-AKAP1), akap2, (AKAP-KL), akaps, (AKAP110), akapa, (AKAP82), akaps, (AKAP79), akape, (mAKAP) akap7, (AKAP%8), AkAPs, (AKAP95), akAP9, (Yotiao), AKAP10, (D-AKAP2), AKAP11, (AKAP220), AKAP12, (Gravin) AKAP13, (AKAP-Lbc), AKAP14, (AKAP28)\n*   **Antibody availability:**  "No antibody" or "Not detected" is listed for many. Some have "Low" expression.\n*   **HPA-RNA-seq (tissue):** Numbers representing expression levels are provided for some AKAPs.''''

'The provided text includes a table listing AKAP proteins and some information about their expression. However, it **does not explicitly state "Table 1. The protein and mRNA expression levels of AKAPs in the human heart muscle"**. \n\nThe table lists:\n\n*   **AKAP proteins:** akap1 (D-AKAP1), akap2, (AKAP-KL), akaps, (AKAP110), akapa, (AKAP82), akaps, (AKAP79), akape, (mAKAP) akap7, (AKAP%8), AkAPs, (AKAP95), akAP9, (Yotiao), AKAP10, (D-AKAP2), AKAP11, (AKAP220), AKAP12, (Gravin) AKAP13, (AKAP-Lbc), AKAP14, (AKAP28)\n*   **Antibody availability:**  "No antibody" or "Not detected" is listed for many. Some have "Low" expression.\n*   **HPA-RNA-seq (tissue):** Numbers representing expression levels are provided for some AKAPs.'