# OpenAI Pipeline 

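End-to-end pipeline built with OpenAI models and LangChain: a prompt is first classified into an access purpose, then answered against a CSV dataset restricted to that purpose.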

## Imports

In [9]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_openai import ChatOpenAI, OpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain.chains import LLMChain

## Prompts

In [40]:
# Sample user requests used to exercise the pipeline.
# Recorded runs in this notebook classify prompts[0] as 'Care' and prompts[2] as 'Research';
# prompts[1] has no recorded run here.
prompts = [
    "Summarize the medical history of patient PAP587444 for medical purposes.",
    "Get all patients that came in third of January, 2027 to process payments.",
    "How many available records do I have that can be used for research?"
]

## Purpose Classification

In [25]:
# Category codes the classifier may return; must stay in sync with the codes
# listed in the system prompt inside classify_prompt.
ACCESS_PURPOSE_CODES = (
    "Care",
    "Research",
    "Insurance",
    "Support",
    "Public",
    "Trial",
    "Product",
    "Marketing",
)


def classify_prompt(prompt):
    """Classify a free-text request into an access-purpose category code.

    The request is sent to a chat model together with a system prompt that
    describes the available categories. The model's reply is normalized and
    checked against ``ACCESS_PURPOSE_CODES`` before it is returned, because the
    result is later used to restrict data access and must not be arbitrary text.

    Args:
        prompt: The user's request as plain text.

    Returns:
        One of the codes in ``ACCESS_PURPOSE_CODES``, or the string ``'None'``
        when the model reports no match or replies with anything else.
    """
    pc_llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
    pc_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """
                You are a helpful assistant that classifies a text input into a predefined access purpose category.

                The following categories exist:

                1. Clinical Care
                Description: Facilitates ongoing patient care by providing healthcare providers with comprehensive access to patient histories, diagnoses, treatments, and contact information. This purpose supports informed clinical decisions and personalized care plans.
                Category code: 'Care'

                2. Research:
                Description: Supports medical research by allowing access to anonymized patient data, focusing on diagnosis and treatment outcomes. Researchers can study patterns, effectiveness, and potential improvements in healthcare delivery and treatments.
                Category code: 'Research'

                3. Billing and Insurance Claims Processing:
                Description: Enables the processing of billing and insurance claims through access to patient insurance information, treatment categories, and patient identifiers. This purpose assists in verifying coverage and submitting claims to insurance providers.
                Category code: 'Insurance'

                4. Patient Support Services:
                Description: Facilitates the provision of support services to patients, including appointment scheduling, follow-up arrangements, and emergency contact communication. Ensures patients receive timely care and information.
                Category code: 'Support'

                5. Public Health Monitoring and Response:
                Description: Allows for the monitoring of public health trends and the management of health crises by analyzing aggregated data on diagnoses, treatments, and patient demographics. Supports public health initiatives and emergency response planning.
                Category code: 'Public'

                6. Clinical Trial Recruitment:
                Description: Supports the identification and recruitment of potential clinical trial participants by matching patient diagnoses and treatments with trial eligibility criteria. Facilitates advancements in medical research and treatment development.
                Category code: 'Trial'

                7. Product Development:
                Description: Informs the development of medical products and services by analyzing treatment outcomes, patient demographics, and health conditions. Drives innovation in healthcare solutions tailored to patient needs. Data will be shared with third parties.
                Category code: 'Product'

                8. Marketing:
                Description: Tailors health-related marketing initiatives to patient demographics and conditions, promoting relevant healthcare services, products, and wellness programs. Aims to enhance patient engagement and healthcare outcomes.
                Category code: 'Marketing'
                
                Classify the input into one of the described categories. 
                Use the defined category code.
                If none of the categories matches, return 'None'. 
                Only return the category code or 'None'. 
                Don't justify your decision.

                """
            ),
            (
                "human",
                "{input}"),
        ]
    )
    chain = pc_prompt | pc_llm
    # Pass the template variable explicitly instead of relying on the
    # single-variable shortcut for bare string inputs.
    response = chain.invoke({"input": prompt})

    # The system prompt shows the codes in quotes, so the model may echo the
    # quotes or add whitespace / a trailing period; strip those before matching.
    raw_code = str(response.content).strip().strip("'\". ")

    if raw_code in ACCESS_PURPOSE_CODES:
        category = raw_code
    else:
        # Fail closed: anything outside the allowlist is treated as "no purpose".
        if raw_code != "None":
            print(f"Unrecognized classifier output {raw_code!r}; treating as 'None'.")
        category = "None"

    print(f"'{prompt}' classified as '{category}'.")

    return category

## RAG

In [48]:
def rag(prompt, access_purpose, csv_path="faker/dataset_3.csv"):
    """Answer a prompt against the CSV dataset, restricted to one access purpose.

    A CSV agent is created for ``csv_path`` and asked the user's prompt with an
    extra instruction to only consider rows whose 'Intended Purposes' include
    ``access_purpose``.

    Args:
        prompt: The user's request as plain text.
        access_purpose: Category code the request was classified as.
        csv_path: Path of the CSV file to query. Defaults to the dataset the
            notebook was originally written against.

    Returns:
        The agent's answer (the 'output' entry of the agent result).

    Raises:
        PermissionError: If ``access_purpose`` is missing, empty or 'None'.
            Without a purpose there is no filter to apply, so the query is
            refused instead of being run with a meaningless restriction.
    """
    # str(None) is "None", so a missing purpose and the classifier's 'None'
    # answer are handled by the same check. Quotes are stripped because the
    # classifier prompt presents the codes in quotes.
    purpose = str(access_purpose).strip().strip("'\"").strip()
    if purpose in ("", "None"):
        raise PermissionError(
            "No valid access purpose was determined for this prompt; "
            "refusing to query the dataset."
        )

    agent = create_csv_agent(
        OpenAI(temperature=0.1),
        csv_path,
        verbose=False,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    )
    # NOTE(review): the restriction is only a natural-language instruction
    # appended to the user's text; it is not enforced on the data itself.
    pbac_template = f" Only include rows where 'Intended Purposes' include {purpose}."

    rag_prompt = prompt + pbac_template

    response = agent.invoke(rag_prompt).get('output')
    print(response)

    return response

In [50]:
# Run the pipeline on the first sample prompt: classify its access purpose,
# then query the CSV agent with the answer restricted to that purpose.
prompt = prompts[0]
access_purpose = classify_prompt(prompt)
response = rag(prompt, access_purpose)

'Summarize the medical history of patient PAP587444 for medical purposes.' classified as 'Care'.
The medical history for patient PAP587444 includes a history of chronic back pain, breast cancer, TMJ, respiratory issues, morbid obesity, and lung cancer. The patient is a 37-year-old female with the phone number 17943741823.


In [49]:
# Same pipeline for the third sample prompt (the research-related question).
# Rebinds the module-level `prompt`, `access_purpose` and `response` names.
prompt = prompts[2]
access_purpose = classify_prompt(prompt)
response = rag(prompt, access_purpose)

'How many available records do I have that can be used for research?' classified as 'Research'.
There are 9558 available records that can be used for research.


## LLM with History

In [51]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

from langchain_openai import ChatOpenAI

# Chat model used for the history-aware conversation below.
chat = ChatOpenAI(model="gpt-3.5-turbo")

# Prompt layout: a fixed system instruction, then the stored conversation
# (injected under the "chat_history" key), then the new user input.
# Note: this rebinds `prompt`, which earlier cells used for a plain string.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Pipe the rendered prompt into the chat model.
chain = prompt | chat

In [52]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.memory import ChatMessageHistory

# Single in-memory message history object shared by the wrapped chain below.
demo_ephemeral_chat_history_for_chain = ChatMessageHistory()

# Wrap `chain` so each call reads the history into "chat_history" and takes the
# new user message from "input".
chain_with_message_history = RunnableWithMessageHistory(
    chain,
    # The session id is ignored: every session id gets the same history object.
    lambda session_id: demo_ephemeral_chat_history_for_chain,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [53]:
# First turn of the conversation. A session_id is passed in the config, but the
# history factory defined for this chain does not use it.
chain_with_message_history.invoke(
    {"input": "Translate this sentence from English to French: I love programming."},
    {"configurable": {"session_id": "unused"}},
)

AIMessage(content="The translation is: J'adore la programmation.", response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 39, 'total_tokens': 51}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-fcbd2e49-6987-4419-8fb9-a0b3320c1694-0')

In [54]:
# Follow-up that refers back to the previous turn, to check the history is passed along.
chain_with_message_history.invoke(
    {"input": "What did I just ask you?"}, {"configurable": {"session_id": "unused"}}
)

AIMessage(content='You just asked me to translate the sentence "I love programming" from English to French.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 66, 'total_tokens': 84}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-488275d5-d118-428c-9d4a-01ae45f44ed9-0')

In [55]:
# Short follow-up whose meaning depends entirely on the earlier turns.
chain_with_message_history.invoke(
    {"input": "Which language?"}, {"configurable": {"session_id": "unused"}}
)

AIMessage(content='You asked me to translate from English to French.', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 95, 'total_tokens': 105}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_d9767fc5b9', 'finish_reason': 'stop', 'logprobs': None}, id='run-a635eaad-94e6-43e4-83d6-ffaadead35cc-0')

In [56]:
# Final check that the first request is still available after several turns.
chain_with_message_history.invoke(
    {"input": "Do you still remember?"}, {"configurable": {"session_id": "unused"}}
)

AIMessage(content='Yes, you asked me to translate the sentence "I love programming" from English to French.', response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 118, 'total_tokens': 137}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_d9767fc5b9', 'finish_reason': 'stop', 'logprobs': None}, id='run-5f982f46-16b0-4b37-a36e-b5bc26e19c09-0')

## SQL Retriever

Query chain (`create_sql_query_chain` API reference):
https://api.python.langchain.com/en/latest/chains/langchain.chains.sql_database.query.create_sql_query_chain.html

Agent (SQL quickstart, "Agents" section):
https://python.langchain.com/docs/use_cases/sql/quickstart/#agents