In [1]:
from langchain_core.tools import tool


@tool
def multiply(first_int: int, second_int: int) -> int:
    """Multiply two integers together."""
    return first_int * second_int


@tool
def add(first_int: int, second_int: int) -> int:
    "Add two integers."
    return first_int + second_int


@tool
def exponentiate(base: int, exponent: int) -> int:
    "Exponentiate the base to the exponent power."
    return base**exponent


In [2]:
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent, ZeroShotAgent, initialize_agent
from langchain_openai import ChatOpenAI

In [3]:
# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-tools-agent")
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), MessagesPlaceholder(

In [4]:
# Choose the LLM that will drive the agent
# Only certain models support this
model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

In [5]:
# What-If Explanations

In [6]:
from pydantic import BaseModel

class BmiModel(BaseModel):
    height: float = None
    weight: float = None
    bmi: float = None
    obesity_status: str = "Unknown"

In [7]:
"""
@tool
def predict_model(new_value: BmiModel) -> BmiModel:
    '''Calculate BMI'''
    new_value.bmi = new_value.weight / ((new_value.height/100.0) ** 2)    

    if new_value.bmi > 30:
        new_value.obesity_status = "obese"
    else:
        new_value.obesity_status = "healthy"
    return new_value.bmi


"""

'\n@tool\ndef predict_model(new_value: BmiModel) -> BmiModel:\n    \'\'\'Calculate BMI\'\'\'\n    new_value.bmi = new_value.weight / ((new_value.height/100.0) ** 2)    \n\n    if new_value.bmi > 30:\n        new_value.obesity_status = "obese"\n    else:\n        new_value.obesity_status = "healthy"\n    return new_value.bmi\n\n\n'

In [8]:
class MLModel(BaseModel):
    age: float = 29.0    
    gender: int = 1
    height: float = 175
    weight: float = 72
    systolic_bp: float = 120
    diastolic_bp: float = 80
    cholesterol_status: int = 1
    glucose_status: int = 1
    smoking_status: int = 1
    alcohol_status: int = 1
    physical_activity_status: int = 1

In [9]:
def transform_age(df):
    """
    Convert Age to AGE_GROUP
    """
    age_bins = [0, 30, 40, 50, 60, 150 ]
    age_labels = [0, 1, 2, 3, 4]

    df["AGE_GROUP"] = pd.cut(df['age'], 
                             bins=age_bins, 
                             labels=age_labels, 
                             include_lowest=True,
                             right=True)
    return df


def transform_bmi(df):
    """
    Convert Height and Weight to BMI
    """
    df["BMI"] = df["weight"]/ ((df["height"] * 0.01) ** 2)
    return df


def transform_map(df):
    """
    Convert Systolic BP and Diastolic BP to MAP
    """
    df["MEAN_ARTERIAL_PRESSURE"] = df["diastolic_bp"] + ((df["systolic_bp"] - df["diastolic_bp"])/3)
    return df

def select_features(df):
    """
    Drop unnecessary features
    """
    df = df.drop(["age", "height", "weight", "systolic_bp", "diastolic_bp"],axis='columns') 
    return df

from sklearn.preprocessing import LabelEncoder
def label_encoding(df, features):
    '''
    Method for label encoding all selected ordinal fields
    '''
    for f in features:
        if(f in df.columns):
            le = LabelEncoder()
            df[f]= le.fit_transform(df[f])
        else:
            print('Feature not found')
    return df



def transform_features(df):
    """
    Pipeline steps:
    # 1. Convert Age to AGE_GROUP
    # 2. Convert Height, Weight to BMI
    # 3. Convert SBP and DBP to MAP
    # 4. Perform label encoding on ordinal features
    # 5. Select Features to make the prediction
    """
    df = transform_age(df)
    df = transform_bmi(df)
    df = transform_map(df)
    df = label_encoding(df, features=['cholesterol_status',
                                      'glucose_status', 
                                      'AGE_GROUP'])
    df = select_features(df)
    return df

In [10]:
patient_data = {
"patient_id" : 1011,
"age": 29.0,
"gender": 2,
"height": 175,
"weight": 72,
"systolic_bp": 120,
"diastolic_bp": 80,
"cholesterol_status": 1,
"glucose_status": 1,
"smoking_status": 1,
"alcohol_status": 1,
"physical_activity_status":1
}

In [11]:
import json

initial_data = json.dumps(patient_data)
initial_data

'{"patient_id": 1011, "age": 29.0, "gender": 2, "height": 175, "weight": 72, "systolic_bp": 120, "diastolic_bp": 80, "cholesterol_status": 1, "glucose_status": 1, "smoking_status": 1, "alcohol_status": 1, "physical_activity_status": 1}'

In [12]:
# Add dataset description as a context

data_description = ("There are 3 types of input features: Objective: factual information; Examination: results of medical examination; Subjective: information given by the patient.\n"
+ "The dataset has the following features:\nAge | Objective Feature | age | unit is years\n"
+"Height | Objective Feature | height | int (unit is cm) |\n"
+"Weight | Objective Feature | weight | float (unit is kg) |\n"
+"Gender | Objective Feature | gender | categorical code where value 1 = women, 2= men|\n" 
+"Systolic blood pressure | Examination Feature | systolic_bp | int data type, but measure in mmHg|\n"
+"Diastolic blood pressure | Examination Feature | diastolic_bp | int data type, but measure in mmHg|\n"
+"Cholesterol | Examination Feature | cholesterol | 1: normal, 2: above normal, 3: well above normal |\n"
+"Glucose | Examination Feature | glucose_status | 1: normal, 2: above normal, 3: well above normal |\n"
+"Smoking | Subjective Feature | smoking_status | 0 value means does not smoke, value =1 means regular smoker|\n"
+"Alcohol intake | Subjective Feature | alcohol_status | 0 value means does not consume alcohol, value=1 means regular consumption of alcohol|\n"
+"Physical activity | Subjective Feature | phsycial_activity_status | 0 value means low physcial activity level, value=1 means high physical actively level|\n"
+"Presence or absence of cardiovascular disease | Target Variable | cvd_status | cvd_status = 0 means low risk of CVD, cvd_status=1 means high risk of CVD\n"
+"All of the dataset values were collected at the moment of medical examination.")

In [13]:
import joblib
import pandas as pd

@tool
def predict_model(new_value: MLModel) -> int:
    """Calculate CVD risk"""
    
    model = joblib.load('model/light_gbm.joblib')
    dict_vals = new_value.__dict__
    x_df = pd.DataFrame(dict_vals, index=[0])
    x_df = transform_features(x_df)
    print(x_df)
    y_pred = model.predict(x_df)
    
    return y_pred

In [14]:
tools = [multiply, add, exponentiate, predict_model]

In [15]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [16]:
memory.load_memory_variables({})

{'chat_history': []}

In [17]:
memory.clear()

In [18]:
# Construct the OpenAI Tools agent
agent = create_openai_tools_agent(model, tools, prompt)

In [19]:
# Create an agent executor by passing in the agent and tools
agent_executor  = AgentExecutor(
    agent=agent, tools=tools, verbose=True, memory=memory
)

In [20]:
agent_executor.invoke(
    {
        "input": f"Use the information from the dataset description to answer questions whenever needed: {data_description}"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI will now use the dataset information to assist you with any questions or tasks you have regarding the features and target variable. Feel free to ask your questions or provide any specific tasks you'd like assistance with.[0m

[1m> Finished chain.[0m


{'input': 'Use the information from the dataset description to answer questions whenever needed: There are 3 types of input features: Objective: factual information; Examination: results of medical examination; Subjective: information given by the patient.\nThe dataset has the following features:\nAge | Objective Feature | age | unit is years\nHeight | Objective Feature | height | int (unit is cm) |\nWeight | Objective Feature | weight | float (unit is kg) |\nGender | Objective Feature | gender | categorical code where value 1 = women, 2= men|\nSystolic blood pressure | Examination Feature | systolic_bp | int data type, but measure in mmHg|\nDiastolic blood pressure | Examination Feature | diastolic_bp | int data type, but measure in mmHg|\nCholesterol | Examination Feature | cholesterol | 1: normal, 2: above normal, 3: well above normal |\nGlucose | Examination Feature | glucose_status | 1: normal, 2: above normal, 3: well above normal |\nSmoking | Subjective Feature | smoking_status 

In [21]:
from langchain_core.messages import AIMessage, HumanMessage

agent_executor.invoke(
    {
        "input": f"The patient data is: {initial_data}"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `predict_model` with `{'new_value': {'age': 29.0, 'gender': 2, 'height': 175, 'weight': 72, 'systolic_bp': 120, 'diastolic_bp': 80, 'cholesterol_status': 1, 'glucose_status': 1, 'smoking_status': 1, 'alcohol_status': 1, 'physical_activity_status': 1}}`


[0m   gender  cholesterol_status  glucose_status  smoking_status  alcohol_status  \
0       2                   0               0               1               1   

   physical_activity_status  AGE_GROUP        BMI  MEAN_ARTERIAL_PRESSURE  
0                         1          0  23.510204               93.333333  
[36;1m[1;3m[0][0m[32;1m[1;3mBased on the patient data provided, the calculated cardiovascular disease (CVD) risk status is low, with a CVD status of 0. If you have any more questions or need further assistance, feel free to ask![0m

[1m> Finished chain.[0m


{'input': 'The patient data is: {"patient_id": 1011, "age": 29.0, "gender": 2, "height": 175, "weight": 72, "systolic_bp": 120, "diastolic_bp": 80, "cholesterol_status": 1, "glucose_status": 1, "smoking_status": 1, "alcohol_status": 1, "physical_activity_status": 1}',
 'chat_history': [HumanMessage(content='Use the information from the dataset description to answer questions whenever needed: There are 3 types of input features: Objective: factual information; Examination: results of medical examination; Subjective: information given by the patient.\nThe dataset has the following features:\nAge | Objective Feature | age | unit is years\nHeight | Objective Feature | height | int (unit is cm) |\nWeight | Objective Feature | weight | float (unit is kg) |\nGender | Objective Feature | gender | categorical code where value 1 = women, 2= men|\nSystolic blood pressure | Examination Feature | systolic_bp | int data type, but measure in mmHg|\nDiastolic blood pressure | Examination Feature | dia

In [22]:
output = agent_executor.invoke({"input":"Does the person have high risk of cardiovascular disease?"})

output



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `predict_model` with `{'new_value': {'age': 29.0, 'gender': 2, 'height': 175, 'weight': 72, 'systolic_bp': 120, 'diastolic_bp': 80, 'cholesterol_status': 1, 'glucose_status': 1, 'smoking_status': 1, 'alcohol_status': 1, 'physical_activity_status': 1}}`


[0m   gender  cholesterol_status  glucose_status  smoking_status  alcohol_status  \
0       2                   0               0               1               1   

   physical_activity_status  AGE_GROUP        BMI  MEAN_ARTERIAL_PRESSURE  
0                         1          0  23.510204               93.333333  
[36;1m[1;3m[0][0m[32;1m[1;3mNo, the person has a low risk of cardiovascular disease. The predicted CVD status is 0, indicating a low risk. If you have any more questions or need further assistance, feel free to ask![0m

[1m> Finished chain.[0m


{'input': 'Does the person have high risk of cardiovascular disease?',
 'chat_history': [HumanMessage(content='Use the information from the dataset description to answer questions whenever needed: There are 3 types of input features: Objective: factual information; Examination: results of medical examination; Subjective: information given by the patient.\nThe dataset has the following features:\nAge | Objective Feature | age | unit is years\nHeight | Objective Feature | height | int (unit is cm) |\nWeight | Objective Feature | weight | float (unit is kg) |\nGender | Objective Feature | gender | categorical code where value 1 = women, 2= men|\nSystolic blood pressure | Examination Feature | systolic_bp | int data type, but measure in mmHg|\nDiastolic blood pressure | Examination Feature | diastolic_bp | int data type, but measure in mmHg|\nCholesterol | Examination Feature | cholesterol | 1: normal, 2: above normal, 3: well above normal |\nGlucose | Examination Feature | glucose_status 

In [23]:
agent_executor.invoke({"input":"What if the weight of the person increases by 100kg when the person is 58 years old, is the risk of diabetes higher?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `predict_model` with `{'new_value': {'age': 58, 'gender': 2, 'height': 175, 'weight': 172, 'systolic_bp': 120, 'diastolic_bp': 80, 'cholesterol_status': 1, 'glucose_status': 1, 'smoking_status': 1, 'alcohol_status': 1, 'physical_activity_status': 1}}`


[0m   gender  cholesterol_status  glucose_status  smoking_status  alcohol_status  \
0       2                   0               0               1               1   

   physical_activity_status  AGE_GROUP        BMI  MEAN_ARTERIAL_PRESSURE  
0                         1          0  56.163265               93.333333  
[36;1m[1;3m[0][0m[32;1m[1;3mEven with the increased weight and age, the risk of diabetes remains low. The predicted CVD status is 0, indicating a low risk. If you have any more questions or need further assistance, feel free to ask![0m

[1m> Finished chain.[0m


{'input': 'What if the weight of the person increases by 100kg when the person is 58 years old, is the risk of diabetes higher?',
 'chat_history': [HumanMessage(content='Use the information from the dataset description to answer questions whenever needed: There are 3 types of input features: Objective: factual information; Examination: results of medical examination; Subjective: information given by the patient.\nThe dataset has the following features:\nAge | Objective Feature | age | unit is years\nHeight | Objective Feature | height | int (unit is cm) |\nWeight | Objective Feature | weight | float (unit is kg) |\nGender | Objective Feature | gender | categorical code where value 1 = women, 2= men|\nSystolic blood pressure | Examination Feature | systolic_bp | int data type, but measure in mmHg|\nDiastolic blood pressure | Examination Feature | diastolic_bp | int data type, but measure in mmHg|\nCholesterol | Examination Feature | cholesterol | 1: normal, 2: above normal, 3: well abov

In [24]:
agent_executor.invoke(
    {
        "input": "What if the systolic blood pressure of the person increases to 180 mmHg. Now can the person have a high risk?"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `predict_model` with `{'new_value': {'age': 58, 'gender': 2, 'height': 175, 'weight': 172, 'systolic_bp': 180, 'diastolic_bp': 80, 'cholesterol_status': 1, 'glucose_status': 1, 'smoking_status': 1, 'alcohol_status': 1, 'physical_activity_status': 1}}`


[0m   gender  cholesterol_status  glucose_status  smoking_status  alcohol_status  \
0       2                   0               0               1               1   

   physical_activity_status  AGE_GROUP        BMI  MEAN_ARTERIAL_PRESSURE  
0                         1          0  56.163265              113.333333  
[36;1m[1;3m[1][0m[32;1m[1;3mWith the increased systolic blood pressure, the risk of cardiovascular disease is now high. The predicted cvd_status value is 1, indicating a high risk. If you have any more questions or need further assistance, feel free to ask![0m

[1m> Finished chain.[0m


{'input': 'What if the systolic blood pressure of the person increases to 180 mmHg. Now can the person have a high risk?',
 'chat_history': [HumanMessage(content='Use the information from the dataset description to answer questions whenever needed: There are 3 types of input features: Objective: factual information; Examination: results of medical examination; Subjective: information given by the patient.\nThe dataset has the following features:\nAge | Objective Feature | age | unit is years\nHeight | Objective Feature | height | int (unit is cm) |\nWeight | Objective Feature | weight | float (unit is kg) |\nGender | Objective Feature | gender | categorical code where value 1 = women, 2= men|\nSystolic blood pressure | Examination Feature | systolic_bp | int data type, but measure in mmHg|\nDiastolic blood pressure | Examination Feature | diastolic_bp | int data type, but measure in mmHg|\nCholesterol | Examination Feature | cholesterol | 1: normal, 2: above normal, 3: well above norma

In [25]:
output = agent_executor.invoke(
    {
        "input": "How can the risk be reduced?"
    }
)

output['output']



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo reduce the risk of cardiovascular disease, it is important to focus on lifestyle changes and medical interventions. Here are some ways to reduce the risk:

1. Maintain a Healthy Weight: Losing weight, if necessary, and maintaining a healthy weight can reduce the risk of cardiovascular disease.

2. Exercise Regularly: Engaging in regular physical activity and exercise can improve cardiovascular health.

3. Healthy Diet: Consuming a balanced and healthy diet, low in saturated fats, trans fats, and cholesterol, can help reduce the risk of cardiovascular disease.

4. Manage Blood Pressure: Monitoring and managing blood pressure levels through medication and lifestyle changes can reduce the risk of cardiovascular disease.

5. Quit Smoking: If the person smokes, quitting smoking can significantly reduce the risk of cardiovascular disease.

6. Limit Alcohol Consumption: Moderating alcohol intake or avoiding it altogether can lowe

"To reduce the risk of cardiovascular disease, it is important to focus on lifestyle changes and medical interventions. Here are some ways to reduce the risk:\n\n1. Maintain a Healthy Weight: Losing weight, if necessary, and maintaining a healthy weight can reduce the risk of cardiovascular disease.\n\n2. Exercise Regularly: Engaging in regular physical activity and exercise can improve cardiovascular health.\n\n3. Healthy Diet: Consuming a balanced and healthy diet, low in saturated fats, trans fats, and cholesterol, can help reduce the risk of cardiovascular disease.\n\n4. Manage Blood Pressure: Monitoring and managing blood pressure levels through medication and lifestyle changes can reduce the risk of cardiovascular disease.\n\n5. Quit Smoking: If the person smokes, quitting smoking can significantly reduce the risk of cardiovascular disease.\n\n6. Limit Alcohol Consumption: Moderating alcohol intake or avoiding it altogether can lower the risk of cardiovascular disease.\n\n7. Mana

In [58]:
output = agent_executor.invoke(
    {
        "input": "What is 4 to the power 5?"
    }
)

output['output']

'4 to the power of 5 is equal to 1024.'

In [49]:
### New Experiment

In [53]:
from langchain.memory import ChatMessageHistory
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [55]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

chain = prompt | model

In [63]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)
prompt.pretty_print(

)


You are a helpful assistant. Answer all questions to the best of your ability.


[33;1m[1;3m{chat_history}[0m


[33;1m[1;3m{input}[0m


In [64]:
from langchain_core.runnables.history import RunnableWithMessageHistory

new_session = ChatMessageHistory()

chain_with_message_history = RunnableWithMessageHistory(
    chain,
    lambda session_id: new_session,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [57]:
chain_with_message_history.invoke(
    {"input": "What is the BMI of the person, if height is 175 cm, weight is 72 kg. Is the person too skinny?"},
    {"configurable": {"session_id": "unused"}},
)

AIMessage(content='The BMI (Body Mass Index) of the person can be calculated using the formula: BMI = weight (kg) / (height (m) * height (m)). \n\nFor the given measurements:\nBMI = 72 / (1.75 * 1.75) ≈ 23.5\n\nA BMI of 23.5 falls within the "normal" range, which is typically considered to be between 18.5 and 24.9. Therefore, the person is not too skinny based on their BMI.')

In [58]:
chain_with_message_history.invoke(
    {"input": "How does the BMI change if the weight is 85kg?"},
    {"configurable": {"session_id": "unused"}},
)

AIMessage(content='If the weight is 85 kg, we can recalculate the BMI using the same formula:\nBMI = 85 / (1.75 * 1.75) ≈ 27.8\n\nWith a weight of 85 kg, the BMI would be approximately 27.8. This would place the person in the "overweight" category, as a BMI between 25 and 29.9 is generally considered to be overweight.')

In [59]:
chain_with_message_history.invoke(
    {"input": "What if the weight increases by another 20kg?"},
    {"configurable": {"session_id": "unused"}},
)

AIMessage(content='If the weight increases by another 20 kg, the new weight would be 105 kg. We can recalculate the BMI using the same formula:\nBMI = 105 / (1.75 * 1.75) ≈ 34.3\n\nWith a weight of 105 kg, the BMI would be approximately 34.3. This would place the person in the "obese" category, as a BMI of 30 or higher is generally considered to be obese.')