In [1]:
import os

# Work from the repository root so the relative paths used below
# (e.g. 'docs/emails/...') resolve.
# A relative `cd ..` climbs one more level every time the cell is re-run, so the
# root is remembered on the first run and simply re-entered on later runs.
if "PROJECT_ROOT" not in globals():
    os.chdir(os.pardir)
    PROJECT_ROOT = os.getcwd()
else:
    os.chdir(PROJECT_ROOT)
print(PROJECT_ROOT)

/Users/aldensio/Desktop/Projects/ODPRT-chatbot


## Example Email

In [2]:
import extract_msg

In [3]:
# Open one sample .msg file and keep the fields the rest of the notebook works with.
f = r'docs/emails/Agreement Type/Agreement Type 01-04B.msg'
msg = extract_msg.Message(f)

# Sender, date, subject and body, read in that order.
msg_sender, msg_date, msg_subj, msg_message = (
    msg.sender,
    msg.date,
    msg.subject,
    msg.body,
)

In [None]:
# Show the body text of the sample email loaded above.
print(msg_message)

## Email Processor

In [5]:
import logging

logger = logging.getLogger(__name__)

# Apply the level on every run. Previously it was only set inside the handler
# guard, so it was skipped whenever the guard evaluated to False.
logger.setLevel(logging.INFO)

# Check this logger's OWN handlers. hasHandlers() also reports handlers attached
# to ancestor loggers (for example a root handler installed by another library),
# which would skip this setup entirely. Guarding on logger.handlers still
# prevents stacking a second handler when the cell is re-run.
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter('%(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

In [6]:
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_core.prompts.chat import ChatPromptTemplate

from pydantic import BaseModel, Field
from typing import Literal, List

In [7]:
# Structured-output schema for the usefulness classifier: it is handed to
# llm.with_structured_output(), so the chain returns an instance of this model.
# NOTE: documented with comments on purpose; the field descriptions below are
# runtime text that describes the schema, so they are left untouched.
class UsefulnessClassification(BaseModel):
    # Free-text justification for the verdict.
    reasoning: str = Field(description="The reasoning behind the classification")
    # The verdict itself; the Literal restricts it to exactly these two labels.
    classification: Literal["useful", "not_useful"] = Field(description="a string to represent if the thread is useful or not useful")

# Structured-output schema for question/answer extraction: it is handed to
# llm.with_structured_output(), so the chain returns an instance of this model.
# The two lists are parallel (answers[i] is meant to answer questions[i]);
# nothing in this model enforces that they have the same length.
class QAPairs(BaseModel):
    # Questions found in the thread.
    questions: List[str] = Field(description="A list of question(s) the client(s) is/are asking")
    # Answers, positionally matched to `questions`.
    answers: List[str] = Field(description="The answers to the question(s) asked")

In [8]:
# System prompt for the usefulness classifier.
# The email thread is supplied as the human message of the chat prompt, so it is
# not embedded here as well; embedding it in both places sent every thread to the
# model twice. The label spellings match the schema literals "useful"/"not_useful".
CLASSIFICATION_PROMPT = """You are a classification assistant determining whether an email thread is useful or not.

Email Thread:
The email thread to classify is provided in the user message.

Context:
The NUS Office of the Deputy President (Research & Technology) (ODPRT) oversees research compliance, integrity, grant administration, strategic initiatives, industry engagement, and research communications at NUS. The Industry Engagements & Partnerships (IEP) team within ODPRT manages industry partnerships and collaborations.

Instructions:
1. Read the email thread.
2. Write a brief reasoning for your classification.
3. Classify the email thread as "useful" or "not_useful".

Guidelines for Classification:
1. "useful": The thread contains specific, actionable, or relevant information regarding research funding, industry partnerships, or strategic initiatives.
2. "not_useful": The thread is generic, lacks substantive content, or does not pertain to ODPRT's functions.
3. "not_useful": If no clear determination can be made, classify as "not_useful".

Output Format:
- `reasoning`: (Brief explanation)
- `classification`: 'useful' or 'not_useful'"""

In [9]:
# System prompt for question/answer extraction.
# The email thread is supplied as the human message of the chat prompt, so it is
# not embedded here as well; embedding it in both places sent every thread to the
# model twice. Rule 4 now covers both output lists so they stay the same length.
QA_PROMPT = """You are an assistant that extracts all relevant question-answer pairs from an email thread.

Email Thread:
The email thread to process is provided in the user message.

Instructions:
1. Identify all distinct questions asked by the sender(s) within the email thread.
2. Extract the corresponding answers from the same thread, ensuring relevance and accuracy.
3. Maintain the chronological order of the questions and answers.
4. If no clear question is present, return an empty list for both `questions` and `answers`.
5. If a question has no available answer in the thread, return "No answer available" in the corresponding position in `answers`.

Output Format:
`questions`: ["First extracted question", "Second extracted question"],
`answers`: ["First corresponding answer", "Second corresponding answer"]"""

In [10]:
# Chat model shared by the chains below. The API key and model name are read
# from the environment instead of being written into the notebook.
llm = ChatOpenAI(
    temperature=0,
    model=os.environ.get('OPENAI_MODEL'),
    api_key=os.environ.get('OPENAI_API_KEY'),
)

In [11]:
# Each chain is a two-message chat prompt piped into the shared LLM, with the
# LLM constrained to return the matching pydantic schema.

# Usefulness classifier -> UsefulnessClassification
classification_chain = (
    ChatPromptTemplate.from_messages(
        [
            ("system", CLASSIFICATION_PROMPT),
            ("human", "{email_thread}"),
        ]
    )
    | llm.with_structured_output(UsefulnessClassification)
)

# Question/answer extractor -> QAPairs
qa_chain = (
    ChatPromptTemplate.from_messages(
        [
            ("system", QA_PROMPT),
            ("human", "{email_thread}"),
        ]
    )
    | llm.with_structured_output(QAPairs)
)


In [12]:
# Classify the sample email body; the input is keyed by the prompt's only
# template variable.
classification_response = classification_chain.invoke({"email_thread": msg_message})

In [13]:
# The model's stated justification for its verdict.
classification_response.reasoning

'The email thread discusses the process of extending a research project, including the requirement for a VA (Value Added) and the transition to an online portal for managing research agreements. This information is relevant to the functions of the NUS Office of the Deputy President (Research & Technology) and pertains to research funding and compliance, making it useful for stakeholders involved in research administration.'

In [14]:
# The verdict label: "useful" or "not_useful".
classification_response.classification

'useful'

In [15]:
# Extract question/answer pairs from the sample email body; the input is keyed
# by the prompt's only template variable.
qa_response = qa_chain.invoke({"email_thread": msg_message})

In [16]:
# Questions the model extracted from the thread.
qa_response.questions

['Could I get IEP’s advice if we are able to proceed to extend the research project based on the email approval for grantor?',
 'Should we wait for the VA to be executed?',
 'Could you assist to assign a case officer for the VA please?',
 'Could you advise if VA is required for this RCA?']

In [17]:
# Extracted answers; read them position-by-position against qa_response.questions.
qa_response.answers

['No answer available',
 'No answer available',
 'Yes, a VA would be required for this RCA for the project extension.',
 'Yes, a VA would be required for this RCA for the project extension.']