In [1]:
import os

os.environ["OPENAI_API_KEY"] = ""
os.environ["LANGCHAIN_API_KEY"] = ""
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "01-03-4"

In [2]:
!pip install -qU langchain_openai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━[0m [32m0.9/1.2 MB[0m [31m25.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [5]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from pydantic import BaseModel, Field

llm = ChatOpenAI(temperature = 0, model = "gpt-4o")

In [6]:
email_conversation = """
From: John (John@bikecorporation.me)
To: Kim (Kim@teddyinternational.me)
Subject: “ZENESIS” bike distribution cooperation and meeting schedule proposal
Dear Mr. Kim,

I am John, Senior Executive Director at Bike Corporation. I recently learned about your new bicycle model, "ZENESIS," through your press release. Bike Corporation is a company that leads innovation and quality in the field of bicycle manufacturing and distribution, with long-time experience and expertise in this field.

We would like to request a detailed brochure for the ZENESIS model. In particular, we need information on technical specifications, battery performance, and design aspects. This information will help us further refine our proposed distribution strategy and marketing plan.

Additionally, to discuss the possibilities for collaboration in more detail, I propose a meeting next Tuesday, January 15th, at 10:00 AM. Would it be possible to meet at your office to have this discussion?

Thank you.

Best regards,
John
Senior Executive Director
Bike Corporation
"""

### Output Parser를 사용하지 않는 경우

In [76]:
from itertools import chain
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import AIMessageChunk
from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import PydanticOutputParser

prompt = PromptTemplate.from_template(
    '''
    Please extract the important parts of the following email.
    {email_conversation}
    '''
)

llm = ChatOpenAI(temperature = 0, model = "gpt-4")

chain = prompt | llm

answer = chain.stream({"email_conversation" : email_conversation})
answer

<generator object RunnableSequence.stream at 0x7fd96ea73f40>

In [9]:
def stream_response(response, return_output = False):
    """
    Streams the response from the AI model, processing and printing each chunk.

    This function iterates over each item in the 'response' iterable. If an item is an instance of AIMessageChunk, it extracts and prints the content.
    If the item is a string, it prints the string directly.
    Optionally, the function can return the concatenated string of all response chunks.

    Args:
    - response (iterable): An iterable of response chunks, which can be AIMessageChunk objects or strings.
    - return_output (bool, optional): If True, the function returns the concatenated response string. The default is False.

    Returns:
    - str: If `return_output` is True, the concatenated response string. Otherwise, nothing is returned.
    """
    answer = ""
    for token in response:
      if isinstance(token, AIMessageChunk):
        answer += token.content
        print(token.content, end = "", flush = True)

      elif isinstance(token, str):
        answer += token
        print(token, end = "", flush = True)

    if return_output:
      return answer

output = stream_response(answer, return_output = True)

John, the Senior Executive Director at Bike Corporation, has expressed interest in the new bicycle model "ZENESIS" from Teddy International. He has requested a detailed brochure for the model, specifically seeking information on technical specifications, battery performance, and design aspects. John has also proposed a meeting to discuss potential collaboration on Tuesday, January 15th, at 10:00 AM at Teddy International's office.

### Uer Output Parser

In [15]:
class EmailSummary(BaseModel):
  parson: str = Field(desxription = "The sender in the email")
  email: str = Field(description = "The email address of the sender")
  subject: str = Field(description = "The subject of the email")
  summary: str = Field(description = "The summary of the email content")
  date: str = Field(description = "The meetion date and time mentioned in the email content")

In [16]:
parser = PydanticOutputParser(pydantic_object = EmailSummary)

In [17]:
print(parser)

pydantic_object=<class '__main__.EmailSummary'>


### Prompt 제작

In [18]:
prompt = PromptTemplate.from_template(
    '''
    You are a helpful assistant.

    QUESTION:
    {question}

    EMAIL CONVERSATION:
    {email_conversation}

    FORAMT:
    {format}
    '''
)

In [19]:
prompt = prompt.partial(format = parser.get_format_instructions())



*   **get_format_instructions()**: 언어 모델이 출력해야 하는 정보의 형식을 정의하는 지침을 제공합니다. 예를 들어, 언어 모델이 출력해야 하는 데이터 필드와 해당 필드의 형식을 지정하는 방법을 설명하는 문자열로 명령을 반환할 수 있습니다.
이러한 지침은 언어 모델이 출력을 구조화하고 특정 데이터 모델에 맞게 변환하는 데 매우 중요합니다.
*  **parser()**: 언어 모델의 출력(문자열로 가정)을 가져와 이를 분석하고 특정 구조로 변환합니다. Pydantic과 같은 도구를 사용하여 사전 정의 스키마에 대해 입력 문자열의 유효성을 검사하고 해당 스키마를 따르는 데이터 구조로 변환합니다.
*   **원하는 정보를 출력하기 위해 여러 과정을 거쳐야 함.**



In [66]:
test_instruct = parser.get_format_instructions()

In [67]:
print(test_instruct)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"parson": {"desxription": "The sender in the email", "title": "Parson", "type": "string"}, "email": {"description": "The email address of the sender", "title": "Email", "type": "string"}, "subject": {"description": "The subject of the email", "title": "Subject", "type": "string"}, "summary": {"description": "The summary of the email content", "title": "Summary", "type": "string"}, "date": {"description": "The meetion date and time mentioned in the email content", "title": "Date", "type": "string"}}, "required": ["parson", "emai

In [68]:
prompt = PromptTemplate.from_template(
    '''
    You are a helpful assistant.

    QUESTION:
    {question}

    EMAIL CONVERSATION:
    {email_conversation}

    FORAMT:
    {format}
    '''
)

In [69]:

prompt = prompt.partial(format = parser.get_format_instructions())

print(prompt)

input_variables=['email_conversation', 'question'] input_types={} partial_variables={'format': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"parson": {"desxription": "The sender in the email", "title": "Parson", "type": "string"}, "email": {"description": "The email address of the sender", "title": "Email", "type": "string"}, "subject": {"description": "The subject of the email", "title": "Subject", "type": "string"}, "summary": {"description": "The summary of the email content", "title": "Summary", "type": "string"}, "date": {"description": "The meetion date and 

In [70]:
chain = prompt | llm
print(chain)

first=PromptTemplate(input_variables=['email_conversation', 'question'], input_types={}, partial_variables={'format': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"parson": {"desxription": "The sender in the email", "title": "Parson", "type": "string"}, "email": {"description": "The email address of the sender", "title": "Email", "type": "string"}, "subject": {"description": "The subject of the email", "title": "Subject", "type": "string"}, "summary": {"description": "The summary of the email content", "title": "Summary", "type": "string"}, "date": {"description":

In [71]:
response = chain.stream(
    {
      "email_conversation" : email_conversation,
      "question" : "이메일을 요약해",
    }
)

In [73]:
output = stream_response(response, return_output = True)

###  : 구조화를 통해 원하는 정보만 출력 할 수 있도록 함.

In [42]:
chain = prompt | llm | parser

In [46]:
response = chain.invoke(
    {
      "email_conversation" : email_conversation,
      "question" : "이메일을 요약해",
    }
)

In [48]:
print(response)

parson='John' email='John@bikecorporation.me' subject='“ZENESIS” bike distribution cooperation and meeting schedule proposal' summary="John, Senior Executive Director at Bike Corporation, is interested in the new bicycle model, 'ZENESIS'. He requests a detailed brochure for the model, specifically information on technical specifications, battery performance, and design aspects. He also proposes a meeting to discuss potential collaboration." date='Tuesday, January 15th, at 10:00 AM'


In [54]:
# 원하는 내용만 쏙 골라낼 수 있음.
print(response.email)
print(response.parson)

John@bikecorporation.me
John


### mail.txt

In [91]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

In [92]:
email_conversation = '/content/mail.txt'

In [93]:
# EmailSummary 모델 정의
class EmailSummary(BaseModel):
    person: str = Field(description="The sender in the email")
    email: str = Field(description="The email address of the sender")
    subject: str = Field(description="The subject of the email")
    summary: str = Field(description="The summary of the email content")
    date: str = Field(description="The meeting date and time mentioned in the email content")
    is_spam: str = Field(description="Is the email spam or not. If it's spam, return '스팸'; otherwise, 'no spam'.")

# PydanticOutputParser를 사용하여 출력 구조화
parser = PydanticOutputParser(pydantic_object=EmailSummary)

In [101]:
prompt = PromptTemplate.from_template(
    '''
    You are a helpful assistant.

    QUESTION:
    {question}

    EMAIL CONVERSATION:
    {email_conversation}

    FORAMT:
    {format}
    '''
)

In [102]:
prompt = prompt.partial(format = parser.get_format_instructions())

print(prompt)

input_variables=['email_conversation', 'question'] input_types={} partial_variables={'format': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"person": {"description": "The sender in the email", "title": "Person", "type": "string"}, "email": {"description": "The email address of the sender", "title": "Email", "type": "string"}, "subject": {"description": "The subject of the email", "title": "Subject", "type": "string"}, "summary": {"description": "The summary of the email content", "title": "Summary", "type": "string"}, "date": {"description": "The meeting date and 

### 2

In [113]:
email_conversation = """
Delivered-To: naaa2004@jj.ac.kr
Received: by 2002:a05:7010:3d4:b0:421:d380:e3da with SMTP id v20csp1385711mdi;
        Sun, 12 Jan 2025 17:00:03 -0800 (PST)
X-Google-Smtp-Source: AGHT+IECADgzgDvX6MPwA2VkNrdQNrATFaYnFkFasu6alR/BxJ05o3MFRcwV8QDuLgrbld0mOgQQ
X-Received: by 2002:ac8:4918:0:b0:467:6100:d1ac with SMTP id d75a77b69052e-46c70ff7440mr252312841cf.18.1736730002918;
        Sun, 12 Jan 2025 17:00:02 -0800 (PST)
ARC-Seal: i=1; a=rsa-sha256; t=1736730002; cv=none;
        d=google.com; s=arc-20240605;
        b=ceoylfD9t4bNXrWsOCkPDyb3mYnz/PCk53mjTZOCxU5ZVNAiWTHFcH/0BFPSicBrhb
         dqURJSd8XZpqJ7mYA8R7pbvpi1OBdrjIXr5nLP3jKiunT6Cy3RqLYGF9IHr4QEaysZwY
         Gw5GJubUJCE7I4kw3y7/B4ZbLz9+XLm7y1wc7QVRmK9f4cXdBAfNMGtViKrsiy1uBpVy
         wvP32Ql6+vr9WePFoSznHu5Kjqw1W+yDiE923LDOgoACe2KpFdF0Exf7MQCZi6Ce/I6J
         Unqd9si8PuNp96J8usIvbQX+sOhJB9Cl7QprXO1NEGvg/Sv83Zkld8voEdJbeEF0SMrC
         HZPg==
ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605;
        h=mime-version:feedback-id:list-unsubscribe-post:list-unsubscribe:to
         :reply-to:message-id:subject:date:from:dkim-signature:dkim-signature;
        bh=yWt88s15Gt26Aj9LZuGnH7AUxGiXIW/O/YjJxIfEosg=;
        fh=a9f3BgF576T6kB6rB7xXDLJFQHwvHtjVpqlQjClcluQ=;
        b=ESK0El+DhS/o5o3tbm8mahnSCtbioGLM1vunsjC4f2TbRffEBRH1HcCQx08uC7UXoC
         cx3kMH1Me0YFfdfsMsXQ4NZH5BKUoM1Xd4Dz9sHXKAHea+J0yNZm8WZ6kwWtIL2Uj5qK
         arma2YO2Jeh6knKOtfQFppe7FqMQetIt9oz2pu0q4TIuYDDRpEeE6q+JwkBbi5F72bvI
         w6/lB56rQYavXCpvxyPVluv58ym5Y3CjYTbY9musJX7SwealhX+xPKAiN/b0TYL7PD6j
         0Q8T9UeNwDAPEKGj0PvJVxoQN8Ufr21mo2enuBFrKi64dujAD6LsBamV06R7QQMN7QUH
         rMNA==;
        dara=google.com
ARC-Authentication-Results: i=1; mx.google.com;
       dkim=pass header.i=@pm.mtasv.net header.s=pm20241021 header.b=ADdBr+5b;
       dkim=pass header.i=@langchain.com header.s=20230705013107pm header.b=eB7QdIys;
       spf=pass (google.com: domain of pm_bounces@pm-bounces.langchain.com designates 50.31.156.121 as permitted sender) smtp.mailfrom=pm_bounces@pm-bounces.langchain.com;
       dmarc=pass (p=REJECT sp=REJECT dis=NONE) header.from=langchain.com
Return-Path: <pm_bounces@pm-bounces.langchain.com>
Received: from sc-ord-mta121.mtasv.net (sc-ord-mta121.mtasv.net. [50.31.156.121])
        by mx.google.com with ESMTPS id d75a77b69052e-46c87332647si82417251cf.31.2025.01.12.17.00.02
        for <naaa2004@jj.ac.kr>
        (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
        Sun, 12 Jan 2025 17:00:02 -0800 (PST)

"""

In [114]:
from itertools import chain
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import AIMessageChunk
from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import PydanticOutputParser

prompt = PromptTemplate.from_template(
    '''
    Please extract the important parts of the following email.
    {email_conversation}
    '''
)

llm = ChatOpenAI(temperature = 0, model = "gpt-4")

chain = prompt | llm

answer = chain.stream({"email_conversation" : email_conversation})
answer

<generator object RunnableSequence.stream at 0x7fd96e6ece40>

In [115]:
class EmailSummary(BaseModel):
  parson: str = Field(desxription = "The sender in the email")
  email: str = Field(description = "The email address of the sender")
  subject: str = Field(description = "The subject of the email")
  summary: str = Field(description = "The summary of the email content")
  date: str = Field(description = "The meetion date and time mentioned in the email content")

In [116]:
parser = PydanticOutputParser(pydantic_object = EmailSummary)

In [117]:
prompt = PromptTemplate.from_template(
    '''
    You are a helpful assistant.

    QUESTION:
    {question}

    EMAIL CONVERSATION:
    {email_conversation}

    FORAMT:
    {format}
    '''
)

In [118]:
prompt = prompt.partial(format = parser.get_format_instructions())

In [119]:
test_instruct = parser.get_format_instructions()

In [120]:
response = chain.stream(
    {
      "email_conversation" : email_conversation,
      "question" : "이메일이 스팸인지 구분해",
    }
)

In [121]:
output = stream_response(response, return_output = True)

The email was delivered to naaa2004@jj.ac.kr on Sun, 12 Jan 2025 17:00:03 -0800 (PST). The email was sent from pm_bounces@pm-bounces.langchain.com. The email passed the DKIM and SPF checks, indicating it is likely not spam or phishing.