In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules are
# re-imported before each cell executes and edits to local .py files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Import dependencies


In [44]:
import os

from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.runnables import chain
from pydantic import BaseModel, Field
from enum import Enum


from IPython.display import display, Markdown, Latex

from typing import List

# Configure Google credentials

- **NOTE**: Remember to change `GOOGLE_APPLICATION_CREDENTIALS` in the cell below to the path of your own Google credentials file.


In [3]:
# Configure Google credentials through the process environment.
# NOTE(review): this is a machine-specific absolute path; replace it with the
# location of your own credentials file before running the notebook.
os.environ.update(
    GOOGLE_APPLICATION_CREDENTIALS="/home/cuongdm/git-cuongpiger/secret/work/vngcloud/ai-platform/vertex-ai-credential.json"
)

# Prepare the LLM


In [10]:
# Prompt used for the tagging step. It asks the model to extract only the
# properties declared on `Classification`; the `{input}` placeholder is filled
# with the passage to classify when the prompt is invoked.
tagging_prompt = ChatPromptTemplate.from_template(
    template="""
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

In [11]:
# Structured-output schema for the first tagging pass: the three properties the
# model is asked to extract. It is handed to `with_structured_output` when `llm`
# is built.
# NOTE(review): values are free-form here -- nothing restricts `sentiment` or
# `language` to a fixed vocabulary, and the 1-10 range for `aggressiveness` is
# stated only in the description text, not enforced by the type.
# NOTE(review): documented with comments rather than a class docstring, since a
# docstring would presumably become part of the schema sent to the model.
class Classification(BaseModel):
    # Free-text sentiment label chosen by the model.
    sentiment: str = Field(description="The sentiment of the text")
    # Integer aggressiveness score; the scale is communicated via the description.
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10"
    )
    # Free-text name of the language of the passage.
    language: str = Field(description="The language the text is written in")

In [12]:
# Gemini chat model wrapped so that each call yields a `Classification`
# instance rather than a raw chat message. temperature=0 is used to keep the
# sampling as repeatable as the backend allows.
llm = ChatVertexAI(
    model="gemini-1.5-flash",
    temperature=0,
).with_structured_output(Classification)

In [19]:
# Sample 1: a warm, friendly Vietnamese greeting.
inp = "Tôi vô cùng vui mừng được gặp bạn! Tôi nghĩ chúng ta sẽ là những người bạn rất tốt!"
# Fill the `{input}` placeholder, then ask the structured-output model to classify it.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [20]:
response

Classification(sentiment='positive', aggressiveness=1, language='Vietnamese')

In [29]:
# Sample 2: a hostile, threatening Vietnamese message.
inp = "Tao rất ghét mày. Mày sẽ phải chịu hậu quả vì đã làm vậy với tao!"
# Fill the `{input}` placeholder, then ask the structured-output model to classify it.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [30]:
response

Classification(sentiment='negative', aggressiveness=10, language='Vietnamese')

In [31]:
# Sample 3: a short profane insult in Vietnamese.
inp = "Đụ má mày"
# Fill the `{input}` placeholder, then ask the structured-output model to classify it.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [32]:
response

Classification(sentiment='negative', aggressiveness=10, language='Vietnamese')

# Finer control

- Careful schema definition gives us more control over the model's output.
- Specifically, we can define:
  - Possible values for each property
  - Description to make sure that the model understands the property
  - Required properties to be returned
- Let's redeclare our **Pydantic** model to control for each of the previously mentioned aspects using enums:

In [63]:
# Redeclared schema, replacing the earlier free-form `Classification`: every
# field is required (`...`) and carries an `enum` keyword listing the labels the
# model may choose from.
# NOTE(review): `aggressiveness` is annotated `int` while its enum lists the
# strings "1".."5" -- presumably tied to how the schema is consumed by the
# model backend; confirm before changing either side.
# NOTE(review): `enum=` is passed as an extra `Field` keyword; verify whether it
# is enforced when the response is validated or only advertised in the schema.
# NOTE(review): documented with comments rather than a class docstring, since a
# docstring would presumably become part of the schema sent to the model.
class Classification(BaseModel):
    # Sentiment restricted to three labels.
    sentiment: str = Field(..., enum=["happy", "neutral", "sad"])
    # Aggressiveness on a five-step scale; higher means more aggressive.
    aggressiveness: int = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
        enum=["1", "2", "3", "4", "5"],
    )
    # Language restricted to the two labels listed.
    language: str = Field(..., enum=["vietnamese", "english"])

In [64]:
# Tagging prompt for the enum-constrained schema. The template text is the same
# as the prompt defined earlier in this notebook; `{input}` is filled with the
# passage to classify when the prompt is invoked.
tagging_prompt = ChatPromptTemplate.from_template(
    template="""
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

In [65]:
# Rebuild the structured-output model so that it is bound to the redeclared
# `Classification` schema; the earlier `llm` was created from the first,
# free-form version of the class.
llm = ChatVertexAI(
    model="gemini-1.5-flash",
    temperature=0,
).with_structured_output(Classification)

In [66]:
# Sample 4: an informal, happy Vietnamese exclamation about meeting someone.
inp = "Chời ơi, gặp được mày tao vui quá chừng!"
# Fill the `{input}` placeholder, then classify with the enum-constrained schema.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [67]:
response

Classification(sentiment='happy', aggressiveness=1, language='vietnamese')

In [68]:
# Sample 5: a profane insult in Vietnamese.
inp = "Mày là đồ chó đẻ"
# Fill the `{input}` placeholder, then classify with the enum-constrained schema.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [69]:
response

Classification(sentiment='sad', aggressiveness=5, language='vietnamese')

In [71]:
# Sample 6: a longer, resentful complaint with profanity (feeling betrayed after
# having helped the other person many times).
inp = "Tao chưa từng nghĩ mày lại làm vậy với tao, đéo hiểu thế nào mày lại làm vậy trong khi tao đã giúp mày nhiều lần"
# Fill the `{input}` placeholder, then classify with the enum-constrained schema.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [72]:
response

Classification(sentiment='sad', aggressiveness=4, language='vietnamese')

In [73]:
# Sample 7: a casual, friendly check-in ("how have you been lately, buddy").
inp = "Dạo này ổn chứ bạn hiền"
# Fill the `{input}` placeholder, then classify with the enum-constrained schema.
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

In [74]:
response

Classification(sentiment='happy', aggressiveness=1, language='vietnamese')