<a href="https://colab.research.google.com/github/ahsanrazi/LangChain/blob/main/05_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classify Text into Labels

In [1]:
# Classify text into categories or labels using chat models with structured outputs.

# Tagging means labeling a document with classes such as:
# Sentiment
# Language
# Style (formal, informal etc.)
# Covered topics
# Political tendency

In [None]:
# Tagging has a few components

# function: tagging uses functions to specify how the model should tag a document
# schema: defines how we want to tag the document

In [2]:
!pip install -qU langchain-core
!pip install -qU langchain-google-genai

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m412.7/412.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from google.colab import userdata

# Read the Gemini API key from Colab's user data store instead of hard-coding it.
gemini_api_key = userdata.get("GEMINI_API_KEY")

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Base Gemini chat model, authenticated with the key loaded above.
model = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-exp",
    api_key=gemini_api_key,
)

In [6]:
# Let's specify a Pydantic model with a few properties and their expected type in our schema.

from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel, Field

# Prompt template for tagging: the `{input}` placeholder is filled with the
# passage to classify when the template is invoked.
tagging_prompt = ChatPromptTemplate.from_template(
    template="""
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)


class Classification(BaseModel):
    # Free-form tagging schema: every field has only a type and a description,
    # with no restriction on the values it may take.

    # Sentiment of the passage, as unconstrained text.
    sentiment: str = Field(
        description="The sentiment of the text",
    )
    # Integer aggressiveness score; the 1-10 scale is stated only in the description.
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10",
    )
    # Language of the passage, as unconstrained text.
    language: str = Field(
        description="The language the text is written in",
    )


In [7]:
# LLM
# Reuse the chat model created earlier (`model`) instead of constructing a second,
# identical client; `with_structured_output` returns a new runnable whose responses
# are parsed into `Classification` instances.
llm = model.with_structured_output(Classification)

In [8]:
# Classify a sample passage: render the prompt with the passage, then send the
# rendered prompt to the structured-output LLM.
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
prompt = tagging_prompt.invoke(dict(input=inp))
response = llm.invoke(prompt)

In [15]:
response

Classification(sentiment='Positive', aggressiveness=1, language='Spanish')

In [16]:
# If we want dictionary output, we can just call .model_dump()
response.model_dump()

{'sentiment': 'Positive', 'aggressiveness': 1, 'language': 'Spanish'}

# Finer control

In [17]:
# Careful schema definition gives us more control over the model's output.

# Specifically, we can define:
# Possible values for each property
# Description to make sure that the model understands the property
# Required properties to be returned

In [47]:
# Re-declare the tagging prompt for the constrained-schema example (the template
# text is identical to the earlier definition); `{input}` receives the passage.
tagging_prompt = ChatPromptTemplate.from_template(
    template="""
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

class Classification(BaseModel):
    # Constrained tagging schema: every field is required (`...`) and advertises
    # its allowed values through an `enum` entry in the generated JSON schema.
    #
    # The enum is passed via `json_schema_extra` because handing arbitrary extra
    # keywords such as `enum=` directly to `Field` is deprecated in Pydantic v2.
    # NOTE(review): the enum is a schema hint for the model only; Pydantic itself
    # does not reject other strings for these `str` fields.

    # Sentiment label, one of three allowed values.
    sentiment: str = Field(
        ...,
        json_schema_extra={"enum": ["happy", "neutral", "sad"]},
    )
    # Aggressiveness level; the allowed values are the strings '1' through '5'.
    aggressiveness: str = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
        json_schema_extra={"enum": ['1', '2', '3', '4', '5']},
    )
    # Language label, restricted to the five listed languages.
    language: str = Field(
        ...,
        json_schema_extra={"enum": ["spanish", "english", "french", "german", "italian"]},
    )


In [48]:
# Bind the redefined `Classification` schema to the chat model created earlier
# (`model`) instead of constructing another identical client.
llm = model.with_structured_output(Classification)

In [50]:
# Run the same sample passage through the constrained schema: render the prompt,
# then send it to the structured-output LLM.
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
prompt = tagging_prompt.invoke(dict(input=inp))
response = llm.invoke(prompt)

In [51]:
response

Classification(sentiment='happy', aggressiveness='1', language='spanish')

In [52]:
response.model_dump()

{'sentiment': 'happy', 'aggressiveness': '1', 'language': 'spanish'}

In [53]:
# Classify a second sample passage with the constrained schema.
inp = "Weather is ok here, I can go outside without much more than a coat"
prompt = tagging_prompt.invoke(dict(input=inp))
response = llm.invoke(prompt)

In [54]:
response

Classification(sentiment='neutral', aggressiveness='1', language='english')