# JSON Schema Classification & Information Extraction

In [41]:
import json
import os
from typing import Optional

from openai import OpenAI
from pydantic import BaseModel, EmailStr, ValidationError

class ChatInfo(BaseModel):
    """User details pulled out of a chat message.

    Every field is optional and defaults to ``None``, so an instance can be
    built from whatever subset of details is available (including none).
    """

    name: Optional[str] = None
    # EmailStr makes pydantic validate the value as an e-mail address.
    email: Optional[EmailStr] = None
    # Plain string: no phone-number format validation is applied here.
    phone: Optional[str] = None
    location: Optional[str] = None
    age: Optional[int] = None

In [42]:
# OpenAI SDK client pointed at Groq's OpenAI-compatible base URL.
# The key is read from the GROQ_API_KEY environment variable so that no secret
# has to be written into the notebook; when the variable is unset this falls
# back to the empty string that was previously hard-coded.
client = OpenAI(
    api_key=os.environ.get("GROQ_API_KEY", ""),
    base_url="https://api.groq.com/openai/v1",
)



In [43]:

# One row per extractable attribute: (property name, JSON type, description).
# The "properties" mapping below is generated from these rows in this order.
_USER_INFO_FIELDS = (
    ("name", "string", "Full name of the user (only if explicitly mentioned)"),
    ("email", "string", "User's email address (only if explicitly mentioned)"),
    ("phone", "string", "User's phone number (only if explicitly mentioned)"),
    (
        "location",
        "string",
        "User's location - city, state, or country (only if explicitly mentioned)",
    ),
    ("age", "integer", "User's age (only if explicitly mentioned)"),
)

# Function-calling schema describing the arguments the model should return.
extract_user_info_schema = {
    "name": "extract_user_info",
    "description": "Extract available user details from chat messages. Only extract information that is explicitly mentioned.",
    "parameters": {
        "type": "object",
        "properties": {
            field_name: {"type": json_type, "description": field_help}
            for field_name, json_type, field_help in _USER_INFO_FIELDS
        },
        # No field is mandatory, so the model is never forced to supply
        # (and thereby invent) a value that the message does not contain.
        "required": [],
    },
}


In [47]:

def contains_user_info(chat_input: str) -> bool:
    """Ask the model whether a message contains any personal information.

    This is a cheap yes/no pre-check run before attempting extraction.

    Args:
        chat_input: The raw chat message to classify.

    Returns:
        True when the model's reply begins with "yes" (case-insensitive,
        surrounding punctuation ignored); False otherwise, including when the
        input is empty or the model returns no text.

    Raises:
        Whatever the API client raises on request failure; errors are not
        handled here.
    """
    # Nothing to classify: skip the API round-trip for empty/blank input.
    if not chat_input or not chat_input.strip():
        return False

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {
                "role": "system",
                "content": "Determine if the user's message contains any personal information like name, email, phone, location, or age. Respond with only 'yes' or 'no'."
            },
            {
                "role": "user",
                "content": chat_input
            }
        ]
    )

    # `content` can be None, so normalise to a string before calling str
    # methods on it.
    reply = (response.choices[0].message.content or "").strip().lower()

    # Models do not always obey "respond with only yes/no": accept answers
    # such as "Yes." or "yes, it includes a name" by checking the first word
    # with surrounding punctuation removed.
    words = reply.split()
    return bool(words) and words[0].strip(".,!;:'\"") == "yes"


In [51]:
def _build_chat_info(fields: dict) -> ChatInfo:
    """Validate ``fields``, dropping only the entries pydantic rejects.

    If the first validation fails, the fields named in the validation errors
    are removed and validation is retried once, so one malformed value (for
    example a bad e-mail) does not discard the other, valid values.

    Raises:
        ValidationError: if the retry without the rejected fields still fails.
    """
    try:
        return ChatInfo(**fields)
    except ValidationError as e:
        print(f"Validation error: {e}")
        # The first element of each error's `loc` is the offending field name.
        rejected = {err["loc"][0] for err in e.errors() if err["loc"]}
        kept = {k: v for k, v in fields.items() if k not in rejected}
        return ChatInfo(**kept)


def extract_user_info_safely(chat_input: str) -> ChatInfo:
    """Extract user details from a chat message without ever raising.

    The message is first screened with ``contains_user_info``; only if that
    reports personal information is the model asked to call
    ``extract_user_info`` with structured arguments, which are then validated
    into a ``ChatInfo``.

    Args:
        chat_input: The raw chat message.

    Returns:
        A ``ChatInfo`` holding the fields that were extracted and passed
        validation. An empty ``ChatInfo`` is returned when nothing is
        detected or when any step fails; failures are reported via ``print``.
    """
    try:
        # The pre-check is itself an API call, so it lives inside the try:
        # a network/API failure here must not escape a "safe" function.
        if not contains_user_info(chat_input):
            print("No user information detected in message")
            return ChatInfo()

        # NOTE(review): `functions` / `function_call` are the legacy
        # function-calling parameters of the chat completions API; consider
        # migrating to `tools` / `tool_choice`.
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that extracts user information from messages. Only extract information that is explicitly stated. Do not guess or make up information. If information is not provided, leave those fields empty."
                },
                {
                    "role": "user",
                    "content": chat_input
                }
            ],
            functions=[extract_user_info_schema],
            function_call={"name": "extract_user_info"}
        )

        # The model may answer with plain text instead of a function call.
        function_call = response.choices[0].message.function_call
        if function_call is None:
            print("Unexpected error: model response did not include a function call")
            return ChatInfo()

        # Parse the JSON string returned by the model
        data = json.loads(function_call.arguments)
        if not isinstance(data, dict):
            print("Unexpected error: function call arguments are not a JSON object")
            return ChatInfo()

        # Drop keys whose value is null or an empty string so they fall back
        # to the model's None defaults instead of failing validation.
        filtered_data = {k: v for k, v in data.items() if v is not None and v != ""}

        # Validate with pydantic, keeping whichever fields are valid.
        return _build_chat_info(filtered_data)

    except ValidationError as e:
        print(f"Validation error: {e}")
        return ChatInfo()  # Return empty ChatInfo object
    except json.JSONDecodeError as e:
        print(f"JSON decode error: {e}")
        return ChatInfo()
    except Exception as e:
        # Deliberate catch-all boundary: this function promises not to raise.
        print(f"Unexpected error: {e}")
        return ChatInfo()



In [56]:

# Sample messages used to exercise the extraction pipeline end to end.
test_inputs = [
    "hi there",
    "Hey, I'm Harshita, 21 years old, living in Gurgaon. You can email me at harshita@example.com or call me at +919876543210.",
    "My name is John and I'm 25",
    "her number is 123absc",
]

# Visual separator printed after each sample's results.
divider = "-" * 50

for message in test_inputs:
    print(f"\nInput: {message}")
    info = extract_user_info_safely(message)

    # Full model as pretty-printed JSON (unset fields appear as null).
    as_json = info.model_dump_json(indent=2)
    print(f"Extracted: {as_json}")

    # Only the fields that were actually populated.
    populated = info.model_dump(exclude_none=True)
    print(f"As dict: {populated}")
    print(divider)


Input: hi there
No user information detected in message
Extracted: {
  "name": null,
  "email": null,
  "phone": null,
  "location": null,
  "age": null
}
As dict: {}
--------------------------------------------------

Input: Hey, I'm Harshita, 21 years old, living in Gurgaon. You can email me at harshita@example.com or call me at +919876543210.
Extracted: {
  "name": "Harshita",
  "email": "harshita@example.com",
  "phone": "+919876543210",
  "location": "Gurgaon",
  "age": 21
}
As dict: {'name': 'Harshita', 'email': 'harshita@example.com', 'phone': '+919876543210', 'location': 'Gurgaon', 'age': 21}
--------------------------------------------------

Input: My name is John and I'm 25
Extracted: {
  "name": "John",
  "email": null,
  "phone": null,
  "location": null,
  "age": 25
}
As dict: {'name': 'John', 'age': 25}
--------------------------------------------------

Input: My name is latika my age is twenty five
Extracted: {
  "name": "latika",
  "email": null,
  "phone": null,
  "