In [1]:
from datetime import datetime 
import logfire 
from pydantic import BaseModel 

In [1]:
import re
import uuid
import random
import tqdm
import requests
import json
import json5
import fire 
import streamlit as st 
from PIL import Image
from io import BytesIO



from pydantic import BaseModel, Field
from typing_extensions import (Annotated, TypedDict, Sequence, Union, Optional, Literal, List, Dict, Iterator, Any)



from langchain_core.tools import InjectedToolCallId, BaseTool
from langchain.tools import tool
from langchain_ollama import ChatOllama
from langchain_core.messages import (HumanMessage, AIMessage, SystemMessage, BaseMessage, ToolMessage)
from langchain_core.prompts import PromptTemplate



from langgraph.types import Command, interrupt
from langgraph.graph.message import add_messages
from langgraph.store.memory import InMemoryStore
from langgraph.checkpoint.memory import MemorySaver
from langgraph.managed import IsLastStep
from langgraph.graph import (MessagesState, StateGraph, START, END)
from langgraph.prebuilt import (create_react_agent, ToolNode, tools_condition)



from tools import (add, subtract, multiply, divide, power, square_root)
from prompts_lib import Prompts 

In [3]:
# Shared chat model for the notebook: a local Ollama deepseek-r1 with a low
# sampling temperature.
# NOTE(review): num_predict is presumably the generation token cap -- confirm
# against the ChatOllama docs that 128_000 is intended.
LOW_TEMP_MODEL = ChatOllama(
	model="deepseek-r1:latest",
	temperature=0.1,
	num_predict=128_000,
)

In [None]:
from pydantic import BaseModel, Field, ValidationError
from typing import List, Optional

# Schema for the "user" section of the payload: what the requester wants to do
# and how experienced they are. Both fields are required (`...` = no default).
class User(BaseModel):
	intent: str = Field(
		...,
		description="Action the user wants to perform, e.g., 'build', 'train', 'evaluate'",
	)
	expertise: str = Field(
		...,
		description="User's expertise level, e.g., 'beginner', 'medium', 'expert'",
	)

# Schema for the "problem" section of the payload.
class Problem(BaseModel):
	# Required free-text fields (no defaults).
	area: str
	downstream_task: str
	application_domain: str
	description: str
	# Optional lists of metric names; both default to an empty list when omitted.
	performance_metrics: List[str] = []
	complexity_metrics: List[str] = []

# Schema for a single entry of the payload's "dataset" list.
class Dataset(BaseModel):
	# --- required ---
	name: str
	modality: List[str]
	target_variables: List[str]
	# --- optional, defaults to None ---
	specification: Optional[str] = None
	# --- required ---
	description: str
	# --- optional processing steps, each defaults to an empty list ---
	preprocessing: List[str] = []
	augmentation: List[str] = []
	visualization: List[str] = []
	# --- required: where the data comes from ---
	source: str = Field(
		...,
		description="'user-upload', 'ai-generate'",
	)

# Schema for the "model" section of the payload.
class Model(BaseModel):
	# Required identifiers.
	name: str
	family: str
	# Required; the description text is carried into the generated schema.
	type: str = Field(
		...,
		description="classical machine learning",
	)
	# Optional free-text specification; None when omitted.
	specification: Optional[str] = None

# Top-level schema that validate_json() checks a payload against.
# None of the four fields declares a default, so all of them must be present.
class ParsedJSON(BaseModel):
	user: User
	problem: Problem
	# One entry per dataset described in the payload.
	dataset: List[Dataset]
	model: Model 

def validate_json(data):
	"""Validate a decoded JSON payload against the ``ParsedJSON`` schema.

	Args:
		data: The decoded JSON value. It must be a mapping whose keys match
			the fields of ``ParsedJSON``; anything else is reported as invalid.

	Returns:
		The ``ParsedJSON`` instance when validation succeeds, otherwise
		``None``. A status line is printed in both cases.
	"""
	from collections.abc import Mapping

	# `ParsedJSON(**data)` can only unpack a mapping. Without this guard a
	# top-level list / string / None would escape as an uncaught TypeError
	# instead of being reported as an invalid payload.
	if not isinstance(data, Mapping):
		print(">>> JSON không hợp lệ:", f"expected a JSON object, got {type(data).__name__}")
		return None

	try:
		validated_data = ParsedJSON(**data)
	except ValidationError as e:
		print(">>> JSON không hợp lệ:", e)
		return None

	print(">>> JSON hợp lệ!")
	return validated_data

# Sample payload run through validate_json().
# Note: it has no top-level "model" entry and its dataset entry has no
# "source", both of which the schema declares without defaults, so this call
# is expected to take the "invalid" branch.
input_json = dict(
	user=dict(intent="build", expertise="medium"),
	problem=dict(
		area="tabular data analysis",
		downstream_task="tabular classification",
		application_domain="agriculture",
		description="Build a model to classify banana quality...",
		performance_metrics=[],
		complexity_metrics=[],
	),
	dataset=[
		dict(
			name="banana_quality",
			modality=["tabular"],
			target_variables=["quality"],
			specification=None,
			description="A dataset containing numerical information about bananas...",
		),
	],
)

validate_json(input_json)


In [None]:
# Single-turn conversation: one human message, reused by later cells.
prompt = [
	HumanMessage(content="What is the capital of France?"),
]

In [None]:
# `model` is never defined in this notebook, so the call failed with a
# NameError on a fresh kernel; use the chat model configured above instead.
LOW_TEMP_MODEL.invoke(prompt)

In [None]:
# A system message sets the assistant's behaviour; the human message holds the question.
system_msg = SystemMessage("""You are a helpful assistant that responds to questions with three exclamation marks.""")
human_msg = HumanMessage('What is the capital of France?')
# `model` is never defined in this notebook (NameError on a fresh kernel);
# use the chat model configured above instead.
LOW_TEMP_MODEL.invoke([system_msg, human_msg])

In [None]:
from langchain_core.prompts import PromptTemplate
# Prompt with two placeholders, {context} and {question}.
template = PromptTemplate.from_template(
	"""Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don't know".

Context: {context}

Question: {question}

Answer: """
)

# Fill both placeholders; the rendered prompt is the cell's displayed output.
template.invoke(
	dict(
		context="""The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.""",
		question="Which model providers offer LLMs?",
	)
)

In [None]:
# `model` is never defined in this notebook (NameError on a fresh kernel);
# use the chat model configured above instead.
completion = LOW_TEMP_MODEL.invoke(prompt)

In [None]:
from pydantic import BaseModel
class AnswerWithJustification(BaseModel):
	'''An answer to the user's question along with justification for the answer.'''
	# Field descriptions go through Field(description=...) so they become part
	# of the generated schema. A bare string literal placed after an attribute
	# is only an expression statement, which pydantic ignores by default.
	# `Field` comes from the notebook's import cell.
	answer: str = Field(description="The answer to the user's question")
	justification: str = Field(description="Justification for the answer")

In [None]:
# Wrap the chat model so its replies are returned as AnswerWithJustification.
# `model` is never defined in this notebook (NameError on a fresh kernel);
# use the chat model configured above instead.
structured_llm = LOW_TEMP_MODEL.with_structured_output(AnswerWithJustification)
structured_llm

In [None]:
# Ask a question through the structured-output wrapper; the result is the
# cell's displayed output.
structured_llm.invoke(
	"What weighs more, a pound of bricks or a pound of feathers"
)