In [None]:
pip install transformers --quiet

In [None]:
pip install torch --quiet

In [None]:
pip install langchain_core --quiet

# LLM for the Project

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint so tokenizer and weights cannot
# drift apart when the model is swapped.
MODEL_NAME = "Qwen/Qwen3-0.6B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# One-turn conversation. The whitespace inside the prompt literal (including
# the tab characters in the step list) is sent to the model verbatim, so it is
# deliberately left untouched.
messages = [
    {
        "role": "user",
        "content":
        """ 
        You are a dataframe expert.
        Your mission is to follow the steps to finish the mission
        	first step: Data view analyzing and cleaning
	        second step: Data pre-processing
    	    third step: Feature engineering

        Please write the python code to implement these steps.
        """
    },
]

# Render the conversation with the tokenizer's chat template, append the
# generation prompt, and move the resulting tensors to the model's device.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# generate() returns prompt + completion; remember where the prompt ends so
# only the newly generated tokens are printed.
prompt_length = inputs["input_ids"].shape[-1]
outputs = model.generate(**inputs, max_new_tokens=2000)

# skip_special_tokens=True keeps special tokens (e.g. the end-of-turn marker)
# out of the printed text, matching how ChatQwen3 decodes its answers.
print(tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True))

In [None]:
from typing import Any

import torch
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatResult, ChatGeneration
from transformers import AutoTokenizer, AutoModelForCausalLM

# Maps LangChain message types to the role names expected by chat templates.
# Message types not listed here are sent with the "user" role.
_ROLE_BY_MESSAGE_TYPE = {
    "human": "user",
    "ai": "assistant",
    "system": "system",
    "tool": "tool",
}


class ChatQwen3(BaseChatModel):
    """LangChain chat model backed by a local Hugging Face causal LM.

    The conversation is rendered with the tokenizer's own chat template (the
    same path the stand-alone generation cell uses) instead of a hand-built
    "User:/Assistant:" transcript, so system messages keep their role.

    Attributes:
        tokenizer: Tokenizer loaded with ``AutoTokenizer.from_pretrained``.
        model: Model loaded with ``AutoModelForCausalLM.from_pretrained``.
        max_new_tokens: Default generation budget; can be overridden per call
            by passing ``max_new_tokens=...`` through ``**kwargs``.
    """

    # BaseChatModel is a pydantic model: attributes must be declared as fields,
    # otherwise assigning them in __init__ is rejected.
    tokenizer: Any = None
    model: Any = None
    max_new_tokens: int = 200

    def __init__(self, model_name="Qwen/Qwen3-0.6B", device="cpu", **kwargs):
        """Load the tokenizer and model, then initialise the pydantic base.

        Args:
            model_name: Hugging Face model id or local path.
            device: Device the model weights are moved to.
            **kwargs: Extra fields forwarded to ``BaseChatModel``.
        """
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
        # The base initialiser must run so the pydantic machinery is set up;
        # the loaded objects are handed over as declared fields.
        super().__init__(tokenizer=tokenizer, model=model, **kwargs)

    @property
    def _llm_type(self):
        """Identifier LangChain uses to label this model type."""
        return "qwen3-chat"

    def _generate(
        self, messages: list[BaseMessage], stop=None, run_manager=None, **kwargs
    ) -> ChatResult:
        """Generate one assistant reply for the given conversation.

        Args:
            messages: Conversation history, oldest first.
            stop: Optional stop sequences; the reply is cut before the first
                occurrence of any of them.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: ``max_new_tokens`` overrides the instance default.

        Returns:
            A ``ChatResult`` holding a single ``AIMessage`` generation.
        """
        # Convert the LangChain messages into the role/content dicts that the
        # chat template consumes.
        conversation = [
            {
                "role": _ROLE_BY_MESSAGE_TYPE.get(m.type, "user"),
                "content": m.content,
            }
            for m in messages
        ]

        # NOTE(review): Qwen3's model card describes a default "thinking" mode
        # for its chat template; with a small token budget the reply may be
        # mostly reasoning text. Confirm, and consider enable_thinking=False.
        inputs = self.tokenizer.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)

        max_new_tokens = kwargs.get("max_new_tokens", self.max_new_tokens)
        outputs = self.model.generate(**inputs, max_new_tokens=max_new_tokens)

        # generate() returns prompt + completion; keep only the new tokens.
        prompt_length = inputs["input_ids"].shape[-1]
        answer = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        # Honour caller-supplied stop sequences by truncating the decoded text.
        for stop_sequence in stop or []:
            cut = answer.find(stop_sequence)
            if cut != -1:
                answer = answer[:cut]

        ai_message = AIMessage(content=answer)
        return ChatResult(generations=[ChatGeneration(message=ai_message)])

# Afterwards, ChatQwen3 can be used as the node model in LangGraph.


In [None]:
pip install scikit-learn --quiet

In [None]:

import pandas as pd

# Step 1: Data View Analysis and Cleaning
# Sample data (replace with your actual data)
data_raw = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [25, 30, 22, 40, 28],
    'Salary': [50000, 60000, 45000, 70000, 55000],
    'Gender': ['M', 'F', 'M', 'F', 'M'],
    'Education': ['Bachelor', 'Master', 'Doctorate', 'Bachelors', 'Master']
})

# Data Cleaning
# Remove duplicates (rows are compared on every column except Salary).
data_dedup = data_raw.drop_duplicates(subset=['Name', 'Age', 'Gender', 'Education'])

# Normalise inconsistent category labels so that 'Bachelor' and 'Bachelors'
# are not one-hot encoded into two separate columns.
data_labeled = data_dedup.assign(
    Education=data_dedup['Education'].replace({'Bachelors': 'Bachelor'})
)

# Handle missing values per column type: numeric columns get 0 as before,
# text columns get a placeholder label. A blanket fillna(0) would put the
# number 0 into text columns and create a bogus 'Gender_0' dummy below.
fill_values = {
    column: 0 if pd.api.types.is_numeric_dtype(dtype) else 'Unknown'
    for column, dtype in data_labeled.dtypes.items()
}
data_clean = data_labeled.fillna(fill_values)

# Step 2: Data Pre-processing
# Convert categorical variables to numerical (one-hot encoding)
data_encoded = pd.get_dummies(data_clean, columns=['Gender', 'Education'])

# Step 3: Feature Engineering
# Add new features (e.g., Age * 2)
data = data_encoded.assign(**{'Age * 2': data_encoded['Age'] * 2})

# NOTE: a model-fitting sketch used to live here as commented-out code. It was
# removed because it indexed the 'Gender' and 'Education' columns, which no
# longer exist after one-hot encoding, and fitted LogisticRegression (a
# classifier) on the continuous 'Salary' target.

# Last expression of the cell: rich display of the engineered frame.
data

In [None]:
# Every name this cell uses is imported here so the cell runs on a fresh
# kernel; json, Annotated, tool and logger were previously undefined.
import json
import logging
from typing import Annotated

from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL

logger = logging.getLogger(__name__)

# SECURITY: PythonREPL executes whatever code string it is given inside this
# kernel's process. The code comes from a language model, so only run this
# notebook in a sandboxed / disposable environment.
repl = PythonREPL()


# The function docstring below is kept as-is on purpose: the @tool decorator
# uses it as the tool description that is shown to the model.
@tool
def code_execution_tool(
    code: Annotated[str, "Python code string which should be executed in tool"],
):
    """
    use this tool to execute python code 
    """
    # Lazy %-style formatting: the message is only built if INFO is enabled.
    logger.info("code_execution_tool is called with code:\n %s\n", code)
    outcome = repl.run(code)

    # Return both the executed code and its outcome as one JSON string;
    # ensure_ascii=False keeps non-ASCII characters readable.
    return json.dumps({"Python Code": f"Executed python code is: {code}",
                       "Outcome": f"Outcome after executing python code is: {outcome}"}, ensure_ascii=False )
