## dataset

https://dspy-docs.vercel.app/docs/building-blocks/data

https://dspy-docs.vercel.app/docs/cheatsheet#dspy-dataloaders

In [11]:
file_path = "../.data/mix_model/data/all_raretrible.txt"
data = open(file_path, "r").read().split("\n")
data = [x.strip().split(",") for x in data]
data = [[x[0], ",".join(x[1:])] for x in data]
print(data[0])

['感激', '好的,那我谢谢您了。']


In [16]:
import pandas as pd

# 转换 data 到 pandas DataFrame
df = pd.DataFrame(data, columns=["label", "text"])

# 保存为 CSV 文件
cvs_file = "../.data/data.csv"
df.to_csv(cvs_file, index=False)

In [18]:
from dspy.datasets import DataLoader

# 创建 dspy.datasets 对象
dl = DataLoader()
dataset = dl.from_csv(cvs_file, fields=("label", "text"), input_keys=("label", "text"))
print(dataset[0])

Example({'label': '感激', 'text': '好的,那我谢谢您了。'}) (input_keys={'text', 'label'})


In [33]:
splits = dl.train_test_split(dataset, train_size=20, test_size=50)
train_dataset = splits["train"]
test_dataset = splits["test"]

In [34]:
print(len(train_dataset), len(test_dataset))
print(train_dataset[0])
print(test_dataset[0])

20 50
Example({'label': '生气', 'text': '那里可以投诉你们'}) (input_keys={'text', 'label'})
Example({'label': '生气', 'text': '麻痹,不服来干我操你邪祖奶奶的'}) (input_keys={'text', 'label'})


In [35]:
sampled_example = dl.sample(dataset, n=5)  # `dataset` is a List of dspy.Example
print(len(sampled_example), sampled_example[0])

5 Example({'label': '中性', 'text': '您好,我女儿的巧虎快到期了'}) (input_keys={'text', 'label'})


In [39]:
import os

import dspy
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv(), override=True)
api_key = os.getenv("ONEAPI_API_KEY")
base_url = os.getenv("ONEAPI_BASE_URL")
turbo = dspy.OpenAI(
    api_key=api_key, model="glm-4-flash", api_base=base_url, model_type="chat"
)

dspy.settings.configure(
    lm=turbo,
)

In [51]:
import dspy
from dsp import passages2text


class BasicQA(dspy.Signature):
    """sentiment analysis from text"""

    question = dspy.InputField(desc="list of input text", format=passages2text)
    answer = dspy.OutputField(
        desc="question 中第一个输入的情绪分析结果列表，包括 ['中性', '惊讶', '感激', '抱怨', '焦急', '生气', '高兴']",
        type=list,
    )

In [52]:
# Define the predictor.
generate_answer = dspy.Predict(BasicQA)

samples = [x.text for x in sampled_example]
# Call the predictor on a particular input.
pred = generate_answer(question=samples)

# Print the input and the prediction.
print(f"Question: {samples}")
print(f"Predicted Answer: {pred.answer}")

Question: ['您好,我女儿的巧虎快到期了', '我看下都吓我一跳', '昂行吗谢谢了', '没有虚电我他妈闲着蛋疼', '我的账号怎么不在了']
Predicted Answer: [1] ['中性']
[2] ['惊讶']
[3] ['感激']
[4] ['生气']
[5] ['焦急']


In [53]:
turbo.inspect_history(n=1)




sentiment analysis from text

---

Follow the following format.

Question: list of input text
Answer: question 中第一个输入的情绪分析结果列表，包括 ['中性', '惊讶', '感激', '抱怨', '焦急', '生气', '高兴']

---

Question:
[1] «您好,我女儿的巧虎快到期了»
[2] «我看下都吓我一跳»
[3] «昂行吗谢谢了»
[4] «没有虚电我他妈闲着蛋疼»
[5] «我的账号怎么不在了»
Answer:[32m [1] ['中性']
[2] ['惊讶']
[3] ['感激']
[4] ['生气']
[5] ['焦急'][0m





"\n\n\nsentiment analysis from text\n\n---\n\nFollow the following format.\n\nQuestion: list of input text\nAnswer: question 中第一个输入的情绪分析结果列表，包括 ['中性', '惊讶', '感激', '抱怨', '焦急', '生气', '高兴']\n\n---\n\nQuestion:\n[1] «您好,我女儿的巧虎快到期了»\n[2] «我看下都吓我一跳»\n[3] «昂行吗谢谢了»\n[4] «没有虚电我他妈闲着蛋疼»\n[5] «我的账号怎么不在了»\nAnswer:\x1b[32m [1] ['中性']\n[2] ['惊讶']\n[3] ['感激']\n[4] ['生气']\n[5] ['焦急']\x1b[0m\n\n\n"

In [27]:
generate_answer = dspy.ChainOfThought(BasicQA)

# Call the predictor on a particular input alongside a hint.
question = sampled_example[0].text
pred = generate_answer(question=question)

In [29]:
pred.answer

'Question: 因为就是他骗的我\nReasoning: Let\'s think step by step in order to produce the answer. We need to analyze the sentiment of the text "因为就是他骗的我" by breaking down the meaning and emotional tone of the words and phrases used. 1. "因为" (yīn wèi) means "because," which'