In [1]:
# DeepSeek API key. It is read from the DEEPSEEK_API_KEY environment variable so
# that the secret is never stored in (or committed with) the notebook.
# NOTE: any key that was previously hard-coded here should be treated as leaked
# and revoked in the DeepSeek console.
import os  # imported in this cell because it runs before the notebook's import cell
import warnings

dp_api_key = os.environ.get("DEEPSEEK_API_KEY", "")
if not dp_api_key:
    # Warn instead of raising so the rest of the notebook can still be loaded;
    # API calls will fail with an authentication error until the key is provided.
    warnings.warn(
        "DEEPSEEK_API_KEY is not set; DeepSeek API requests will fail until it is provided."
    )
data_batch = "sample"

In [2]:
import asyncio
import math
import os

import pandas
from openai import AsyncOpenAI
from openai import OpenAI
from tenacity import retry, stop_after_attempt, wait_exponential
from tqdm.asyncio import tqdm_asyncio
from tqdm.notebook import tqdm

In [3]:
# Notebook-wide asynchronous DeepSeek client, used by the async request helper below
aclient = AsyncOpenAI(base_url="https://api.deepseek.com", api_key=dp_api_key)

In [4]:
# Synchronous LLM call
def get_intro(comment):
    """Ask the DeepSeek chat model to rate a single housing comment.

    A new synchronous client is created on every call. The prompt asks the
    model to pick one of five ratings (很好 / 比较好 / 适中 / 比较不好 / 很不好)
    for the property described in ``comment``.

    Args:
        comment: The customer comment; it is interpolated into the user prompt.

    Returns:
        The message content of the first choice in the model's response.
    """
    sync_client = OpenAI(api_key=dp_api_key, base_url="https://api.deepseek.com")

    system_prompt = "你是一名精明的房产从业者，熟知评判一处房屋好坏的各种条件"
    user_prompt = f"假设将房屋硬件条件良好、配套设施齐全、周边条件便利定义为“好”，将配套设施不全、条件恶劣定义为“不好”，【{comment}】这句评价中的房屋更贴近“很好、比较好、适中、比较不好、很不好”这五种选项中的哪一种？请告诉我五个选项中最合适的一个。我需要依据你的回答进行分类，因此不要修改选项，不要解释理由。"
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = sync_client.chat.completions.create(
        model="deepseek-chat",
        messages=chat_messages,
        stream=False,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [5]:
# com = "电费按表计量，治安堪忧，拎包入住"
# print(get_intro(com))

In [6]:
@retry(wait=wait_exponential(multiplier=1), stop=stop_after_attempt(3))
async def async_get_intro(comment, semaphore):
    """Asynchronously ask the DeepSeek chat model to rate one housing comment.

    The request is issued through the shared ``aclient`` while holding
    ``semaphore``. The whole coroutine is wrapped by tenacity's ``retry``
    (stop after 3 attempts, exponential wait between attempts).

    Args:
        comment: The customer comment; it is interpolated into the user prompt.
        semaphore: Async context manager acquired around the API request to
            limit how many requests run at once.

    Returns:
        The message content of the first choice in the model's response.
    """
    async with semaphore:  # limit the number of concurrent requests
        system_prompt = "你是一名精明的房产从业者，熟知评判一处房屋好坏的各种条件"
        user_prompt = f"假设将房屋硬件条件良好、配套设施齐全、周边条件便利定义为“好”，将配套设施不全、条件恶劣定义为“不好”，【{comment}】这句评价中的房屋更贴近“很好、比较好、适中、比较不好、很不好”这五种选项中的哪一种？请告诉我五个选项中最合适的一个。我需要依据你的回答进行分类，因此不要修改选项，不要解释理由。"
        chat_messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        completion = await aclient.chat.completions.create(
            model="deepseek-chat",
            messages=chat_messages,
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content

In [7]:
import asyncio
from tqdm.asyncio import tqdm_asyncio

async def batch_get_intros(items, max_concurrency=50):
    """Rate a batch of comments concurrently, showing a progress bar.

    Args:
        items: Sized collection of comments; each one is passed to
            ``async_get_intro``.
        max_concurrency: Number of permits of the semaphore shared by all
            requests of this batch. Must be at least 1.

    Returns:
        The value produced by ``tqdm_asyncio.gather`` over one request per item.

    Raises:
        ValueError: If ``max_concurrency`` is smaller than 1.
    """
    if max_concurrency < 1:
        # Semaphore(0) would make every request wait forever.
        raise ValueError("max_concurrency must be at least 1")

    semaphore = asyncio.Semaphore(max_concurrency)

    # async_get_intro acquires the semaphore itself, so it must NOT be acquired
    # here as well: taking the same semaphore twice per request makes every
    # request hold two permits (halving the effective concurrency) and lets
    # tasks that hold the outer permit block each other on the inner one.
    tasks = [async_get_intro(item, semaphore) for item in items]

    results = await tqdm_asyncio.gather(
        *tasks,
        desc=f"Processing: (max_concurrency={max_concurrency})",
        total=len(items),
        ascii=True,
        mininterval=0.1
    )
    return results

In [8]:
# Select the dataset split / target (this replaces the earlier "sample" value of
# data_batch) and load the matching structured-data CSV.
data_batch, data_target = "train", "price"
path = f"结构化数据_{data_batch}_{data_target}.csv"  # CSV file that holds the customer-feedback column
df = pandas.read_csv(path)

In [9]:
batch_size = 1000  # number of rows handled per batch
comment_column = "客户反馈"  # column holding the customer feedback text
id_column = "Price"  # column whose values are written to the output "ID" field
n_dim = len(df)  # total number of rows in the loaded data

In [10]:
# 分批次获取结果
for epoch in range(math.ceil(n_dim / batch_size)):
    start_idx = epoch * batch_size
    end_idx = min(start_idx + batch_size, n_dim)
    
    # 获取当前批次菜品名
    batch_comments = df.iloc[start_idx:end_idx][comment_column].tolist()
    batch_ids = df.iloc[start_idx:end_idx][id_column].tolist()
#     for item in batch_items:
#         item = str(item)
    
    # 异步获取菜品标签
    feel_results = await batch_get_intros(batch_comments) # 确保返回顺序与 batch_items 一致
    
    # 构建DataFrame并进行二次检查
    batch_intro_df = pandas.DataFrame({
        "ID": batch_ids,
        "客户反馈": batch_comments,
        "客户反馈评级": feel_results
    })
    
    # 保存文件
    output_path = f"batch_result//客户反馈评级//{data_batch}_{data_target}//epoch{epoch}.csv"
    batch_intro_df.to_csv(output_path, index=False)
    
    # 更新进度
    print(f"Processed: {epoch+1}/{math.ceil(n_dim / batch_size)} batches")

Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.88it/s]


Processed: 1/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.31it/s]


Processed: 2/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.84it/s]


Processed: 3/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.77it/s]


Processed: 4/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.19it/s]


Processed: 5/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.39it/s]


Processed: 6/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.70it/s]


Processed: 7/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.16it/s]


Processed: 8/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.34it/s]


Processed: 9/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.10it/s]


Processed: 10/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.66it/s]


Processed: 11/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.33it/s]


Processed: 12/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:43<00:00, 23.19it/s]


Processed: 13/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.19it/s]


Processed: 14/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.08it/s]


Processed: 15/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.91it/s]


Processed: 16/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.72it/s]


Processed: 17/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.80it/s]


Processed: 18/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.35it/s]


Processed: 19/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.14it/s]


Processed: 20/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.72it/s]


Processed: 21/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.88it/s]


Processed: 22/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.89it/s]


Processed: 23/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.27it/s]


Processed: 24/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.18it/s]


Processed: 25/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.53it/s]


Processed: 26/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.11it/s]


Processed: 27/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.43it/s]


Processed: 28/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.56it/s]


Processed: 29/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.38it/s]


Processed: 30/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.80it/s]


Processed: 31/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.55it/s]


Processed: 32/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.81it/s]


Processed: 33/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:43<00:00, 23.17it/s]


Processed: 34/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:44<00:00, 22.60it/s]


Processed: 35/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.55it/s]


Processed: 36/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.90it/s]


Processed: 37/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.44it/s]


Processed: 38/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.34it/s]


Processed: 39/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.70it/s]


Processed: 40/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.07it/s]


Processed: 41/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.80it/s]


Processed: 42/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.85it/s]


Processed: 43/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.32it/s]


Processed: 44/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.60it/s]


Processed: 45/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.58it/s]


Processed: 46/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.33it/s]


Processed: 47/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.33it/s]


Processed: 48/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.52it/s]


Processed: 49/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:43<00:00, 23.22it/s]


Processed: 50/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:47<00:00, 21.05it/s]


Processed: 51/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:44<00:00, 22.35it/s]


Processed: 52/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.51it/s]


Processed: 53/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.73it/s]


Processed: 54/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.88it/s]


Processed: 55/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.03it/s]


Processed: 56/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.69it/s]


Processed: 57/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.35it/s]


Processed: 58/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.26it/s]


Processed: 59/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.45it/s]


Processed: 60/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.25it/s]


Processed: 61/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.02it/s]


Processed: 62/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.14it/s]


Processed: 63/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.73it/s]


Processed: 64/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.76it/s]


Processed: 65/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.61it/s]


Processed: 66/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.12it/s]


Processed: 67/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.14it/s]


Processed: 68/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.11it/s]


Processed: 69/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.28it/s]


Processed: 70/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.20it/s]


Processed: 71/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.73it/s]


Processed: 72/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.87it/s]


Processed: 73/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.66it/s]


Processed: 74/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.76it/s]


Processed: 75/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.83it/s]


Processed: 76/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.51it/s]


Processed: 77/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.44it/s]


Processed: 78/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.82it/s]


Processed: 79/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:39<00:00, 25.27it/s]


Processed: 80/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.81it/s]


Processed: 81/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:39<00:00, 25.39it/s]


Processed: 82/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:39<00:00, 25.06it/s]


Processed: 83/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:39<00:00, 25.31it/s]


Processed: 84/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.33it/s]


Processed: 85/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:39<00:00, 25.16it/s]


Processed: 86/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.73it/s]


Processed: 87/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.93it/s]


Processed: 88/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.80it/s]


Processed: 89/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.18it/s]


Processed: 90/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.93it/s]


Processed: 91/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.81it/s]


Processed: 92/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.97it/s]


Processed: 93/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.50it/s]


Processed: 94/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.72it/s]


Processed: 95/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 23.94it/s]


Processed: 96/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.48it/s]


Processed: 97/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:40<00:00, 24.59it/s]


Processed: 98/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.03it/s]


Processed: 99/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:41<00:00, 24.24it/s]


Processed: 100/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.78it/s]


Processed: 101/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:42<00:00, 23.73it/s]


Processed: 102/104 batches


Processing: (max_concurrency=50): 100%|##########| 1000/1000 [00:43<00:00, 23.18it/s]


Processed: 103/104 batches


Processing: (max_concurrency=50): 100%|##########| 871/871 [00:37<00:00, 23.02it/s]


Processed: 104/104 batches
