In [1]:
import json
import os

def find_data_by_ids(jsonl_file_path, worker_id, sample_id):
    """
    根据worker_id和sample_id查找并返回对应的数据行
    
    Args:
        jsonl_file_path (str): JSONL文件路径
        worker_id (int): 工作员ID
        sample_id (int): 样本ID
    
    Returns:
        dict or None: 找到的数据行，如果未找到则返回None
    """
    if not os.path.exists(jsonl_file_path):
        print(f"文件不存在: {jsonl_file_path}")
        return None
    
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line_num, line in enumerate(file, 1):
            try:
                data = json.loads(line.strip())
                if data.get('worker_id') == worker_id and data.get('sample_id') == sample_id:
                    print(f"在第 {line_num} 行找到匹配的数据")
                    return data
            except json.JSONDecodeError as e:
                print(f"第 {line_num} 行JSON解析错误: {e}")
                continue
    
    print(f"未找到 worker_id={worker_id}, sample_id={sample_id} 的数据")
    return None

def display_data(data):
    """
    格式化显示数据内容
    
    Args:
        data (dict): 要显示的数据字典
    """
    if data is None:
        print("没有数据可显示")
        return
    
    print("=" * 80)
    print(f"Worker ID: {data.get('worker_id')}")
    print(f"Sample ID: {data.get('sample_id')}")
    print("=" * 80)
    
    print(f"\n📝 Instruction:")
    print(f"{data.get('instruction', 'N/A')}")
    
    if data.get('input'):
        print(f"\n📥 Input:")
        print(f"{data.get('input')}")
    
    print(f"\n📄 Content:")
    print(f"{data.get('content', 'N/A')}")
    
    print(f"\n🏷️  Label:")
    print(f"{data.get('label', 'N/A')}")
    
    print(f"\n📊 Counts: {data.get('counts', 'N/A')}")
    print("=" * 80)

# 测试函数
if __name__ == "__main__":
    # 数据文件路径
    jsonl_file = "merged_with_labels_and_counts.jsonl"
    
    # 测试查找功能
    print("测试查找功能:")
    print("1. 查找 worker_id=0, sample_id=0 的数据:")
    data1 = find_data_by_ids(jsonl_file, 0, 0)
    if data1:
        display_data(data1)
    
    print("\n" + "="*100 + "\n")
    
    print("2. 查找 worker_id=0, sample_id=1 的数据:")
    data2 = find_data_by_ids(jsonl_file, 0, 1)
    if data2:
        display_data(data2)


测试查找功能:
1. 查找 worker_id=0, sample_id=0 的数据:
在第 1 行找到匹配的数据
Worker ID: 0
Sample ID: 0

📝 Instruction:
Give three tips for staying healthy.

📄 Content:
.<think>
Okay, the user asked for three tips for staying healthy. Let me think about the most important ones. First, diet is crucial. Eating balanced meals with fruits, vegetables, whole grains, and lean proteins. Maybe mention avoiding processed foods and staying hydrated. That's a solid first tip.

Next, exercise. Regular physical activity is essential. Suggest at least 150 minutes of moderate exercise a week, like walking or cycling. Also, include strength training. But I should make it simple and relatable, not too technical.

Third, sleep. Adults need 7-9 hours. Poor sleep affects health, so emphasizing the importance of a consistent sleep schedule and a restful environment. Maybe mention avoiding screens before bed. That covers the basics without being overwhelming.

Wait, are there other important aspects? Mental health, maybe? But 

In [2]:
# 使用示例：查找特定worker_id和sample_id的数据
def search_data(worker_id, sample_id):
    """
    简化的查找函数，直接显示结果
    
    Args:
        worker_id (int): 工作员ID
        sample_id (int): 样本ID
    """
    jsonl_file = "merged_with_labels_and_counts.jsonl"
    data = find_data_by_ids(jsonl_file, worker_id, sample_id)
    if data:
        display_data(data)
    return data

# 示例用法：
# search_data(0, 0)  # 查找 worker_id=0, sample_id=0 的数据
# search_data(0, 1)  # 查找 worker_id=0, sample_id=1 的数据
# search_data(1, 5)  # 查找 worker_id=1, sample_id=5 的数据

print("数据查找功能已准备就绪！")
print("使用方法：search_data(worker_id, sample_id)")
print("例如：search_data(0, 0)")


数据查找功能已准备就绪！
使用方法：search_data(worker_id, sample_id)
例如：search_data(0, 0)


In [17]:
# 测试功能
print("开始测试数据查找功能...")

search_data(6, 1802)


开始测试数据查找功能...
在第 35133 行找到匹配的数据
Worker ID: 6
Sample ID: 1802

📝 Instruction:
Create a user-friendly registration form.

📄 Content:
Okay, the user wants me to create a user-friendly registration form. Let me start by thinking about what makes a registration form user-friendly. First, it should be simple and not have too many fields. People don't like filling out long forms. So maybe start with the basics: name, email, password, and maybe a confirm password. But wait, should I include more fields like phone number or date of birth? Maybe not unless it's necessary. The user didn't specify, so I'll stick to the essentials.

Next, the form should be visually appealing. Using a clean layout with proper spacing. Maybe a card-style design to make it look modern. Colors should be neutral, maybe a light background with a subtle accent color for buttons. Also, ensuring the form is responsive so it works on mobile devices.

Accessibility is important too. Labels should be clear and associated with

{'worker_id': 6,
 'sample_id': 1802,
 'instruction': 'Create a user-friendly registration form.',
 'input': '',
 'content': 'Okay, the user wants me to create a user-friendly registration form. Let me start by thinking about what makes a registration form user-friendly. First, it should be simple and not have too many fields. People don\'t like filling out long forms. So maybe start with the basics: name, email, password, and maybe a confirm password. But wait, should I include more fields like phone number or date of birth? Maybe not unless it\'s necessary. The user didn\'t specify, so I\'ll stick to the essentials.\n\nNext, the form should be visually appealing. Using a clean layout with proper spacing. Maybe a card-style design to make it look modern. Colors should be neutral, maybe a light background with a subtle accent color for buttons. Also, ensuring the form is responsive so it works on mobile devices.\n\nAccessibility is important too. Labels should be clear and associated wi