notebookb5d5e3163e Version 16 of 24
Public Score (0.81200)

In [2]:
PROMPTS_SYS = """You are a financial analyst taking a test to evaluate your knowledge of finance of different topics in finance. You think step by step approach with reflection to answer queries.
- Answer must be one of the following values: A, B, C, D, E, Fall, or Rise.
- Reason step-by-step internally but only return the final answer in the format: `<id>,<answer>`
- If the correct answer implies "ขึ้น" (increase), respond with `Rise`  
- If the correct answer implies "ลง" (decrease), respond with `Fall`
  
Tags:
[multiple_choice, classification, english, thai, hr, job_design, management, entrepreneur, business, people, ethics, coso, internal_control, corporate_governance, macroeconomics, fiscal_policy, monetary_policy, gdp, financial_forecasting, stock_market, time_series, sentiment_analysis, nlp, prediction, intel]

Here are examples:
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx,D 
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx,A
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx,Rise
"""

In [None]:
import re
import csv
import time
from openai import OpenAI

import tiktoken


# === CONFIGURATION ===
INPUT_FILE = '/kaggle/input/financial-analysis-agent/test.csv'
OUTPUT_FILE = '/kaggle/working/submission.csv'
API_KEY = ''  # <-- ใส่ API Key
BASE_URL = 'https://openrouter.ai/api/v1'
MODEL_NAME = 'deepseek/deepseek-chat-v3-0324:free'  # <-- ใส่ชื่อโมเดลที่ต้องการใช้

MAX_TOTAL_TOKENS = 4048
RESERVED_OUTPUT_TOKENS = 300
MAX_INPUT_TOKENS = MAX_TOTAL_TOKENS - RESERVED_OUTPUT_TOKENS

# โหลด tokenizer (ใช้ tokenizer ของ mistral ที่ใกล้เคียงกับ typhoon ได้)

tokenizer = tiktoken.get_encoding("cl100k_base")


client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# === โหลดและเตรียมคำถาม ===
with open(INPUT_FILE, 'r', encoding='utf-8') as infile:
    lines = [line for line in infile if line.strip() != ""]

text = "".join(lines)

items = re.findall(
    r"([0-9a-fA-F\-]{36}),(.*?)(?=\n[0-9a-fA-F\-]{36},|\Z)",
    text,
    flags=re.DOTALL
)

# === จัดกลุ่มคำถามด้วย tokenizer ===
batches = []
current_batch = []
current_tokens = 0


for qid, content in items:
    # ตัดบรรทัดแรกออกจาก content.strip()
    lines = content.strip().splitlines()
    content_without_first_line = "\n".join(lines[1:])  # ละบรรทัดแรก

    prompt_piece = f"id: {qid}\n{content_without_first_line}\n\n"
    tokens = len(tokenizer.encode(prompt_piece))

    if current_tokens + tokens > MAX_INPUT_TOKENS:
        batches.append(current_batch)
        current_batch = [(qid, content)]
        current_tokens = tokens
    else:
        current_batch.append((qid, content))
        current_tokens += tokens

    

if current_batch:
    batches.append(current_batch)

# === สร้าง prompt และถาม typhoon ===
def build_prompt(batch_items):
    prompt = ""
    for qid, content in batch_items:
        prompt += f"id: {qid}\n{content.strip()}\n\n"
    prompt += "Please respond with the correct answer for each question in format:\nid,answer"
    return prompt

def ask_typhoon(batch_items):
    prompt = build_prompt(batch_items)
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": PROMPTS_SYS},
                {"role": "user", "content": prompt}
            ],
            max_tokens=RESERVED_OUTPUT_TOKENS,
            temperature=0,
            top_p=1,
            extra_body={
                "chat_template_kwargs": {"enable_thinking": False}
            }
        )

        raw = response.choices[0].message.content.strip()
        print(f"✅ คำตอบ:\n{raw}\n")

        result = []
        found_ids = set()

        for line in raw.splitlines():
            match = re.match(r"([0-9a-fA-F\-]{36})[,:\s]+(A|B|C|D|E|Fall|Rise)", line.strip(), flags=re.I)
            if match:
                qid, answer = match.groups()
                result.append({'id': qid, 'answer': answer})
                found_ids.add(qid)

        # เพิ่มคำตอบว่างสำหรับ qid ที่ไม่มีในผลลัพธ์
        for qid, _ in batch_items:
            if qid not in found_ids:
                result.append({'id': qid, 'answer': ''})

        return result

    except Exception as e:
        print(f"❌ ERROR: {e}")
        return [{'id': qid, 'answer': ''} for qid, _ in batch_items]

# === ส่งทีละ batch ===
answers = []

for i, batch in enumerate(batches):
    print(f"\n🟡 Batch {i+1}/{len(batches)}: {len(batch)} ข้อ | 🎯 ตอบแล้ว {len(answers)}/{len(items)} ข้อ")

    result = ask_typhoon(batch)
    answers.extend(result)
    # time.sleep(3)

# === บันทึกไฟล์ ===
with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['id', 'answer'])
    writer.writeheader()
    writer.writerows(answers)

print(f"\n✅ เสร็จสิ้น! เขียนไฟล์ {OUTPUT_FILE} เรียบร้อยแล้ว.")


🟡 Batch 1/89: 8 ข้อ | 🎯 ตอบแล้ว 0/499 ข้อ
✅ คำตอบ:
36deab86-cfd3-48b5-9bea-a36c1b0e63a8,C
2b5bbd26-45e8-4768-ab8a-b5dc1d153ab7,B
8a722080-bc16-49db-89c9-100cd61cd28a,A
75316e95-88f4-4fef-83b9-dde0aa52889a,A
bcca13bc-2675-4645-82cc-7e4c412ed294,Fall
ff5b5d2e-5fa1-4709-a9a7-681e4d4585bd,C
d7a45917-d0f9-476e-912d-ebc5af9333a1,B
e625dbc8-f448-4c53-9a78-6c3f351b49c3,Fall


🟡 Batch 2/89: 4 ข้อ | 🎯 ตอบแล้ว 8/499 ข้อ
✅ คำตอบ:
9bea42e5-3c21-46dc-93f7-0017f382f7cf,Fall
0925a4d7-8546-46a8-834f-20f58f16bc99,C
dc0aa42f-569d-4277-8149-b645f3cf9888,A
b9964445-c648-4661-ad85-7e5e4cd0feb4,Rise


🟡 Batch 3/89: 5 ข้อ | 🎯 ตอบแล้ว 12/499 ข้อ
✅ คำตอบ:
a803daca-2cab-4d53-be68-c75fb71da84a,Rise
1ca64702-d7d7-4a9a-987a-4e58938a3b96,B
6caca908-0f01-43b8-a2f4-674d30d03891,C
4485f013-35ce-4f02-92a9-ae8299565de5,C
81747de9-22c1-47e7-a6c2-36116f90d772,B


🟡 Batch 4/89: 5 ข้อ | 🎯 ตอบแล้ว 17/499 ข้อ
✅ คำตอบ:
5dca8ccf-cfa3-4b2f-943c-0d4a28cadf46,Rise
aa5ad602-ac8c-428f-8d08-c7170638b851,A
e4eb6222-3aeb-4dec-b5ea-3d66