In [None]:
pip install -U google-generativeai

In [None]:
pip install google-generativeai langchain langchain-google-genai

單純串 Gemini 1.5 API


In [None]:
import pandas as pd
import time
import re
import random
import google.generativeai as genai

# Credentials are read from the environment so that no secret is stored in the
# notebook; when GOOGLE_API_KEY is unset the user is prompted interactively.
# NOTE(review): the key that used to be hardcoded here has been exposed and
# should be revoked.
import os
from getpass import getpass

API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")
genai.configure(api_key=API_KEY)

# Input tables: labelled sample questions, the questions to answer, and the
# reference layout of the submission file.
sample_df = pd.read_csv("mmlu_sample.csv", sep=",")  # training set
submit_df = pd.read_csv("mmlu_submit.csv", sep=",")  # test set
submit_format_df = pd.read_csv("submit_format.csv")  # reference output format

# Build the few-shot part of the prompt from rows of mmlu_sample.csv.
few_shot_count = 10
# Clamp the draw to the number of available rows: DataFrame.sample raises when
# asked for more rows than exist (without replacement).
few_shot_examples = sample_df.sample(min(few_shot_count, len(sample_df)), random_state=42)

training_examples = []
for _, row in few_shot_examples.iterrows():
    # Templated "reasoning" sentence that simply states the labelled answer.
    reasoning_text = (
        "Carefully analyze the question, the choices, "
        "and provide a concise reasoning process to arrive at the correct answer. "
        f"In this case, the correct answer is {row['target']}."
    )

    # One fully formatted worked example; surrounding blank lines are stripped.
    example = f"""
You are a helpful AI assistant using zero-shot CoT reasoning. 
When you solve the following multiple-choice question, silently reason through the problem, 
but provide a brief justification (few-shot style). 
DO NOT reveal your entire chain-of-thought.

Question: {row['input']}
A: {row['A']}
B: {row['B']}
C: {row['C']}
D: {row['D']}

Reasoning: {reasoning_text}

Correct Answer: {row['target']}

DO NOT reply instantly– if you have any questions about this prompt, ask me.
""".strip()

    training_examples.append(example)


In [None]:
import time

# All few-shot examples joined into a single prompt section.
training_prompt = "\n\n".join(training_examples)

predictions = []
count_requests = 0
daily_limit = 1500

# The client does not depend on the question, so it is created once.
model = genai.GenerativeModel("gemini-1.5-flash")

# The few-shot examples spell the label "Correct Answer:" while the instructions
# ask for "Correct answer:", so the label is matched case-insensitively. Optional
# markdown/bracket decoration before the letter is skipped, and the letter must be
# a standalone token so that words such as "Both" are not read as option B.
answer_pattern = re.compile(r"Correct answer:[\s*(\[]*([A-D])\b", re.IGNORECASE)

for _, row in submit_df.iterrows():
    question_prompt = f"""
You are a helpful AI assistant. Below are examples of how to answer multiple-choice questions with a short reasoning process 
(few-shot prompts, zero-shot CoT):

{training_prompt}

Instructions:
1. Carefully analyze the question and each answer choice.
2. Silently reason through the steps (zero-shot CoT), but provide only a brief final reasoning explanation.
3. Output your final answer in the exact format:
   Correct answer: X
   (Where X is A, B, C, or D)
4. DO NOT reply instantly – if you have any questions about this prompt, ask me.

Question: {row['input']}
A: {row['A']}
B: {row['B']}
C: {row['C']}
D: {row['D']}
""".strip()

    response = model.generate_content(
        question_prompt,
        generation_config={
            "temperature": 0.8,
            "top_p": 0.9,
            "presence_penalty": 0.1,
            "frequency_penalty": 0.1
        }
    )

    try:
        raw_answer = response.text.strip()
    except ValueError:
        # The .text accessor raises when the reply carries no text part
        # (for example a blocked response); treat that as "no answer".
        raw_answer = ""

    # Use the last labelled answer in the reply; fall back to "A" when the
    # reply cannot be parsed.
    found_letters = answer_pattern.findall(raw_answer)
    predicted_option = found_letters[-1].upper() if found_letters else "A"

    predictions.append({"ID": row["Unnamed: 0"], "target": predicted_option})

    count_requests += 1
    time.sleep(4)  # pause between consecutive API requests

    if count_requests >= daily_limit:
        print(f"Reached the daily limit of {daily_limit} requests. Stopping...")
        break

output_df = pd.DataFrame(predictions)
output_df.to_csv("submit_format.csv", index=False)

print("Results saved to submit_format.csv")

利用 Langchain 中的 Gemini 2.0 + CoT + 動態挑選 few-shot 題目

In [None]:
import os
import pandas as pd
import time
import random
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.output_parsers import RegexParser

# The API key is taken from the environment (or typed in interactively) instead
# of being stored in the notebook. The chat model in this section is constructed
# without an explicit key, so it is expected to read GOOGLE_API_KEY.
# NOTE(review): the key that used to be hardcoded here should be revoked.
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

sample_df = pd.read_csv("mmlu_sample.csv", sep=",")  # training set
submit_df = pd.read_csv("mmlu_submit.csv", sep=",")  # test set

# Upper bound on the number of few-shot examples placed in each prompt.
few_shot_count = 5

# Strategy hint inserted into the prompt, keyed by the question's task name.
category_strategies = dict(
    high_school_biology="Focus on biological concepts, terminology, and processes.",
    high_school_computer_science="Analyze logic, algorithms, and programming principles.",
    high_school_european_history="Consider historical context, timelines, and cause-effect relationships.",
    high_school_geography="Focus on spatial relationships, physical features, and human-environment interactions.",
    high_school_government_and_politics="Evaluate political systems, institutions, and principles.",
    high_school_macroeconomics="Analyze economic principles, markets, and policies.",
    high_school_microeconomics="Focus on individual markets, supply-demand, and decision-making.",
    high_school_psychology="Consider human behavior, mental processes, and psychological theories.",
    high_school_us_history="Examine key events, figures, and developments in U.S. history.",
    high_school_world_history="Evaluate global events, cultures, and historical trends.",
)

# Persona the model is asked to adopt, keyed by the same task names.
role_dict = dict(
    high_school_biology="a high school biology teacher",
    high_school_computer_science="a computer science professor",
    high_school_european_history="a European history expert",
    high_school_geography="a geography educator",
    high_school_government_and_politics="a political science scholar",
    high_school_macroeconomics="an economics professor specializing in macroeconomics",
    high_school_microeconomics="an economics professor specializing in microeconomics",
    high_school_psychology="a psychology instructor",
    high_school_us_history="a U.S. history expert",
    high_school_world_history="a world history specialist",
)

# Prompt skeleton for one question: persona and task, answering rules, the
# few-shot examples, then the question with its four options.
prompt_template = PromptTemplate(
    input_variables="role task task_strategy few_shot question A B C D".split(),
    template="""
You are {role} specializing in solving multiple-choice questions with high accuracy. The current question is from {task}.

🔹 **Rules**:
1. Output only the final answer in this exact format: 'Correct answer: X' (where X is A, B, C, or D).
2. Do NOT include reasoning, justifications, or additional text in your output.
3. Think step-by-step internally:
   - Identify the key concepts or facts in the question.
   - Evaluate each option based on those concepts.
   - Eliminate incorrect options systematically.
   - Internally, imagine explaining the answer to a student, considering each option carefully.
4. Use these strategies for {task} questions: {task_strategy}.
5. For each option, consider why it might be correct or incorrect, and compare it to the others.
6. Double-check your conclusion by revisiting the question and options to ensure accuracy.

📌 **Examples:**
{few_shot}

Now solve this question:

Question: {question}
A) {A}
B) {B}
C) {C}
D) {D}
""",
)

In [None]:
# Chat model plus the prompt piped into it; chain.invoke(<dict of template
# variables>) returns the model reply.
llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-2.0-flash",
)
chain = prompt_template | llm
# Strict parser for the requested reply format (not used by the visible code,
# which relies on extract_answer instead).
output_parser = RegexParser(
    output_keys=["target"],
    regex=r"Correct answer: ([A-D])",
)

# Helper: pull the selected option letter out of a model reply.
def extract_answer(response):
    """Return the option letter (A, B, C or D) chosen in a model reply.

    The reply is searched for a known answer label ("Correct answer:",
    "Answer:", "The correct option is", "I choose", "Final answer:"),
    case-insensitively and in that priority order, followed by a standalone
    option letter. Markdown or bracket decoration between the label and the
    letter (such as ``**B**`` or ``(B)``) is skipped. If no label matches, the
    first standalone capital A-D on the last line is used. As a last resort a
    random letter is returned, so the result is always one of A-D.

    Args:
        response: Reply text; non-string values are converted with ``str``.

    Returns:
        One of "A", "B", "C", "D".
    """
    text = response if isinstance(response, str) else str(response)
    labels = [
        r"Correct answer:",
        r"Answer:",
        r"The correct option is",
        r"I choose",
        r"Final answer:",
    ]
    for label in labels:
        # The trailing \b keeps the first letter of a word ("Answer: Democracy")
        # from being mistaken for an option letter.
        match = re.search(label + r"[\s*(\[]*([A-D])\b", text, re.IGNORECASE)
        if match:
            return match.group(1).upper()
    last_line = text.strip().split('\n')[-1].strip()
    # Case-sensitive and word-bounded: only a lone capital letter counts.
    match = re.search(r"\b([A-D])\b", last_line)
    if match:
        return match.group(1)
    return random.choice(["A", "B", "C", "D"])

predictions = []
errors = []
count_requests = 0
daily_limit = 1500

# The few-shot text depends only on the task (the draw uses a fixed
# random_state), so it is built once per task rather than once per question.
few_shot_text_by_task = {}

for _, row in submit_df.iterrows():
    # Pick few-shot examples that belong to the same task as the question.
    task = row["task"]
    if task not in few_shot_text_by_task:
        task_examples = sample_df[sample_df["task"] == task]
        few_shot_examples = task_examples.sample(min(few_shot_count, len(task_examples)), random_state=42)
        few_shot_text_by_task[task] = "\n".join([
            f"""Example {i+1}:
Question: {ex['input']}
A) {ex['A']}
B) {ex['B']}
C) {ex['C']}
D) {ex['D']}
Correct Answer: {ex['target']}""" for i, (_, ex) in enumerate(few_shot_examples.iterrows())
        ])
    few_shot_text = few_shot_text_by_task[task]

    # Task-specific strategy and persona, with generic fallbacks.
    task_strategy = category_strategies.get(task, "Use general knowledge and logical reasoning.")
    role = role_dict.get(task, "an expert in general knowledge")

    question_data = {
        "role": role,
        "task": task,
        "task_strategy": task_strategy,
        "few_shot": few_shot_text,
        "question": row["input"],
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    try:
        # Ask the model for an answer.
        response = chain.invoke(question_data).content
    except Exception as exc:
        # A failed request must not discard the answers collected so far.
        errors.append({
            "ID": row["Unnamed: 0"],
            "input": row["input"],
            "response": str(exc),
            "reason": "LLM invocation failed"
        })
        failure_text = str(exc).lower()
        if "quota" in failure_text or "exhausted" in failure_text or "limit" in failure_text:
            # Further requests would fail as well: stop and save what exists.
            print(f"Quota exhausted detected: {exc}")
            break
        predictions.append({"ID": row["Unnamed: 0"], "target": random.choice(["A", "B", "C", "D"])})
    else:
        # Parse the option letter out of the reply.
        predicted_option = extract_answer(response)

        if predicted_option in ["A", "B", "C", "D"]:
            predictions.append({"ID": row["Unnamed: 0"], "target": predicted_option})
        else:
            # Defensive branch for an extractor that returns something other
            # than a valid letter.
            predicted_option = random.choice(["A", "B", "C", "D"])
            errors.append({
                "ID": row["Unnamed: 0"],
                "input": row["input"],
                "response": response,
                "reason": "無法解析"
            })
            predictions.append({"ID": row["Unnamed: 0"], "target": predicted_option})

    count_requests += 1
    time.sleep(4)  # pause between consecutive API requests

    if count_requests >= daily_limit:
        print(f"Reached the daily limit of {daily_limit} requests. Stopping...")
        break

output_df = pd.DataFrame(predictions)
output_df.to_csv("submit_format.csv", index=False)

if errors:
    print(f"{len(errors)} question(s) were recorded in `errors`.")
print("Results saved to submit_format.csv")

In [None]:
import os
import pandas as pd
import time
import random
import re
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.output_parsers import RegexParser
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# The API key is taken from the environment (or typed in interactively) instead
# of being stored in the notebook. The chat model in this section is constructed
# without an explicit key, so it is expected to read GOOGLE_API_KEY.
# NOTE(review): the key that used to be hardcoded here should be revoked.
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

sample_df = pd.read_csv("mmlu_sample.csv", sep=",")  # training set
submit_df = pd.read_csv("mmlu_submit.csv", sep=",")  # test set

# Number of few-shot examples used by the purely random selection variant.
few_shot_count = 5

# Strategy hint inserted into the prompt, keyed by the question's task name.
category_strategies = dict(
    high_school_biology="Delve into fundamental biological concepts, technical terminology, and biological processes.",
    high_school_computer_science="Analyze programming logic, algorithms, and principles of software design.",
    high_school_european_history="Examine European historical contexts, key events, and their cause-effect relationships.",
    high_school_geography="Focus on spatial distributions, natural landforms, and human-environment interactions.",
    high_school_government_and_politics="Evaluate political systems, governmental operations, and core political theories.",
    high_school_macroeconomics="Explore macroeconomic principles, market dynamics, and the impact of policies.",
    high_school_microeconomics="Concentrate on individual markets, supply-demand interactions, and consumer behavior.",
    high_school_psychology="Study psychological theories, behavioral patterns, and cognitive processes.",
    high_school_us_history="Review significant events, figures, and developmental trends in U.S. history.",
    high_school_world_history="Assess global historical trends, cultural exchanges, and international influences.",
)

# Persona the model is asked to adopt, keyed by the same task names.
role_dict = dict(
    high_school_biology="a high school biology teacher",
    high_school_computer_science="a computer science professor",
    high_school_european_history="a European history expert",
    high_school_geography="a geography educator",
    high_school_government_and_politics="a political science scholar",
    high_school_macroeconomics="a macroeconomics professor",
    high_school_microeconomics="a microeconomics professor",
    high_school_psychology="a psychology instructor",
    high_school_us_history="a U.S. history expert",
    high_school_world_history="a world history specialist",
)

# Prompt skeleton for one question: persona and task, answering rules with an
# explicit five-step internal procedure, the few-shot examples, then the
# question with its four options.
prompt_template = PromptTemplate(
    input_variables="role task task_strategy few_shot question A B C D".split(),
    template="""
You are {role} specializing in solving multiple-choice questions with high accuracy. The current question is from {task}.

🔹 **Rules**:
1. Output only the final answer in this exact format: 'Correct answer: X' (where X is A, B, C, or D).
2. Include only the output, avoiding reasoning, justifications, or additional text.
3. Think step-by-step internally:
   - Step 1: Identify the core concept or fact the question is testing.
   - Step 2: For each option, assess its alignment with the core concept.
   - Step 3: Eliminate options that contradict or misinterpret the concept, considering why they might be incorrect.
   - Step 4: Check for potential trickery or common misconceptions in the options.
   - Step 5: Confirm the remaining option by ensuring it directly answers the question.
4. Use these strategies for {task} questions: {task_strategy}.
5. Pay attention to subtle differences between options to avoid falling for distractors.
6. Double-check your conclusion by revisiting the question and options to ensure accuracy.

📌 **Examples:**
These examples are selected to provide context and illustrate common patterns in {task} questions:
{few_shot}

Now solve this question:

Question: {question}
A) {A}
B) {B}
C) {C}
D) {D}
""",
)

# Chat model plus the prompt piped into it; chain.invoke(<dict of template
# variables>) returns the model reply.
llm = ChatGoogleGenerativeAI(
    temperature=0.1,
    model="gemini-2.0-flash",
)
chain = prompt_template | llm
# Strict parser for the requested reply format (not used by the visible code,
# which relies on extract_answer instead).
output_parser = RegexParser(
    output_keys=["target"],
    regex=r"Correct answer: ([A-D])",
)

def extract_answer(response):
    """Return the option letter (A, B, C or D) chosen in a model reply.

    The reply is searched for a known answer label ("Correct answer:",
    "Answer:", "The correct option is", "I choose", "Final answer:"),
    case-insensitively and in that priority order, followed by a standalone
    option letter. Markdown or bracket decoration between the label and the
    letter (such as ``**B**`` or ``(B)``) is skipped. If no label matches, the
    first standalone capital A-D on the last line is used. As a last resort a
    random letter is returned, so the result is always one of A-D.

    Args:
        response: Reply text; non-string values are converted with ``str``.

    Returns:
        One of "A", "B", "C", "D".
    """
    text = response if isinstance(response, str) else str(response)
    labels = [
        r"Correct answer:",
        r"Answer:",
        r"The correct option is",
        r"I choose",
        r"Final answer:",
    ]
    for label in labels:
        # The trailing \b keeps the first letter of a word ("Answer: Democracy")
        # from being mistaken for an option letter.
        match = re.search(label + r"[\s*(\[]*([A-D])\b", text, re.IGNORECASE)
        if match:
            return match.group(1).upper()
    last_line = text.strip().split('\n')[-1].strip()
    # Case-sensitive and word-bounded: only a lone capital letter counts.
    match = re.search(r"\b([A-D])\b", last_line)
    if match:
        return match.group(1)
    return random.choice(["A", "B", "C", "D"])

def get_similar_examples(question, task_examples, n=4):
    """Return the rows of ``task_examples`` whose text is closest to ``question``.

    Closeness is the cosine similarity between TF-IDF vectors of ``question``
    and of each row's ``"input"`` text, with the vectorizer fitted on exactly
    those texts.

    Args:
        question: Text of the question being answered.
        task_examples: DataFrame of candidate examples with an ``"input"`` column.
        n: Maximum number of rows to return.

    Returns:
        Up to ``n`` rows of ``task_examples``, most similar first; an empty
        frame with the same columns when ``n`` is not positive or there are no
        candidates.
    """
    # argsort()[-0:] would select every row, and an empty candidate set cannot
    # be scored, so both cases are answered with an empty selection up front.
    if n <= 0 or len(task_examples) == 0:
        return task_examples.iloc[0:0]

    vectorizer = TfidfVectorizer()
    candidate_texts = task_examples["input"].tolist()
    # Row 0 is the question; rows 1.. are the candidates in frame order.
    tfidf_matrix = vectorizer.fit_transform([question] + candidate_texts)
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]
    # argsort is ascending: take the last n positions and reverse them.
    top_indices = similarities.argsort()[-n:][::-1]
    return task_examples.iloc[top_indices]

predictions = []
errors = []
count_requests = 0
daily_limit = 1500

for _, row in submit_df.iterrows():
    # Pick few-shot examples that belong to the same task as the question.
    task = row["task"]
    task_examples = sample_df[sample_df["task"] == task]

    if task_examples.empty:
        # Unknown task: there is nothing to rank or sample, so the prompt gets
        # no examples (the strategy and persona fall back to defaults below).
        few_shot_examples = task_examples
    else:
        # 2 most similar examples + 3 random examples; rows picked by both
        # routes appear only once.
        similar_examples = get_similar_examples(row["input"], task_examples, 2)
        random_examples = task_examples.sample(min(3, len(task_examples)), random_state=42)
        few_shot_examples = pd.concat([similar_examples, random_examples]).drop_duplicates()

    few_shot_text = "\n".join([
        f"""Example {i+1}:
Question: {ex['input']}
A) {ex['A']}
B) {ex['B']}
C) {ex['C']}
D) {ex['D']}
Correct Answer: {ex['target']}""" for i, (_, ex) in enumerate(few_shot_examples.iterrows())
    ])

    # Task-specific strategy and persona, with generic fallbacks.
    task_strategy = category_strategies.get(
        task,
        "Implement a comprehensive strategy that leverages detailed domain expertise and rigorous logical reasoning."
    )
    role = role_dict.get(
        task,
        "a seasoned expert with extensive domain-specific knowledge and analytical skills"
    )

    question_data = {
        "role": role,
        "task": task,
        "task_strategy": task_strategy,
        "few_shot": few_shot_text,
        "question": row["input"],
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    try:
        response = chain.invoke(question_data).content
    except Exception as exc:
        # A failed request must not discard the answers collected so far.
        errors.append({
            "ID": row["Unnamed: 0"],
            "input": row["input"],
            "response": str(exc),
            "reason": "LLM invocation failed"
        })
        failure_text = str(exc).lower()
        if "quota" in failure_text or "exhausted" in failure_text or "limit" in failure_text:
            # Further requests would fail as well: stop and save what exists.
            print(f"Quota exhausted detected: {exc}")
            break
        predictions.append({"ID": row["Unnamed: 0"], "target": random.choice(["A", "B", "C", "D"])})
    else:
        predicted_option = extract_answer(response)

        if predicted_option in ["A", "B", "C", "D"]:
            predictions.append({"ID": row["Unnamed: 0"], "target": predicted_option})
        else:
            # Defensive branch for an extractor that returns something other
            # than a valid letter.
            predicted_option = random.choice(["A", "B", "C", "D"])
            errors.append({
                "ID": row["Unnamed: 0"],
                "input": row["input"],
                "response": response,
                "reason": "無法解析"
            })
            predictions.append({"ID": row["Unnamed: 0"], "target": predicted_option})

    count_requests += 1
    time.sleep(4)  # pause between consecutive API requests

    if count_requests >= daily_limit:
        print(f"Reached the daily limit of {daily_limit} requests. Stopping...")
        break

output_df = pd.DataFrame(predictions)
output_df.to_csv("submit_format.csv", index=False)

if errors:
    print(f"{len(errors)} question(s) were recorded in `errors`.")
print("Results saved to submit_format.csv")

Gemini-1.5-pro 版本

In [None]:
import os
import pandas as pd
import time
import random
import re
import sys
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.output_parsers import RegexParser

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# The API key is taken from the environment (or typed in interactively) instead
# of being stored in the notebook. The chat model in this section is constructed
# without an explicit key, so it is expected to read GOOGLE_API_KEY.
# NOTE(review): every key that used to be listed here, including the
# commented-out ones, has been exposed and should be revoked.
import getpass

if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

sample_df = pd.read_csv("mmlu_sample.csv", sep=",")  # training set
submit_df = pd.read_csv("mmlu_submit.csv", sep=",")  # test set


# Strategy hint inserted into the prompt, keyed by the question's task name.
category_strategies = dict(
    high_school_biology="Focus on biological concepts, terminology, and processes.",
    high_school_computer_science="Analyze logic, algorithms, and programming principles.",
    high_school_european_history="Consider historical context, timelines, and cause-effect relationships.",
    high_school_geography="Focus on spatial relationships, physical features, and human-environment interactions.",
    high_school_government_and_politics="Evaluate political systems, institutions, and principles.",
    high_school_macroeconomics="Analyze economic principles, markets, and policies.",
    high_school_microeconomics="Focus on individual markets, supply-demand, and decision-making.",
    high_school_psychology="Consider human behavior, mental processes, and psychological theories.",
    high_school_us_history="Examine key events, figures, and developments in U.S. history.",
    high_school_world_history="Evaluate global events, cultures, and historical trends.",
)

# Persona the model is asked to adopt, keyed by the same task names.
role_dict = dict(
    high_school_biology="a high school biology teacher",
    high_school_computer_science="a computer science professor",
    high_school_european_history="a European history expert",
    high_school_geography="a geography educator",
    high_school_government_and_politics="a political science scholar",
    high_school_macroeconomics="an economics professor specializing in macroeconomics",
    high_school_microeconomics="an economics professor specializing in microeconomics",
    high_school_psychology="a psychology instructor",
    high_school_us_history="a U.S. history expert",
    high_school_world_history="a world history specialist",
)


# Prompt skeleton for one question: persona and task, answering rules with an
# explicit five-step internal procedure, the few-shot examples, then the
# question with its four options.
prompt_template = PromptTemplate(
    input_variables="role task task_strategy few_shot question A B C D".split(),
    template="""
You are {role} specializing in solving multiple-choice questions with high accuracy. The current question is from {task}.

🔹 **Rules**:
1. Output only the final answer in this exact format: 'Correct answer: X' (where X is A, B, C, or D).
2. Do NOT include reasoning, justifications, or additional text in your output.
3. Think step-by-step internally:
   - Step 1: Identify the key topic or concept in the question.
   - Step 2: Recall relevant facts, definitions, or principles related to {task}.
   - Step 3: Evaluate each option by comparing it to the identified concepts.
   - Step 4: Eliminate incorrect options based on inaccuracies or inconsistencies.
   - Step 5: Double-check the remaining option(s) against the question to confirm accuracy.
4. Use these strategies for {task} questions: {task_strategy}.
5. Pay attention to subtle differences between options to avoid common pitfalls.
6. Imagine explaining your choice to a student, ensuring every step is clear and logical.

📌 **Examples:**
{few_shot}

Now solve this question:

Question: {question}
A) {A}
B) {B}
C) {C}
D) {D}
""",
)

def extract_answer(response):
    """Return the option letter (A, B, C or D) chosen in a model reply.

    The reply is searched for a known answer label ("Correct answer:",
    "Answer:", "The correct option is", "I choose", "Final answer:"),
    case-insensitively and in that priority order, followed by a standalone
    option letter. Markdown or bracket decoration between the label and the
    letter (such as ``**B**`` or ``(B)``) is skipped. If no label matches, the
    first standalone capital A-D on the last line is used. As a last resort a
    random letter is returned, so the result is always one of A-D.

    Args:
        response: Reply text; non-string values are converted with ``str``.

    Returns:
        One of "A", "B", "C", "D".
    """
    text = response if isinstance(response, str) else str(response)
    labels = [
        r"Correct answer:",
        r"Answer:",
        r"The correct option is",
        r"I choose",
        r"Final answer:",
    ]
    for label in labels:
        # The trailing \b keeps the first letter of a word ("Answer: Democracy")
        # from being mistaken for an option letter.
        match = re.search(label + r"[\s*(\[]*([A-D])\b", text, re.IGNORECASE)
        if match:
            return match.group(1).upper()
    last_line = text.strip().split('\n')[-1].strip()
    # Case-sensitive and word-bounded: only a lone capital letter counts.
    match = re.search(r"\b([A-D])\b", last_line)
    if match:
        return match.group(1)
    return random.choice(["A", "B", "C", "D"])


def get_similar_examples(current_question, task_examples, few_shot_count=5):
    """Return the rows of ``task_examples`` most similar to ``current_question``.

    Similarity is the cosine similarity between TF-IDF vectors (English stop
    words removed, text lower-cased) of the question and of each row's
    ``"input"`` text.

    Args:
        current_question: Text of the question being answered.
        task_examples: DataFrame of candidate examples with an ``"input"`` column.
        few_shot_count: Maximum number of rows to return.

    Returns:
        An empty selection when ``few_shot_count`` is not positive; all of
        ``task_examples`` (original order) when it has at most
        ``few_shot_count`` rows; otherwise the ``few_shot_count`` most similar
        rows, most similar first.
    """
    # argsort()[-0:] would select every row, so a non-positive request is
    # answered explicitly with an empty selection.
    if few_shot_count <= 0:
        return task_examples.iloc[0:0]
    if len(task_examples) <= few_shot_count:
        return task_examples  # not enough candidates to rank: return them all

    # Score every candidate against the question with TF-IDF cosine similarity.
    vectorizer = TfidfVectorizer(stop_words='english')
    current_text = current_question.lower()
    example_texts = task_examples["input"].str.lower().tolist()
    # Row 0 is the question; rows 1.. are the candidates in frame order.
    tfidf_matrix = vectorizer.fit_transform([current_text] + example_texts)
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # argsort is ascending: take the last positions and reverse them.
    top_indices = similarities.argsort()[-few_shot_count:][::-1]
    return task_examples.iloc[top_indices]


# Chat model with a short output budget (the prompt asks for a single
# "Correct answer: X" line), plus the prompt piped into it.
llm = ChatGoogleGenerativeAI(
    max_output_tokens=50,
    temperature=0.2,
    model="gemini-1.5-pro",
)
chain = prompt_template | llm
# Strict parser for the requested reply format (not used by the visible code,
# which relies on extract_answer instead).
output_parser = RegexParser(
    output_keys=["target"],
    regex=r"Correct answer: ([A-D])",
)


def process_batch(batch_data, predictions, errors):
    """Answer every question in ``batch_data`` and record the outcome.

    Each row is handled on its own: the prompt is built from the examples of
    the row's task that are most similar to it, the chain is invoked, and the
    parsed letter is appended to ``predictions``. A failure for one row
    therefore no longer discards the replies already obtained for other rows
    of the same batch.

    Args:
        batch_data: List of question records (dicts) with the keys "task",
            "input", "A", "B", "C", "D" and "Unnamed: 0".
        predictions: List extended in place with {"ID", "target"} records.
        errors: List extended in place with diagnostic records for rows whose
            request failed or whose answer was not a valid letter.

    Raises:
        SystemExit: When a request fails with a message mentioning "quota",
            "exhausted" or "limit"; the predictions (and errors, if any)
            gathered so far are written to the *_partial.csv files first.
    """
    for row in batch_data:
        task = row["task"]
        task_examples = sample_df[sample_df["task"] == task]
        few_shot_examples = get_similar_examples(row["input"], task_examples, few_shot_count=3)
        few_shot_text = "\n".join([
            f"Example {i+1}:\nQuestion: {ex['input']}\nA) {ex['A']}\nB) {ex['B']}\nC) {ex['C']}\nD) {ex['D']}\nCorrect Answer: {ex['target']}"
            for i, (_, ex) in enumerate(few_shot_examples.iterrows())
        ])
        task_strategy = category_strategies.get(task, "Use general knowledge and logical reasoning.")
        role = role_dict.get(task, "an expert in general knowledge")

        question_data = {
            "role": role,
            "task": task,
            "task_strategy": task_strategy,
            "few_shot": few_shot_text,
            "question": row["input"],
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        try:
            response = chain.invoke(question_data).content
        except Exception as e:
            error_msg = str(e).lower()

            # Check whether the failure is caused by an exhausted quota; if so,
            # save the current progress and exit.
            if "quota" in error_msg or "exhausted" in error_msg or "limit" in error_msg:
                print(f"Quota exhausted detected: {str(e)}")
                print("Saving current progress and exiting")
                output_df = pd.DataFrame(predictions)
                output_df.to_csv("submit_format_partial.csv", index=False)
                if errors:
                    error_df = pd.DataFrame(errors)
                    error_df.to_csv("errors_partial.csv", index=False)
                print("Partial results saved to 'submit_format_partial.csv'. Exiting program.")
                sys.exit(0)

            # Any other failure: log it and fall back to a random guess.
            errors.append({
                "ID": row["Unnamed: 0"],
                "input": row["input"],
                "response": str(e),
                "reason": "LLM invocation failed"
            })
            predictions.append({"ID": row["Unnamed: 0"], "target": random.choice(["A", "B", "C", "D"])})
            continue

        predicted_option = extract_answer(response)
        if predicted_option in ["A", "B", "C", "D"]:
            predictions.append({"ID": row["Unnamed: 0"], "target": predicted_option})
        else:
            # Record the rejected value itself (not its random replacement) so
            # that the log explains what went wrong.
            errors.append({
                "ID": row["Unnamed: 0"],
                "input": row["input"],
                "response": response,
                "reason": f"Invalid answer format: {predicted_option}"
            })
            predictions.append({"ID": row["Unnamed: 0"], "target": random.choice(["A", "B", "C", "D"])})

predictions = []
errors = []
count_requests = 0
daily_limit = 1500
batch_size = 1
# First row of submit_df to consider. Resuming is driven by the checkpoint file
# below, so this no longer has to be edited by hand between runs.
start_index = 0

# Load the predictions saved by an earlier, interrupted run (if any).
if os.path.exists("submit_format_partial.csv"):
    try:
        previous_df = pd.read_csv("submit_format_partial.csv")
    except pd.errors.EmptyDataError:
        # The checkpoint was written before any prediction existed.
        previous_df = pd.DataFrame(columns=["ID", "target"])
    predictions = previous_df.to_dict(orient="records")
    count_requests = len(predictions)
    print(f"Loaded {count_requests} previous predictions from 'submit_format_partial.csv'.")
else:
    print("No previous results found. Starting from scratch.")

# Only questions without a saved prediction are sent to the model, so a resumed
# run neither repeats nor skips rows, whatever the checkpoint contains.
answered_ids = {record.get("ID") for record in predictions}
pending_df = submit_df.iloc[start_index:]
pending_df = pending_df[~pending_df["Unnamed: 0"].isin(answered_ids)]

for i in range(0, len(pending_df), batch_size):
    batch_data = pending_df.iloc[i:i + batch_size].to_dict(orient="records")
    process_batch(batch_data, predictions, errors)
    count_requests += len(batch_data)

    if count_requests % 100 == 0:
        print(f"Processed {count_requests} requests...")

    if count_requests >= daily_limit:
        print(f"Reached the daily limit of {daily_limit} requests. Stopping...")
        break

    time.sleep(30)  # pause between consecutive batches

output_df = pd.DataFrame(predictions)
output_df.to_csv("submit_format.csv", index=False)

print("Results saved to submit_format.csv")