# Agent used to select the best set of alpha factors

In [78]:
import json
import os
import re
import time

import pandas as pd
from ollama import Client  # Import the Ollama client
from tqdm import tqdm

# Initialize the Ollama client
# Shared by every function in this file that calls `client.chat(...)`.
client = Client(host='http://localhost:11434')  # Adjust host if needed

# Directory that log_to_file() creates on demand and appends log files into.
log_path = "./log/"
# Name of the current log file inside `log_path`. It starts empty and is
# reassigned by the selection loop at the bottom of the file before each
# compare() call; log_to_file() reads it at call time.
log_file = ""


def retry_until_expected(run, thread_id, expect):
    """
    Placeholder wait used in place of a real run-status poll.

    None of the arguments (`run`, `thread_id`, `expect`) are inspected: the
    function prints a notice and then blocks for a fixed number of seconds.
    It is kept so that callers written against a polling-style API keep
    working; replace the body if a real status check becomes available.
    """
    pause_seconds = 5  # Adjust the delay as needed
    print("Waiting for Ollama to complete the run...")
    time.sleep(pause_seconds)


def get_last_text_message(thread_id):
    """
    Ask the model for a reply to a conversation and return the reply text.

    `thread_id` is passed straight through as the `messages` argument of
    `client.chat`, so despite its name it is expected to hold the
    conversation history rather than an identifier.

    Returns the reply's text content, or None when the response is empty or
    its content is empty.
    """
    reply = client.chat(model="llama3.2", messages=thread_id)
    if not reply:
        return None
    text = reply['message']['content']
    if not text:
        return None
    return text


def log_to_file(type, message):
    """
    Append one entry to the current log file (`log_path + log_file`).

    Entries of type "input" are preceded by a line of '>' characters and
    entries of type "output" by a line of '<' characters; any other type is
    written without a banner. The log directory is created if it is missing.
    """
    if type == "input":
        banner = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" + "\n"
    elif type == "output":
        banner = "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" + "\n"
    else:
        banner = None

    entry = message if banner is None else banner + message

    os.makedirs(log_path, exist_ok=True)
    destination = log_path + log_file
    with open(destination, "a", encoding="utf-8") as handle:
        handle.write(entry + "\n")


def _read_data_file(path):
    """Return the contents of a data file as text suitable for a prompt.

    Excel workbooks are binary and cannot be opened as UTF-8 text, so they
    are parsed with pandas and every sheet is rendered with `to_string()`.
    Any other file is read as UTF-8 text.
    """
    if path.lower().endswith((".xlsx", ".xls")):
        # sheet_name=None makes pandas return {sheet name: DataFrame}.
        sheets = pd.read_excel(path, sheet_name=None)
        return "\n\n".join(
            f"[Sheet: {name}]\n{frame.to_string()}"
            for name, frame in sheets.items()
        )
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


def _load_or_explain(path):
    """Return the file's text, or a 'Could not read file' note on failure."""
    try:
        return _read_data_file(path)
    except Exception as e:
        # Deliberately broad: the comparison is best-effort and must go on
        # even if a file is missing, corrupt, or an Excel engine is absent.
        return f"Could not read file: {path} - {e}"


def _ask(thread_messages, content, model):
    """Send `content` as the next user turn, log the reply, return its text.

    Both the user message and the model's reply are appended to
    `thread_messages` so the next turn sees the whole conversation.
    """
    thread_messages.append({"role": "user", "content": content})
    response = client.chat(model=model, messages=thread_messages)
    reply = response['message']
    log_to_file("output", reply['content'])
    thread_messages.append(reply)
    return reply['content']


def compare(index1_path, index2_path, model="llama3.2"):
    """Ask the model which of two alpha result files is better.

    The conversation has four user turns: the preamble, the 上证50 benchmark
    data, an announcement of the first file, and finally the contents of
    both files together with the request for a verdict. Every prompt and
    every reply is written to the current log file via log_to_file().

    Args:
        index1_path: Path of the first candidate file (answer "1").
        index2_path: Path of the second candidate file (answer "2").
        model: Name of the Ollama model to query.

    Returns:
        "1" or "2" (as a string). The first standalone 1 or 2 found in the
        final reply is used; if none is found a warning is printed and "1"
        is returned.
    """
    with open("./data/prompt/0-instruction.md", "r", encoding="utf-8") as file:
        instruction = file.read()

    # Ollama doesn't have the concept of persistent assistants like OpenAI.
    # We'll simulate the assistant behavior within each 'compare' call by
    # sending the instruction as the system message of a fresh conversation.
    thread_messages = []
    if instruction.strip():
        thread_messages.append({"role": "system", "content": instruction})

    # first message
    with open("./data/prompt/1-preamble.md", "r", encoding="utf-8") as file:
        content = file.read()
    log_to_file("input", content)
    _ask(thread_messages, content, model)

    # --- second message - 上证50 ---
    with open("./data/prompt/2-上証50.md", "r", encoding="utf-8") as file:
        content = file.read()
    log_to_file("input", content)
    log_to_file("input", "./data/上証50.xlsx")
    try:
        # Only the first few rows are sent, to keep the prompt small.
        shangzheng_data = pd.read_excel("./data/上証50.xlsx", sheet_name="Sheet1").head().to_string()
        content_with_file = f"{content}\nData from ./data/上証50.xlsx (first few rows):\n{shangzheng_data}"
    except Exception as e:
        # Best-effort: tell the model the data is unavailable and carry on.
        content_with_file = f"{content}\nCould not read data from ./data/上証50.xlsx: {e}"
    _ask(thread_messages, content_with_file, model)

    # third message - index 1
    with open("./data/prompt/3-index1.md", "r", encoding="utf-8") as file:
        content = file.read()
    log_to_file("input", content)
    log_to_file("input", index1_path)
    # The file content itself is sent together with index 2 in the next turn.
    content_with_file = content + f"\n(Data from {index1_path} will be provided in the next step)"
    _ask(thread_messages, content_with_file, model)

    # fourth message - index 2
    with open("./data/prompt/4-index2.md", "r", encoding="utf-8") as file:
        content = file.read()
    log_to_file("input", content)
    log_to_file("input", index2_path)
    # Now, we'll provide the content of both index files and ask for comparison.
    index1_content = _load_or_explain(index1_path)
    index2_content = _load_or_explain(index2_path)

    comparison_prompt = f"""{content}

    Data from {index1_path}:

    {index1_content}

    Data from {index2_path}:

    {index2_content}

    YOU MUST NOT GENERATE ANY PYTHON CODES.

    Based on the above data and any insights from the 上证50 data provided earlier, which index is better? Please respond with only the number "1" if {os.path.basename(index1_path)} is better, or the number "2" if {os.path.basename(index2_path)} is better.
    """
    answer = _ask(thread_messages, comparison_prompt, model)

    # --- Improved decision extraction using regex ---
    # \b keeps digits inside larger numbers (e.g. "13") from matching.
    index_match = re.search(r"\b(1|2)\b", answer)
    if index_match:
        index = index_match.group(1)
    else:
        print(f"Warning: Could not determine the better index from the response: {answer}")
        index = "1" # Default to 1 if unsure

    log_to_file("output", f"The selected better alpha's index is: {index}")
    return index


# Sequential knockout over the files in ./data/alpha-result/: the current
# best file is compared against each remaining file in turn, and the winner
# of each comparison is carried forward.

# os.listdir returns entries in arbitrary order; sort them so the comparison
# order and the numbering in the round-*.log file names are reproducible.
files = sorted(
    f
    for f in os.listdir("./data/alpha-result/")
    if f.endswith((".xlsx", ".csv", ".txt"))  # Add other relevant file extensions
)
if not files:
    raise FileNotFoundError(
        "No .xlsx, .csv or .txt files found in ./data/alpha-result/"
    )

best_file = files[0]
best_file_index = 1  # 1-based position of the current best file in `files`
for round_num, file in enumerate(tqdm(files[1:]), start=1):
    index = round_num + 1  # 1-based position of the challenger in `files`
    # log_to_file() reads this module-level name, so set it before comparing.
    log_file = f"round-{round_num}-{best_file_index}-{index}.log"
    best_index = compare(
        f"./data/alpha-result/{best_file}", f"./data/alpha-result/{file}"
    )
    if best_index == "2":
        best_file = file
        best_file_index = index

print(f"The best alpha is: {best_file}")



  0%|          | 0/7 [00:00<?, ?it/s]

 71%|███████▏  | 5/7 [02:25<01:00, 30.01s/it]


From the above analysis of the two Excel files, it appears that both 13.xlsx and 10.xlsx have a more complex structure than the SSE 50 index data. The SSE 50 index data had a clear table header with straightforward column names, whereas the alpha performance data has a more intricate structure with multiple sheets and columns.

However, if we look at the data provided for each sheet, it seems that both alphas have varying levels of performance. Alpha 13 (from 13.xlsx) appears to have higher values in some columns compared to alpha 10 (from 10.xlsx), while alpha 10 has more consistent growth in other columns.

Without further analysis or additional data, it's difficult to definitively say which alpha performs better. However, based on the insights gained from the SSE 50 index data and the structure of the alpha performance data, I would lean towards alpha 13 (from 13.xlsx) being potentially better due to its higher values in some columns.

Please note that this is a high-level analysis

100%|██████████| 7/7 [03:49<00:00, 32.85s/it]

The best alpha is: 20.xlsx



