In [1]:
import ast
import io
import os
import re

import pandas as pd


# Decimal or scientific-notation number, e.g. "0.05", ".5", "1e-05", "-2.5E3".
_NUMBER_PATTERN = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"


def _parse_initial_prices(section, verbose):
    """Rebuild the price table found between 'initial_prices:' and 'optimizer:'."""
    match = re.search(r"initial_prices:(.*?)optimizer:", section, re.DOTALL)
    if match is None:
        return None

    lines = match.group(1).strip().split("\n")
    if verbose:
        print("Initial Prices Lines:")
        for line in lines:
            print(line)

    # The first line contains the column names.
    columns = re.split(r"\s+", lines[0].strip())
    if verbose:
        print("Columns:", columns)

    # A data row is one index token followed by one value per column. The
    # field-count check also rejects a bare index-name line (a single token,
    # e.g. 'date') and blank lines, so no line has to be skipped by position;
    # skipping by position would drop the first data row whenever the
    # index-name line is absent.
    expected_fields = len(columns) + 1
    data = []
    for line in lines[1:]:
        row = line.split()
        if len(row) == expected_fields:
            data.append(row)
            if verbose:
                print("Parsed row:", row)

    frame = pd.DataFrame(data, columns=["date"] + columns).set_index("date")
    return frame.apply(pd.to_numeric)


def _parse_mcaps(section):
    """Collect the '<name> <float>' lines that follow 'mcaps: ' into a Series."""
    # The block ends at the next line that starts with a YYYY-MM-DD date, or at
    # the end of the text when 'mcaps' is the last thing in the file.
    match = re.search(
        r"mcaps: (.*?)(?=\n\d{4}-\d{2}-\d{2}|\Z)", section, re.DOTALL
    )
    if match is None:
        return None

    mcaps = {}
    for line in match.group(1).strip().split("\n"):
        parts = line.split(maxsplit=1)
        if len(parts) != 2:
            continue
        try:
            mcaps[parts[0].strip()] = float(parts[1])
        except ValueError:
            # Not a '<name> <float>' line (e.g. a 'dtype: float64' footer).
            continue
    return pd.Series(mcaps)


def _parse_dict_field(section, field_name):
    """Evaluate the '{...}' literal that follows '<field_name>: ', if present."""
    # Non-greedy match: stops at the first '}', so nested dicts are not supported.
    match = re.search(field_name + r": ({.*?})", section, re.DOTALL)
    return ast.literal_eval(match.group(1)) if match else None


def parse_log_file(file_path, verbose=True):
    """Extract the optimisation inputs logged in Arthur's section of a log file.

    Everything from the marker ``***...Output for Arthur...***`` to the end of
    the file is searched for the fields listed below. A field that cannot be
    found is returned as ``None``.

    Args:
        file_path: Path of the log file to read.
        verbose: When true (the default, matching the previous behaviour) the
            raw price lines, the detected columns and every parsed row are
            printed while ``initial_prices`` is being rebuilt.

    Returns:
        dict with the keys:
            ``base_value`` (int or None),
            ``initial_prices`` (DataFrame indexed by ``date`` or None),
            ``mcaps`` (Series or None),
            ``max_weight`` / ``min_weight`` (evaluated dict literal or None),
            ``weight_threshold`` (float or None).

    Raises:
        ValueError: If the Arthur marker is not present in the file. Also
            propagated from ``pd.to_numeric`` when a price cell is not numeric
            and from ``ast.literal_eval`` when a weight dict is malformed.
    """
    with open(file_path, "r") as file:
        content = file.read()

    # Find the start of Arthur's output
    start_marker = "***...Output for Arthur...***"
    start_index = content.find(start_marker)
    if start_index == -1:
        raise ValueError("Arthur's output section not found in the log file")

    # NOTE(review): the section runs to the end of the file, so a field that is
    # missing here would be picked up from any later section -- confirm that
    # this is acceptable for the log layout.
    relevant_content = content[start_index:]

    base_value_match = re.search(r"base_value: (\d+)", relevant_content)
    base_value = int(base_value_match.group(1)) if base_value_match else None

    # Accept scientific notation: Python renders small floats as e.g. '1e-05',
    # which a digits-and-dots pattern would silently truncate to '1'.
    weight_threshold_match = re.search(
        r"weight_threshold: (" + _NUMBER_PATTERN + r")", relevant_content
    )
    weight_threshold = (
        float(weight_threshold_match.group(1)) if weight_threshold_match else None
    )

    return {
        "base_value": base_value,
        "initial_prices": _parse_initial_prices(relevant_content, verbose),
        "mcaps": _parse_mcaps(relevant_content),
        "max_weight": _parse_dict_field(relevant_content, "max_weight"),
        "min_weight": _parse_dict_field(relevant_content, "min_weight"),
        "weight_threshold": weight_threshold,
    }


# Usage
# The log location can be overridden with the DATA_COLLECTOR_LOG environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original local path is used.
log_file_path = os.environ.get(
    "DATA_COLLECTOR_LOG", "/Users/arguiot/Downloads/data_collector.log"
)
parsed_data = parse_log_file(log_file_path)

# Show every parsed field so the extraction can be checked by eye.
print("Base Value:", parsed_data["base_value"])
print("\nInitial Prices:\n", parsed_data["initial_prices"])
print("\nMcaps:\n", parsed_data["mcaps"])
print("\nMax Weight:", parsed_data["max_weight"])
print("\nMin Weight:", parsed_data["min_weight"])
print("\nWeight Threshold:", parsed_data["weight_threshold"])

Initial Prices Lines:
ondo         bnb       grt      rose       dot     myria       zrx      cake        uni        aave      pepe       ftm      usdc       rndr      blur       avax       link       axl      doge       fil        ens       arb       ldo      coti    pendle       ton         sol       fxs       ada       pyr      arkm      gala         qnt       imx      flow      usdt         w      beam        rpl       snx      rari      prime     super       chz     audio       kas      dydx          mkr      mana      near      form       ena          eth       rlb       vet     matic       ava        inj       atom       trx     sfund       fet       crv       egld        lpt
date                                                                                                                                                                                                                                                                                                                

In [2]:
from portfolio_optimization.portfolio.Portfolio import Portfolio
from portfolio_optimization.optimization.risk_parity import RiskParity
from main_backtest.delegates import (
    OptRebalancingPortfolioDelegate,
)

# Build a Portfolio that uses the RiskParity optimiser, fed with the values
# returned by parse_log_file (held in parsed_data).
portfolio_parity = Portfolio(
    base_value=parsed_data["base_value"],
    initial_prices=parsed_data["initial_prices"],  # price table rebuilt from the log
    optimiser=RiskParity,
    max_weight=parsed_data["max_weight"],  # dict literal read from the log
    # NOTE(review): parsed_data["min_weight"] is not used here; a literal with
    # only the '*' key is passed instead -- confirm this is intended.
    min_weight={"*": 0.0},
    budget={},
    # NOTE(review): the meaning of the two lambda values is defined by
    # Portfolio / RiskParity, which are not visible here -- confirm 0.1.
    lambda_var=0.1,
    lambda_u=0.1,
)

# Attach the delegate after construction.
chosen_delegate = OptRebalancingPortfolioDelegate()
portfolio_parity.delegate = chosen_delegate

# Print the weights
print("Portfolio weights:")
print(portfolio_parity.weights)

Available assets: {'rndr', 'zrx', 'rari', 'doge', 'dot', 'ada', 'audio', 'usdc', 'sfund', 'usdt', 'pyr', 'rpl', 'trx', 'egld', 'near', 'bnb', 'ton', 'lpt', 'mkr', 'pendle', 'inj', 'grt', 'mana', 'aave', 'ftm', 'fxs', 'dydx', 'coti', 'eth', 'blur', 'ena', 'chz', 'link', 'kas', 'matic', 'crv', 'imx', 'super', 'flow', 'arb', 'axl', 'arkm', 'rlb', 'ens', 'rose', 'sol', 'ldo', 'avax', 'snx', 'gala', 'w', 'fet', 'fil', 'uni', 'atom', 'ondo', 'ava', 'beam', 'vet', 'cake', 'prime', 'qnt', 'pepe', 'myria', 'form'}
Assets to keep: ['rndr', 'zrx', 'rari', 'doge', 'dot', 'ada', 'audio', 'sfund', 'pyr', 'rpl', 'trx', 'egld', 'near', 'bnb', 'ton', 'lpt', 'mkr', 'pendle', 'inj', 'grt', 'mana', 'aave', 'ftm', 'fxs', 'dydx', 'coti', 'blur', 'ena', 'chz', 'link', 'kas', 'matic', 'crv', 'imx', 'super', 'flow', 'arb', 'axl', 'arkm', 'rlb', 'ens', 'rose', 'sol', 'ldo', 'avax', 'snx', 'gala', 'w', 'fet', 'fil', 'uni', 'atom', 'ondo', 'beam', 'vet', 'cake', 'prime', 'qnt', 'pepe', 'myria']
Computed covarianc