In [1]:
import json
import os
import SharedArray as sa
from typing import Optional
from datetime import datetime
import json
import random
import numpy as np
import torch

from alphagen.data.expression import *
from alphagen_ocean.calculator_ import QLibStockDataCalculator
from alphagen_ocean.stock_data_ import StockData
from alphagen.config import *

In [2]:
# All evaluation data is loaded onto the CPU.
device = torch.device("cpu")
# NOTE(review): `cuda` is not read anywhere before it is reassigned to
# "cuda:1" in the GPU correlation cell further down, so this assignment
# currently has no effect.
cuda = torch.device("cuda:3")
# Evaluation window for the alpha pool. start_time/end_time look like
# YYYYMMDD integers (2021-06-01 .. 2021-12-01) — TODO confirm against StockData.
data_test = StockData(
    start_time=20210601,
    end_time=20211201,
    device=device,
)

In [3]:
import json

# Load a saved alpha pool. Later cells read its "exprs" (expression
# strings) and "weights" (one coefficient per expression) entries.
path = "checkpoints/ocean_lexpr8_lopt34_10_949_20230810173533/59392_steps_pool.json"
# JSON is UTF-8 by specification; pin the encoding so decoding does not
# depend on the platform's locale default.
with open(path, "r", encoding="utf-8") as f:
    alpha = json.load(f)

In [4]:
# Show the expression strings stored in the loaded pool.
alpha["exprs"]

['$qcumvolume',
 '$qcne5d_spec_ret',
 'Abs(Greater(DeStd($qsize_ret_l),Mad($qcne5d_spec_risk,10)))',
 '$qmoneyflow_pct_value_l',
 '$qintra_spreadavgmidp11',
 '$qsell_value_med_order_act',
 '$qcne5d_sizenl',
 '$qintra_deltanetbuy_5_stdev',
 '$qmoneyflow_pct_value',
 '$qbuy_value_large_order_act']

In [5]:
from alphagen.data.tree import ExpressionBuilder
from alphagen.data.tokens import *

# Hand-build the one composite expression of the pool (index 2 in the list
# printed above). Tokens are pushed operands-first: each operator token is
# added only after the tokens of its arguments.
builder = ExpressionBuilder()
# NOTE(review): the pool entry reads `$qsize_ret_l`, but the feature pushed
# here is `qsize_ret_diff`. Confirm whether this is an intentional rename or
# whether a different feature than the one in the pool is being evaluated.
builder.add_token(FeatureToken(FeatureType.qsize_ret_diff))
builder.add_token(OperatorToken(DeStd))
builder.add_token(FeatureToken(FeatureType.qcne5d_spec_risk))
builder.add_token(DeltaTimeToken(10))
builder.add_token(OperatorToken(Mad))
builder.add_token(OperatorToken(Greater))
builder.add_token(OperatorToken(Abs))
# Sanity check that the token sequence forms a complete expression.
builder.is_valid()

True

In [6]:
# Evaluate every pool entry that is a bare feature reference ("$name").
# Composite expressions keep the placeholder 0 and must be filled in
# separately before the factors are combined.
exprs = alpha["exprs"]
# Size the list from the pool itself instead of assuming exactly 10 entries.
factors = [0] * len(exprs)
for i, expr in enumerate(exprs):
    # startswith() also copes with an empty string, unlike expr[0].
    if expr.startswith("$"):
        factors[i] = Feature(getattr(FeatureType, expr[1:])).evaluate(data_test)

In [7]:
# Index 2 is the only pool entry that is not a bare "$feature", so the
# feature-only loop left it as a placeholder; fill it from the hand-built tree.
factors[2] = builder.get_tree().evaluate(data_test)

In [8]:
# Combination weights stored in the pool file, one per expression.
weights = torch.Tensor(alpha["weights"])

In [9]:
# Display the weight vector.
weights

tensor([-0.0190, -0.0354, -0.0127,  0.0106, -0.0159, -0.0098, -0.0090, -0.0092,
         0.0150,  0.0178])

In [10]:
# Combine the evaluated factors into one signal: sum_i factors[i] * weights[i].
# Pair factors with weights directly instead of hard-coding the pool size,
# and fail loudly if the two ever get out of step.
assert len(factors) == len(weights), "one weight is required per factor"
factor_value = torch.zeros_like(factors[0])
for factor, weight in zip(factors, weights):
    factor_value += factor * weight

In [11]:
from alphagen.utils.pytorch_utils import normalize_by_day

In [12]:
# Normalise the combined signal with the project's per-day helper; the exact
# normalisation is defined in alphagen.utils.pytorch_utils.
yhat = normalize_by_day(factor_value)

In [13]:
# Calculator over the evaluation data; the target tensors (ret1d/ret2d/ret5d)
# used below are read from it.
cal_test = QLibStockDataCalculator(data_test)

mem of retx:84.136962890625 MB


In [14]:
# Target tensors to correlate the signal against.
# Presumably 1-, 2- and 5-day returns, judging by the attribute names — TODO confirm.
y1d = cal_test.ret1d
y2d = cal_test.ret2d
y5d = cal_test.ret5d

In [15]:
from alphagen.utils.correlation import batch_pearsonr, batch_spearmanr

In [16]:
# For each target, print the mean Pearson correlation followed by the mean
# Spearman correlation between the signal and that target (CPU tensors).
for y in (y1d, y2d, y5d):
    for corr_fn in (batch_pearsonr, batch_spearmanr):
        print(corr_fn(yhat, y).mean())

batch_pearsonr cost 0.38831043243408203 seconds
tensor(0.0080, dtype=torch.float64)
batch_spearmanr cost 548.7186081409454 seconds
tensor(0.0503)
batch_pearsonr cost 0.1422739028930664 seconds
tensor(0.0100, dtype=torch.float64)
batch_spearmanr cost 574.142594575882 seconds
tensor(0.0490)
batch_pearsonr cost 0.09109711647033691 seconds
tensor(0.0100, dtype=torch.float64)
batch_spearmanr cost 572.6553502082825 seconds
tensor(0.0490)


In [17]:
# Same correlation report as the CPU cell, computed on the GPU.
# NOTE: in the recorded run this cell printed the first Pearson value and
# then hit a CUDA out-of-memory error (61.62 GiB requested on an 11.77 GiB
# device), i.e. during the Spearman step. Hoisting the transfers below
# removes redundant host-to-device copies; it does not change what the
# correlation functions allocate internally.
cuda = torch.device("cuda:1")
# The signal is the same for every target: copy it to the GPU once.
yhat_cuda = yhat.to(cuda)
for y in [y1d, y2d, y5d]:
    # Each target is also copied once and reused for both correlations.
    y_cuda = y.to(cuda)
    print(torch.mean(batch_pearsonr(yhat_cuda, y_cuda)))
    print(torch.mean(batch_spearmanr(yhat_cuda, y_cuda)))

batch_pearsonr cost 0.005568504333496094 seconds
tensor(0.0080, device='cuda:1', dtype=torch.float64)


OutOfMemoryError: CUDA out of memory. Tried to allocate 61.62 GiB (GPU 1; 11.77 GiB total capacity; 305.01 MiB already allocated; 9.37 GiB free; 318.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
from alphagen.data.tree import ExpressionBuilder
from alphagen.data.tokens import *
from alphagen_ocean.stock_data_ import FeatureType
from ocean_common.feature_list import FEATURES
from alphagen.config import *


def tokenize_formula(formula: str) -> List[str]:
    """Split a formula string into punctuation and name/number tokens.

    The characters "(", ")" and "," are each emitted as their own token.
    Every run of other characters between them becomes one token, with
    surrounding whitespace removed; runs that are empty or whitespace-only
    are dropped, so "Mad($x, 10)" and "Mad($x,10)" tokenize identically.

    Args:
        formula: Expression text such as "Abs(Mad($feature,10))".

    Returns:
        The tokens in their original left-to-right order.
    """
    tokens: List[str] = []
    pending: List[str] = []  # characters of the token currently being read

    def flush() -> None:
        # Emit the accumulated token, ignoring whitespace-only runs.
        text = "".join(pending).strip()
        if text:
            tokens.append(text)
        pending.clear()

    for char in formula:
        if char in ("(", ")", ","):
            flush()
            tokens.append(char)
        else:
            pending.append(char)
    flush()
    return tokens


def formula_to_expression(
    formula: str,
    operators: List[Type[Operator]] = OPERATORS,
    features: List[FeatureType] = FEATURES,
) -> Expression:
    """Parse a call-style formula string into an expression tree.

    The formula is written as nested calls, e.g.
    "Abs(Greater(DeStd($a),Mad($b,10)))". The ExpressionBuilder, however, is
    fed operands before their operator (see the hand-built example elsewhere
    in this notebook), so each operator token is held back until its closing
    ")" is reached and only then added to the builder.

    Args:
        formula: Formula text. "$name" refers to a FeatureType member, an
            optionally negative integer literal becomes a DeltaTimeToken, and
            any other name must match the ``__name__`` of an entry in
            ``operators`` and be followed by a parenthesised argument list.
        operators: Operator classes that may appear in the formula.
        features: Feature types that may appear in the formula.

    Returns:
        The tree produced by the builder for the formula.

    Raises:
        ValueError: On an unknown feature or operator, on malformed or
            unbalanced parentheses, or when the builder reports the final
            token sequence as invalid.
    """
    # Name -> class lookup; the first entry wins on duplicate names, matching
    # a first-match linear search over ``operators``.
    operators_by_name = {}
    for op in operators:
        operators_by_name.setdefault(op.__name__, op)

    builder = ExpressionBuilder()
    open_calls = []        # operators whose "(" has been seen but not their ")"
    awaiting_paren = None  # operator name just read, still waiting for its "("

    for raw_token in tokenize_formula(formula):
        # Tolerate tokens that still carry surrounding whitespace.
        token = raw_token.strip()
        if not token:
            continue

        if token == "(":
            if awaiting_paren is None:
                raise ValueError("Invalid formula: '(' must follow an operator name")
            open_calls.append(awaiting_paren)
            awaiting_paren = None
            continue

        if awaiting_paren is not None:
            raise ValueError(
                f"Invalid formula: operator {awaiting_paren.__name__} "
                "must be followed by '('"
            )

        if token == ",":
            # Argument separator: carries no information for the builder.
            continue

        if token == ")":
            if not open_calls:
                raise ValueError("Invalid formula: unbalanced ')'")
            # All arguments of this call are already in the builder, so the
            # operator itself can now be added.
            builder.add_token(OperatorToken(open_calls.pop()))
            continue

        if token.startswith("$"):
            # Default of None turns an unknown name into the ValueError below
            # instead of an AttributeError.
            feature_type = getattr(FeatureType, token[1:], None)
            if feature_type is None or feature_type not in features:
                raise ValueError(f"Unknown feature: {token}")
            builder.add_token(FeatureToken(feature_type))
        elif token.isdigit() or (token[0] == "-" and token[1:].isdigit()):
            builder.add_token(DeltaTimeToken(int(token)))
        else:
            operator_class = operators_by_name.get(token)
            if operator_class is None:
                raise ValueError(f"Unknown operator: {token}")
            awaiting_paren = operator_class

    if awaiting_paren is not None or open_calls:
        raise ValueError("Invalid formula: unbalanced parentheses")

    if not builder.is_valid():
        raise ValueError("Invalid formula")

    return builder.get_tree()


# Smoke test: parse the composite expression from the loaded pool (index 2
# of alpha["exprs"]), allowing only the four operators it uses and every
# FeatureType member, then print the resulting expression.
formula = "Abs(Greater(DeStd($qsize_ret_l),Mad($qcne5d_spec_risk,10)))"
operators = [Abs, Greater, DeStd, Mad]
features = list(FeatureType)
expression = formula_to_expression(formula, operators, features)
print(expression)