In [1]:
import json
import os
from typing import Optional, Tuple, List
from datetime import datetime
from pathlib import Path
from openai import OpenAI
import fire
import pandas as pd

import numpy as np
from sb3_contrib.ppo_mask import MaskablePPO
from stable_baselines3.common.callbacks import BaseCallback

from alphagen.data.expression import *
from alphagen.data.parser import ExpressionParser
from alphagen.models.linear_alpha_pool import LinearAlphaPool, MseAlphaPool
from alphagen.rl.env.wrapper import AlphaEnv
from alphagen.rl.policy import LSTMSharedNet
from alphagen.utils import reseed_everything, get_logger
from alphagen.rl.env.core import AlphaEnvCore
from alphagen_qlib.calculator import QLibStockDataCalculator
from alphagen_qlib.stock_data import initialize_qlib
from alphagen_llm.client import ChatClient, OpenAIClient, ChatConfig
from alphagen_llm.prompts.system_prompt import EXPLAIN_WITH_TEXT_DESC
from alphagen_llm.prompts.interaction import InterativeSession, DefaultInteraction

In [2]:
# Stock universe passed to every StockData built in this notebook.
instruments: str = "csi300"
# Use the first GPU when CUDA is usable; otherwise fall back to the CPU so the
# notebook still runs on machines without a CUDA device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def get_dataset(start: str, end: str) -> StockData:
    """Build a ``StockData`` for the notebook-wide universe and device.

    Args:
        start: Forwarded to ``StockData`` as ``start_time``.
        end: Forwarded to ``StockData`` as ``end_time``.

    Returns:
        A ``StockData`` created with the module-level ``instruments`` and
        ``device`` settings.
    """
    dataset_kwargs = dict(
        instrument=instruments,
        start_time=start,
        end_time=end,
        device=device,
    )
    return StockData(**dataset_kwargs)

# (start, end) date pairs; one StockData is built per pair, in this order.
segments = [
    ("2012-01-01", "2021-12-31"),
    ("2022-01-01", "2022-06-30"),
    ("2022-07-01", "2022-12-31"),
    ("2023-01-01", "2023-06-30"),
]

datasets = [get_dataset(start, end) for start, end in segments]

[18424:MainThread](2025-02-18 09:46:21,880) INFO - qlib.Initialization - [config.py:420] - default_conf: client.
[18424:MainThread](2025-02-18 09:46:22,998) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[18424:MainThread](2025-02-18 09:46:23,000) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': WindowsPath('C:/Users/tywat/.qlib/qlib_data/cn_data')}


In [3]:
# Target expression derived from the close price: Ref(close, -20) / close - 1.
# NOTE(review): presumably the 20-day forward return — confirm Ref's sign convention.
close = Feature(FeatureType.CLOSE)
target = Ref(close, -20) / close - 1
# One calculator per entry of `datasets`, all sharing the same target.
calculators = [
    QLibStockDataCalculator(segment_data, target)
    for segment_data in datasets
]

In [15]:
from alphagen.data.expression import Operators
from alphagen.data.parser import ExpressionParser

def load_linear_alpha_pool_from_json(json_path: str, 
                                     calculator: QLibStockDataCalculator,
                                     single_alpha: bool = False) -> LinearAlphaPool | list[LinearAlphaPool]:
    """Rebuild alpha pool(s) from a saved pool JSON file.

    The file must hold an object with two parallel lists: ``exprs``
    (expression strings parsed with ``ExpressionParser(Operators)``) and
    ``weights``.

    Args:
        json_path: Path of the pool JSON file to read.
        calculator: Calculator handed to every ``MseAlphaPool`` that is built.
        single_alpha: If False (default), return one pool whose capacity
            equals the number of expressions, loaded with all expressions and
            their saved weights. If True, return a list containing one
            capacity-1 pool per expression, each loaded with that expression
            and its saved weight.

    Returns:
        A single ``MseAlphaPool`` or a list of ``MseAlphaPool`` objects,
        depending on ``single_alpha``.

    Raises:
        ValueError: If ``exprs`` and ``weights`` have different lengths.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        pool_data = json.load(f)

    expressions = pool_data['exprs']
    weights = pool_data['weights']
    # Fail loudly instead of letting zip() silently drop unmatched entries.
    if len(expressions) != len(weights):
        raise ValueError(
            f"'exprs' has {len(expressions)} entries but 'weights' has "
            f"{len(weights)} in {json_path}"
        )

    parser = ExpressionParser(Operators)
    parsed_exprs = [parser.parse(expression) for expression in expressions]

    if single_alpha:
        # One independent capacity-1 pool per expression.
        alpha_pools = []
        for parsed_expr, weight in zip(parsed_exprs, weights):
            single_pool = MseAlphaPool(capacity=1, calculator=calculator)
            single_pool.force_load_exprs([parsed_expr], [weight])
            alpha_pools.append(single_pool)
        return alpha_pools

    # Combined pool: only built when it is actually the return value.
    alpha_pool = MseAlphaPool(
        capacity=len(expressions),
        calculator=calculator
    )
    alpha_pool.force_load_exprs(parsed_exprs, weights)
    return alpha_pool

# Both loads read the same saved pool file and are built with calculators[1].
# NOTE: the names are inverted relative to their contents: `alpha_pools` is the
# single combined pool, while `alpha_pool` is the list of one-expression pools.
pool_json_path = 'out/results/csi300_20_0_20250208124320_rl/251904_steps_pool.json'
alpha_pools = load_linear_alpha_pool_from_json(pool_json_path, calculators[1])
alpha_pool = load_linear_alpha_pool_from_json(pool_json_path, calculators[1], single_alpha=True)

In [17]:
# Evaluate the combined pool on calculators[2] and show its expressions and scores.
ensemble_scores = alpha_pools.test_ensemble(calculators[2])
ic_value, rank_ic_value = ensemble_scores
print(alpha_pools.exprs)
print(ic_value, rank_ic_value)

[Greater(Div(Div(-1.0,$high),EMA($open,10d)),-2.0), Delta(Log($vwap),1d), Mul($volume,Mul(Cov($close,Mul(5.0,Min(Mul($high,-30.0),40d)),40d),-0.01)), Sum(Mul(Corr(Div($vwap,-0.5),$close,5d),-10.0),10d), Abs(Sub(2.0,Div($close,Add(Greater(2.0,Delta(Log($low),5d)),30.0)))), Mad(Add(2.0,Mean($vwap,20d)),10d), Corr($close,$low,10d), Abs(Log(Mad(Sub(-0.5,$close),20d))), Mad(Log(Log($volume)),40d), Mul(0.5,Corr(Log($volume),WMA(Log($volume),40d),40d)), Mul(Mul($volume,Mul(Add(Mean($high,20d),30.0),$high)),0.5), Mul(WMA(Log(Abs(Var($low,5d))),20d),-2.0), Abs(Mul(5.0,Sub($open,30.0))), Mean(Less(Sub(-2.0,Corr($volume,$high,20d)),1.0),10d), Sub(Less(1.0,$low),5.0), Add(Corr(Sub(-1.0,$high),$volume,10d),0.01), WMA(Div(Std(WMA(Div(Div($vwap,30.0),$low),40d),20d),-5.0),10d), WMA(Sub(-1.0,Div($low,$close)),20d), Less(Div($close,$vwap),$volume), Sub(Mad(Mean(Log($low),20d),40d),5.0), None]
0.06614601612091064 0.0644562840461731


In [18]:
# Position (within the list of one-expression pools) of the alpha to inspect.
alpha_index = 3

# Evaluate just that pool on calculators[2].
chosen_pool = alpha_pool[alpha_index]
ic_value, rank_ic_value = chosen_pool.test_ensemble(calculators[2])
print(chosen_pool.exprs)
print(ic_value, rank_ic_value)

[Sum(Mul(Corr(Div($vwap,-0.5),$close,5d),-10.0),10d), None]
0.010267447680234909 0.010892813093960285


In [16]:
# Print the expression list held by each one-expression pool, one per line.
for single_pool in alpha_pool:
    print(single_pool.exprs)

[Greater(Div(Div(-1.0,$high),EMA($open,10d)),-2.0), None]
[Delta(Log($vwap),1d), None]
[Mul($volume,Mul(Cov($close,Mul(5.0,Min(Mul($high,-30.0),40d)),40d),-0.01)), None]
[Sum(Mul(Corr(Div($vwap,-0.5),$close,5d),-10.0),10d), None]
[Abs(Sub(2.0,Div($close,Add(Greater(2.0,Delta(Log($low),5d)),30.0)))), None]
[Mad(Add(2.0,Mean($vwap,20d)),10d), None]
[Corr($close,$low,10d), None]
[Abs(Log(Mad(Sub(-0.5,$close),20d))), None]
[Mad(Log(Log($volume)),40d), None]
[Mul(0.5,Corr(Log($volume),WMA(Log($volume),40d),40d)), None]
[Mul(Mul($volume,Mul(Add(Mean($high,20d),30.0),$high)),0.5), None]
[Mul(WMA(Log(Abs(Var($low,5d))),20d),-2.0), None]
[Abs(Mul(5.0,Sub($open,30.0))), None]
[Mean(Less(Sub(-2.0,Corr($volume,$high,20d)),1.0),10d), None]
[Sub(Less(1.0,$low),5.0), None]
[Add(Corr(Sub(-1.0,$high),$volume,10d),0.01), None]
[WMA(Div(Std(WMA(Div(Div($vwap,30.0),$low),40d),20d),-5.0),10d), None]
[WMA(Sub(-1.0,Div($low,$close)),20d), None]
[Less(Div($close,$vwap),$volume), None]
[Sub(Mad(Mean(Log($low),20d),40d),5.0), None]

In [19]:
# Score every one-expression pool on calculators[2], keeping each pool's
# expression list together with its two test_ensemble results.
score_rows = []
for single_pool in alpha_pool:
    ic_value, rank_ic_value = single_pool.test_ensemble(calculators[2])
    score_rows.append((single_pool.exprs, ic_value, rank_ic_value))

alphas = [row[0] for row in score_rows]
ics = [row[1] for row in score_rows]
rank_ics = [row[2] for row in score_rows]

# One row per pool, in the same order as `alpha_pool`.
df_ic_ind = pd.DataFrame({'alpha': alphas, 'ic': ics, 'rank_ic': rank_ics})
df_ic_ind

Unnamed: 0,alpha,ic,rank_ic
0,"[Greater(Div(Div(-1.0,$high),EMA($open,10d)),-...",0.055708,0.084874
1,"[Delta(Log($vwap),1d), None]",-0.02456,-0.012933
2,"[Mul($volume,Mul(Cov($close,Mul(5.0,Min(Mul($h...",-0.036502,-0.035545
3,"[Sum(Mul(Corr(Div($vwap,-0.5),$close,5d),-10.0...",0.010267,0.010893
4,"[Abs(Sub(2.0,Div($close,Add(Greater(2.0,Delta(...",-0.061593,-0.092103
5,"[Mad(Add(2.0,Mean($vwap,20d)),10d), None]",-0.006636,0.043511
6,"[Corr($close,$low,10d), None]",0.056122,0.063845
7,"[Abs(Log(Mad(Sub(-0.5,$close),20d))), None]",-0.081099,-0.097603
8,"[Mad(Log(Log($volume)),40d), None]",-0.025405,-0.042726
9,"[Mul(0.5,Corr(Log($volume),WMA(Log($volume),40...",-0.040783,-0.046877


In [23]:
# Pick one GP run directory under out/gp to inspect. Children are expected to
# be directories named after an integer seed; anything else is ignored.
# Path.iterdir() yields entries in arbitrary order, so sort by seed and take
# the highest one to make the selection deterministic.
seed_dirs = sorted(
    (
        child
        for child in Path("out/gp").iterdir()
        if child.is_dir() and child.name.isdecimal()
    ),
    key=lambda child: int(child.name),
)
if not seed_dirs:
    raise FileNotFoundError("no integer-named seed directories found under out/gp")

p = seed_dirs[-1]
seed = int(p.name)

# Load that run's "40.json" report.
with open(p / "40.json") as f:
    report = json.load(f)

# The saved pool state lives under res -> res -> pool_state; show its expressions.
state = report["res"]["res"]["pool_state"]
state["exprs"]

['Mad(Med($high,40d),30d)',
 'Med(Mean(Corr(Mul($open,Med($close,20d)),$close,30d),20d),30d)',
 'Mad(Min($high,50d),40d)',
 'Mad(Min($vwap,30d),30d)',
 'Std(EMA($high,40d),10d)',
 'Mad(Corr(Med($open,30d),$open,10d),30d)',
 'Std(Log(Min($open,40d)),20d)',
 'Var(EMA(Corr($close,-10.0,30d),30d),20d)',
 'Corr($low,$vwap,50d)',
 'Std(Med($close,40d),10d)',
 'Med(Mean(Mean(Corr(Mul($open,0.5),$close,30d),20d),20d),30d)',
 'Log(Mad(Sum($high,30d),10d))',
 'Min(Sub(Ref(Sum($vwap,30d),20d),WMA(Add(5.0,-1.0),30d)),40d)',
 'Std(Var(Delta($high,10d),10d),30d)',
 'WMA(Log(Max(Cov($close,10.0,30d),20d)),10d)',
 'Log(Mad($open,40d))',
 'Med(Mad(Corr($vwap,$volume,20d),30d),30d)',
 'Log(Mad($low,50d))',
 'Log(Mad($open,10d))',
 'Var(Min($open,30d),10d)']

In [4]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_path = 'out/backtests/50-5/gp/2-graph.pkl'

# Restore the pickled chart object and render it.
with open(file_path, mode='rb') as pickled_chart:
    chart = pickle.load(pickled_chart)
chart.show()

In [5]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_path = 'out/backtests/50-5/gp/2-report.pkl'

# Restore the pickled report object and display it as the cell result.
with open(file_path, mode='rb') as pickled_report:
    report = pickle.load(pickled_report)
report

Unnamed: 0_level_0,account,return,total_turnover,turnover,total_cost,cost,value,cash,bench
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,1.000000e+08,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,1.000000e+08,0.013587
2020-01-03,9.985773e+07,-1.242734e-16,9.484930e+07,0.948493,1.422740e+05,0.001423,9.484930e+07,5.008421e+06,-0.001753
2020-01-06,9.968490e+07,-1.380534e-03,1.181589e+08,0.233428,1.772383e+05,0.000350,9.897084e+07,7.140661e+05,-0.003778
2020-01-07,1.005576e+08,9.067840e-03,1.389572e+08,0.208641,2.084358e+05,0.000313,1.000074e+08,5.502283e+05,0.007490
2020-01-08,9.948802e+07,-1.045052e-02,1.514441e+08,0.124176,2.271661e+05,0.000186,9.916298e+07,3.250440e+05,-0.011516
...,...,...,...,...,...,...,...,...,...
2021-12-27,1.502713e+08,-1.131759e-04,7.002606e+09,0.182514,1.050391e+07,0.000274,1.495571e+08,7.141347e+05,-0.000410
2021-12-28,1.515162e+08,8.461025e-03,7.020299e+09,0.117739,1.053045e+07,0.000177,1.510521e+08,4.641126e+05,0.007448
2021-12-29,1.487223e+08,-1.827460e-02,7.036949e+09,0.109891,1.055542e+07,0.000165,1.482908e+08,4.314993e+05,-0.014625
2021-12-30,1.505484e+08,1.245313e-02,7.054229e+09,0.116189,1.058134e+07,0.000174,1.500913e+08,4.571258e+05,0.007787


In [5]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_path = 'out/backtests/50-5/rl/0-graph.pkl'

# Restore the pickled chart object and render it.
with open(file_path, mode='rb') as pickled_chart:
    chart = pickle.load(pickled_chart)
chart.show()

In [2]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_path = 'out/backtests/50-5/rl/0-report.pkl'

# Restore the pickled report object and display it as the cell result.
with open(file_path, mode='rb') as pickled_report:
    report = pickle.load(pickled_report)
report

Unnamed: 0_level_0,account,return,total_turnover,turnover,total_cost,cost,value,cash,bench
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,1.000000e+08,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,1.000000e+08,0.013587
2020-01-03,9.985766e+07,-4.208414e-16,9.489186e+07,0.948919,1.423378e+05,0.001423,9.489186e+07,4.965801e+06,-0.001753
2020-01-06,9.944416e+07,-3.792852e-03,1.180644e+08,0.232056,1.770966e+05,0.000348,9.873570e+07,7.084600e+05,-0.003778
2020-01-07,1.004760e+08,1.074233e-02,1.423650e+08,0.244364,2.135475e+05,0.000367,9.981191e+07,6.640592e+05,0.007490
2020-01-08,9.957497e+07,-8.648418e-03,1.637261e+08,0.212599,2.455891e+05,0.000319,9.901555e+07,5.594233e+05,-0.011516
...,...,...,...,...,...,...,...,...,...
2021-12-27,1.516544e+08,2.265028e-03,1.294456e+10,0.213837,1.941685e+07,0.000321,1.507879e+08,8.664104e+05,-0.000410
2021-12-28,1.528029e+08,7.880469e-03,1.297562e+10,0.204780,1.946343e+07,0.000307,1.520019e+08,8.010219e+05,0.007448
2021-12-29,1.515947e+08,-7.603732e-03,1.300651e+10,0.202148,1.950976e+07,0.000303,1.507677e+08,8.269453e+05,-0.014625
2021-12-30,1.535894e+08,1.346866e-02,1.303790e+10,0.207057,1.955685e+07,0.000311,1.527239e+08,8.654367e+05,0.007787


In [3]:
# Running sum of the "return" column, stored on the same frame as "cum_return".
cumulative_return = report["return"].cumsum()
report["cum_return"] = cumulative_return
report

Unnamed: 0_level_0,account,return,total_turnover,turnover,total_cost,cost,value,cash,bench,cum_return
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,1.000000e+08,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,1.000000e+08,0.013587,0.000000e+00
2020-01-03,9.985766e+07,-4.208414e-16,9.489186e+07,0.948919,1.423378e+05,0.001423,9.489186e+07,4.965801e+06,-0.001753,-4.208414e-16
2020-01-06,9.944416e+07,-3.792852e-03,1.180644e+08,0.232056,1.770966e+05,0.000348,9.873570e+07,7.084600e+05,-0.003778,-3.792852e-03
2020-01-07,1.004760e+08,1.074233e-02,1.423650e+08,0.244364,2.135475e+05,0.000367,9.981191e+07,6.640592e+05,0.007490,6.949482e-03
2020-01-08,9.957497e+07,-8.648418e-03,1.637261e+08,0.212599,2.455891e+05,0.000319,9.901555e+07,5.594233e+05,-0.011516,-1.698936e-03
...,...,...,...,...,...,...,...,...,...,...
2021-12-27,1.516544e+08,2.265028e-03,1.294456e+10,0.213837,1.941685e+07,0.000321,1.507879e+08,8.664104e+05,-0.000410,6.139802e-01
2021-12-28,1.528029e+08,7.880469e-03,1.297562e+10,0.204780,1.946343e+07,0.000307,1.520019e+08,8.010219e+05,0.007448,6.218607e-01
2021-12-29,1.515947e+08,-7.603732e-03,1.300651e+10,0.202148,1.950976e+07,0.000303,1.507677e+08,8.269453e+05,-0.014625,6.142570e-01
2021-12-30,1.535894e+08,1.346866e-02,1.303790e+10,0.207057,1.955685e+07,0.000311,1.527239e+08,8.654367e+05,0.007787,6.277256e-01


In [1]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only open files you trust.
file_path = 'out/backtests/50-5/boot/0-graph.pkl'

# Restore the pickled chart object and render it.
with open(file_path, mode='rb') as pickled_chart:
    chart = pickle.load(pickled_chart)
chart.show()