In [45]:
import sys
import os

parent_dir = os.path.dirname(os.getcwd()) 
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from brainLogin import get_session

session = get_session()

response = session.post("https://api.worldquantbrain.com/authentication")
print(response.status_code)
print(response.json())


201
{'user': {'id': 'GY27086'}, 'token': {'expiry': 14400.0}, 'permissions': ['REFERRAL']}


In [29]:

def get_datafields(
    s,
    searchScope,
    dataset_id: str = '',
    search: str = ''
):
    import pandas as pd
    
    # 从searchScope中提取参数
    instrument_type = searchScope['instrumentType']
    region = searchScope['region']
    delay = searchScope['delay']
    universe = searchScope['universe']
    
    # 根据search参数是否为空构建不同的URL
    if len(search) == 0:
        url_template = "https://api.worldquantbrain.com/data-fields?" +\
            f"&instrumentType={instrument_type}" +\
            f"&region={region}&delay={str(delay)}&universe={universe}&dataset.id={dataset_id}&limit=50" +\
            "&offset={x}"
        print(url_template)
        response = s.get(url_template.format(x=0))
        print(response.status_code)
        print(response.json())
        count = s.get(url_template.format(x=0)).json()['count']
    else:
        url_template = "https://api.worldquantbrain.com/data-fields?" +\
            f"&instrumentType={instrument_type}" +\
            f"&region={region}&delay={str(delay)}&universe={universe}&limit=50" +\
            f"&search={search}" +\
            "&offset={x}"
        count = 100
    
    # 收集所有数据字段
    datafields_list = []
    for x in range(0, count, 50):
        datafields = s.get(url_template.format(x=x))
        datafields_list.append(datafields.json()['results'])
    
    # 展平列表并转换为DataFrame
    datafields_list_flat = [item for sublist in datafields_list for item in sublist]
    datafields_df = pd.DataFrame(datafields_list_flat)
    
    return datafields_df

In [30]:
searchScope = {'region': 'USA', 'delay': 1, 'universe': 'TOP3000', 'instrumentType': 'EQUITY'}

fundamental6 = get_datafields(s=session,searchScope=searchScope,dataset_id='fundamental6')


https://api.worldquantbrain.com/data-fields?&instrumentType=EQUITY&region=USA&delay=1&universe=TOP3000&dataset.id=fundamental6&limit=50&offset={x}
200
{'count': 886, 'results': [{'id': 'assets', 'description': 'Assets - Total', 'dataset': {'id': 'fundamental6', 'name': 'Company Fundamental Data for Equity'}, 'category': {'id': 'fundamental', 'name': 'Fundamental'}, 'subcategory': {'id': 'fundamental-fundamental-data', 'name': 'Fundamental Data'}, 'region': 'USA', 'delay': 1, 'universe': 'TOP3000', 'type': 'MATRIX', 'coverage': 0.9524, 'userCount': 10540, 'alphaCount': 37058, 'themes': []}, {'id': 'assets_curr', 'description': 'Current Assets - Total', 'dataset': {'id': 'fundamental6', 'name': 'Company Fundamental Data for Equity'}, 'category': {'id': 'fundamental', 'name': 'Fundamental'}, 'subcategory': {'id': 'fundamental-fundamental-data', 'name': 'Fundamental Data'}, 'region': 'USA', 'delay': 1, 'universe': 'TOP3000', 'type': 'MATRIX', 'coverage': 0.7655, 'userCount': 1392, 'alphaCo

In [31]:
fundamental6 = fundamental6[fundamental6['type'] == 'MATRIX']
fundamental6.head()

Unnamed: 0,id,description,dataset,category,subcategory,region,delay,universe,type,coverage,userCount,alphaCount,themes
0,assets,Assets - Total,"{'id': 'fundamental6', 'name': 'Company Fundam...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",USA,1,TOP3000,MATRIX,0.9524,10540,37058,[]
1,assets_curr,Current Assets - Total,"{'id': 'fundamental6', 'name': 'Company Fundam...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",USA,1,TOP3000,MATRIX,0.7655,1392,9033,[]
2,bookvalue_ps,Book Value Per Share,"{'id': 'fundamental6', 'name': 'Company Fundam...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",USA,1,TOP3000,MATRIX,0.9754,1334,7210,[]
3,capex,Capital Expenditures,"{'id': 'fundamental6', 'name': 'Company Fundam...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",USA,1,TOP3000,MATRIX,0.9646,4972,15362,[]
4,cash,Cash,"{'id': 'fundamental6', 'name': 'Company Fundam...","{'id': 'fundamental', 'name': 'Fundamental'}","{'id': 'fundamental-fundamental-data', 'name':...",USA,1,TOP3000,MATRIX,0.7529,1327,8915,[]


In [32]:
datafields_list_fundamental6 = fundamental6['id'].values



In [33]:
# 初始化alpha列表
alpha_list = []

# 遍历fundamental6数据字段列表
for datafield in datafields_list_fundamental6:
    print("正在格加下Alpha表达式与setting对应")
    # 创建alpha表达式：使用group_rank进行处理
    alpha_expression = f'group_rank({datafield}/cap, subindustry)'
    print(alpha_expression)
    
    # 定义模拟设置
    simulation_data = {
        'type': 'REGULAR',
        'settings': {
            'instrumentType': 'EQUITY',
            'region': 'USA',
            'universe': 'TOP3000',
            'delay': 1,
            'decay': 0,
            'neutralization': 'SUBINDUSTRY',
            'truncation': 0.08,
            'pasteurization': 'ON',
            'unitHandling': 'VERIFY',
            'nanHandling': 'ON',
            'language': 'FASTEXPR',
            'visualization': False,
        },
        'regular': alpha_expression
    }
    
    # 将模拟数据添加到alpha列表
    alpha_list.append(simulation_data)

# 打印要模拟的Alpha表达式数量
print(f'there are {len(alpha_list)} Alphas to simulate')

正在格加下Alpha表达式与setting对应
group_rank(assets/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(assets_curr/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(bookvalue_ps/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(capex/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cash/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cash_st/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cashflow/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cashflow_dividends/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cashflow_fin/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cashflow_invst/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cashflow_op/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(cogs/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(current_ratio/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(debt/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(debt_lt/cap, subindustry)
正在格加下Alpha表达式与setting对应
group_rank(debt_st/cap, subind

In [34]:
import json
import os
from time import sleep
from datetime import datetime

def save_simulation_record(alpha_id, datafield, status, filename='simulation_records.json'):
    """保存模拟记录"""
    record = {
        'alpha_id': alpha_id,
        'datafield': datafield,
        'status': status,
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }
    
    # 加载现有记录
    records = []
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            records = json.load(f)
    
    # 添加新记录
    records.append(record)
    
    # 保存记录
    with open(filename, 'w') as f:
        json.dump(records, f, indent=2)

def get_last_processed_index(filename='simulation_records.json'):
    """获取最后处理的索引"""
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            records = json.load(f)
            return len(records)
    return 0

# 主处理循环
start_index = get_last_processed_index()
print(f"Starting from index {start_index}")

for i, alpha in enumerate(alpha_list[start_index:], start=start_index):
    print(f"Processing alpha {i+1}/{len(alpha_list)}")
    
    try:
        # 发送模拟请求
        sim_resp = session.post(
            'https://api.worldquantbrain.com/simulations',
            json=alpha
        )
        
        try:
            # 获取进度监控URL
            sim_progress_url = sim_resp.headers['Location']
            
            # 循环检查模拟进度
            while True:
                sim_progress_resp = session.get(sim_progress_url)
                retry_after_sec = float(sim_progress_resp.headers.get("Retry-After", 0))
                
                if retry_after_sec == 0:  # simulation done!
                    break
                    
                sleep(retry_after_sec)
                
            # 获取最终模拟结果
            alpha_id = sim_progress_resp.json()["alpha"]
            print(f"Success - Alpha {i+1}: {alpha_id}")
            
            # 保存成功记录
            save_simulation_record(
                alpha_id=alpha_id,
                datafield=alpha['regular'],  # 保存alpha表达式
                status='success'
            )
            
        except Exception as e:
            print(f"Error in simulation {i+1}: {e}")
            # 保存失败记录
            save_simulation_record(
                alpha_id=None,
                datafield=alpha['regular'],
                status=f'error: {str(e)}'
            )
            
    except Exception as e:
        print(f"Failed to start simulation {i+1}: {e}")
        save_simulation_record(
            alpha_id=None,
            datafield=alpha['regular'],
            status=f'failed_to_start: {str(e)}'
        )

Starting from index 0
Processing alpha 1/574
Success - Alpha 1: rjnmpb8
Processing alpha 2/574
Success - Alpha 2: X7OEGRz
Processing alpha 3/574
Success - Alpha 3: qjwZ10Z
Processing alpha 4/574
Success - Alpha 4: Q7odk6g
Processing alpha 5/574
Success - Alpha 5: V7RogxG
Processing alpha 6/574
Success - Alpha 6: kjY5le6
Processing alpha 7/574
Success - Alpha 7: mjlk9ox
Processing alpha 8/574
Success - Alpha 8: pjpggx3
Processing alpha 9/574
Success - Alpha 9: 5l2VRm1
Processing alpha 10/574
Success - Alpha 10: oqkeQ6b
Processing alpha 11/574
Success - Alpha 11: djPw8px
Processing alpha 12/574
Success - Alpha 12: 5l2V7kn
Processing alpha 13/574
Success - Alpha 13: N72oqeo
Processing alpha 14/574
Success - Alpha 14: W7xmdXx
Processing alpha 15/574
Success - Alpha 15: 2l2kRxY
Processing alpha 16/574
Success - Alpha 16: 7j2Vx8L
Processing alpha 17/574
Success - Alpha 17: M72Akln
Processing alpha 18/574
Success - Alpha 18: El2AkLr
Processing alpha 19/574
Success - Alpha 19: 6l2bEYE
Processi

In [36]:
# 查看处理进度
def check_progress():
    if os.path.exists('simulation_records.json'):
        with open('simulation_records.json', 'r') as f:
            records = json.load(f)
            total = len(records)
            success = sum(1 for r in records if r['status'] == 'success')
            print(f"Total processed: {total}")
            print(f"Successful: {success}")
            print(f"Failed: {total - success}")
            
# 重新运行失败的simulation
def retry_failed():
    if os.path.exists('simulation_records.json'):
        with open('simulation_records.json', 'r') as f:
            records = json.load(f)
            failed = [r['datafield'] for r in records if r['status'] != 'success']
            print(f"Found {len(failed)} failed simulations to retry")
            return failed
    return []

In [40]:
def retry_failed_simulations(session):
    """重试失败的模拟"""
    from time import sleep
    
    def load_failed_records(filename='simulation_records.json'):
        """加载失败的记录"""
        if os.path.exists(filename):
            with open(filename, 'r') as f:
                records = json.load(f)
                # 筛选出失败的记录
                failed = [r for r in records if r['status'] != 'success']
                print(f"Found {len(failed)} failed simulations")
                return failed
        return []
    
    failed_records = load_failed_records()
    if not failed_records:
        print("No failed simulations found")
        return
    
    for i, record in enumerate(failed_records, 1):
        datafield = record['datafield']
        print(f"Retrying {i}/{len(failed_records)}: {datafield}")
        
        # 重新构建simulation_data
        simulation_data = {
            'type': 'REGULAR',
            'settings': {
                'instrumentType': 'EQUITY',
                'region': 'USA',
                'universe': 'TOP3000',
                'delay': 1,
                'decay': 0,
                'neutralization': 'SUBINDUSTRY',
                'truncation': 0.08,
                'pasteurization': 'ON',
                'unitHandling': 'VERIFY',
                'nanHandling': 'ON',
                'language': 'FASTEXPR',
                'visualization': False,
            },
            'regular': datafield
        }
        
        try:
            # 发送模拟请求
            sim_resp = session.post(
                'https://api.worldquantbrain.com/simulations',
                json=simulation_data
            )
            
            # 获取进度监控URL
            sim_progress_url = sim_resp.headers['Location']
            
            # 循环检查模拟进度
            while True:
                sim_progress_resp = session.get(sim_progress_url)
                retry_after_sec = float(sim_progress_resp.headers.get("Retry-After", 0))
                
                if retry_after_sec == 0:  # simulation done!
                    break
                    
                sleep(retry_after_sec)
                
            # 获取最终模拟结果
            alpha_id = sim_progress_resp.json()["alpha"]
            print(f"Success - Retry {i}: {alpha_id}")
            
            # 更新记录
            save_simulation_record(
                alpha_id=alpha_id,
                datafield=datafield,
                status='success'
            )
            
        except Exception as e:
            print(f"Error in retry {i}: {e}")
            save_simulation_record(
                alpha_id=None,
                datafield=datafield,
                status=f'retry_failed: {str(e)}'
            )

In [44]:
# 1. 首先创建新的session
session = get_session()

# 2. 检查失败的模拟数量
def check_failed():
    if os.path.exists('simulation_records.json'):
        with open('simulation_records.json', 'r') as f:
            records = json.load(f)
            failed = [r for r in records if r['status'] != 'success']
            print(f"Total failed simulations: {len(failed)}")
            return failed
    return []

# 3. 重试失败的模拟
retry_failed_simulations(session)

# 4. 再次检查状态
failed = check_failed()
if not failed:
    print("All simulations completed successfully!")
else:
    print(f"Still have {len(failed)} failed simulations")

Found 5 failed simulations
Retrying 1/5: group_rank(fnd6_dc/cap, subindustry)
Success - Retry 1: 0waoJ0v
Retrying 2/5: group_rank(fnd6_newqv1300_tfvaq/cap, subindustry)
Success - Retry 2: ljNQ6v7
Retrying 3/5: group_rank(fnd6_pstkl/cap, subindustry)
Success - Retry 3: 9X29px2
Retrying 4/5: group_rank(fnd6_pstkrv/cap, subindustry)
Success - Retry 4: n7XnKQl
Retrying 5/5: group_rank(fnd6_txw/cap, subindustry)
Success - Retry 5: zkLOlJE
Total failed simulations: 5
Still have 5 failed simulations
