In [1]:
import numpy as np
import json
import pandas as pd

### 链条式找配置，并组装紧密数组（每张地图只需做一次，可用于生成一串派生环境场）

In [2]:
import re
from pathlib import Path
import json

"""
数据处理步骤 (Metaphysical Data Processing Steps):

1.  **数据源与目标确立 (Source & Target Definition)**
    - Source: 位于 `precompute/data` 下的静态配置表 (JSON)，定义了游戏世界的逻辑结构。
    - Target: 位于 `ExportedData` 下的 Numpy 数组，代表了具体场景的物理数据 (Voxel/Grid)。

2.  **标识符提取 (Identifier Extraction)**
    - 从物理数据的文件名/路径中提取关键标识符 (SceneID / AssetID)。
    - 例如: 从 `Fishing_1006001_Global.npy` 提取 `1006001`。

3.  **逻辑映射建立 (Logical Mapping)**
    - 利用中间件/配置表 (`map_scene.json`) 将物理标识符 (AssetID) 映射回系统逻辑标识符 (MapID)。
    - 这一步是连接“即时演算数据”与“策划配置数据”的关键桥梁。

4.  **上下文关联与整合 (Context Association & Integration)**
    - 以 MapID 为锚点，级联查询关联的 Pond, Stock, Fish 配置。
    - 将分散的数据整合为适合并行计算的结构化格式 (Numpy/Pandas)。
"""

# 配置路径
DATA_ROOT = Path(r'D:\fishinggame\precompute\data\1\1001')
EXPORTED_DATA_ROOT = Path(r'D:\fishinggame\ExportedData')

# 加载 map_scene.json
with open(DATA_ROOT / 'map_scene.json', 'r', encoding='utf-8') as f:
    map_scene = json.load(f)

# 建立 assetId -> map_id 的反向索引
asset_to_map = {info['assetId']: int(map_id) for map_id, info in map_scene.items() if info.get('assetId')}

In [3]:
def get_scene_id_from_path(npy_path: str) -> str:
    """从npy文件路径中提取scene_id (如 Fishing_1006001_Global.npy -> '1006001')"""
    match = re.search(r'Fishing_(\d+)', str(npy_path))
    if not match:
        raise ValueError(f'无法从路径中提取scene_id: {npy_path}')
    return match.group(1)

def get_map_id_from_scene_id(scene_id: str) -> int:
    """根据scene_id查找对应的map_id"""
    if scene_id in asset_to_map:
        return asset_to_map[scene_id]
    raise ValueError(f'找不到scene_id {scene_id} 对应的map_id')

def get_map_id_from_npy_path(npy_path: str) -> int:
    """从npy文件路径直接获取map_id"""
    scene_id = get_scene_id_from_path(npy_path)
    return get_map_id_from_scene_id(scene_id)

# 测试示例
test_path = r'D:\fishinggame\ExportedData\Fishing_1006001_Dense_20260107_154037\Fishing_1006001_Global.npy'
scene_id = get_scene_id_from_path(test_path)
map_id = get_map_id_from_scene_id(scene_id)
print(f'文件路径: {test_path}')
print(f'提取的 scene_id: {scene_id}')
print(f'对应的 map_id: {map_id}')
print(f'地图信息: {map_scene[str(map_id)]}')

文件路径: D:\fishinggame\ExportedData\Fishing_1006001_Dense_20260107_154037\Fishing_1006001_Global.npy
提取的 scene_id: 1006001
对应的 map_id: 1009
地图信息: {'id': 1009, 'name': 'map_base_6', 'desc': 106, 'assetId': '1006001', 'originOffsetX': 0, 'originOffsetY': 0, 'offsetX': 0, 'offsetY': 0, 'sizeX': 1000, 'sizeY': 1000, 'rotate': 0, 'mark': ''}


#### 对于局部池取stock
* 
* 去D:\fishinggame\precompute\data\1\1001\fish_stock.json当中，

In [4]:
# 加载额外的配置表
with open(DATA_ROOT / 'fish_pond_list.json', 'r', encoding='utf-8') as f:
    fish_pond_list = json.load(f)

with open(DATA_ROOT / 'fish_stock.json', 'r', encoding='utf-8') as f:
    fish_stock_config = json.load(f)

print(f"已加载 {len(fish_pond_list)} 个鱼塘配置")
print(f"已加载 {len(fish_stock_config)} 个 Stock 配置")

# 获取 map_id 对应的 map_scene 配置
current_map_info = map_scene.get(str(map_id))
if not current_map_info:
    raise ValueError(f"Found no map info for id {map_id}")

map_desc_id = current_map_info.get('desc')
print(f"\n当前地图: {current_map_info['name']} (ID: {map_id})")
print(f"关联的 Desc ID (用于对应 fish_pond_list.mapId): {map_desc_id}")

# 查找关联的 Pond 和 Stock
print(f"\n查找 mapId == {map_desc_id} 的鱼塘...")
related_ponds = [pond for pond in fish_pond_list.values() if pond.get('mapId') == map_desc_id]

if not related_ponds:
    print("警告: 未找到关联的鱼塘配置 (Pond)")
else:
    print(f"找到 {len(related_ponds)} 个关联鱼塘:")
    for pond in related_ponds:
        # 兼容处理: 有些json key可能是str类型的id
        pond_id = pond.get('id')
        pond_name = pond.get('name')
        stock_id = pond.get('fishStockId')
        
        print(f"  - Pond: {pond_name} (ID: {pond_id}) -> Stock ID: {stock_id}")
        
        # 查询 Stock 详情 (注意 key 可能是字符串)
        stock_info = fish_stock_config.get(str(stock_id))
        if stock_info:
             print(f"    Stock 详情: Name={stock_info.get('name')}, ResetTime={stock_info.get('resetDayTime')}")
        else:
             print(f"    警告: 在 fish_stock.json 中未找到 Stock ID {stock_id}")

已加载 7 个鱼塘配置
已加载 6 个 Stock 配置

当前地图: map_base_6 (ID: 1009)
关联的 Desc ID (用于对应 fish_pond_list.mapId): 106

查找 mapId == 106 的鱼塘...
找到 1 个关联鱼塘:
  - Pond: Sunset_Stream (ID: 301020005) -> Stock ID: 301030106
    Stock 详情: Name=stock_sunset, ResetTime=05:00


In [5]:
# 顺着往下进行数据查找和组装numpy，供后面的计算使用。
# 大致思路为：
# 5. 遍历 Stock ID (Stock -> Release)：
#    - 从相关联的池塘中提取 Stock ID，查找其下属的所有 Release ID。
# 6. Eager Loading (Release -> Fish/Env):
#    - 对每个 Release ID，立即提取所需的全部配置信息，包括：
#      - 基础属性: qualityId (即原 fishId), weight/length ranges (min/max).
#      - 关键系数: minEnvCoeff, minAdaptCoeff.
#      - 关联元数据: speciesId, envAffinityId (即原 envId).
# 7. 组装 DataFrame (Assembly):
#    - 将上述所有提取的字段扁平化，组装成 Pandas DataFrame (`stockFishesPd`)。
#    - 每一行代表一个 Release 配置，为后续的概率计算和环境场生成做准备。

In [6]:
import pandas as pd

# Load additional configurations
print("Loading Release and Quality configs...")
with open(DATA_ROOT / 'stock_release.json', 'r', encoding='utf-8') as f:
    stock_release_config = json.load(f)

with open(DATA_ROOT / 'fish_release.json', 'r', encoding='utf-8') as f:
    fish_release_config = json.load(f)

with open(DATA_ROOT / 'basic_fish_quality.json', 'r', encoding='utf-8') as f:
    basic_fish_quality_config = json.load(f)
print("Configs loaded.")

rows = []

# 'related_ponds' should be available from the previous cell execution
# If not, we rely on the logic that this cell is run after Cell 5.
if 'related_ponds' not in locals():
    print("Warning: 'related_ponds' not found. Please ensure the previous cell is executed.")
    unique_stock_ids = set()
else:
    unique_stock_ids = set(pond.get('fishStockId') for pond in related_ponds if pond.get('fishStockId'))

print(f"Processing {len(unique_stock_ids)} unique Stock IDs associated with the current map.")

for stock_id in unique_stock_ids:
    # Find all releases for this stock
    # Note: Scanning all values in stock_release_config might be inefficient for very large datasets,
    # but acceptable for this precompute scope.
    stock_releases = [item for item in stock_release_config.values() if item.get('stockId') == stock_id]
    
    for sr in stock_releases:
        release_id = sr.get('releaseId')
        fish_quality_id = sr.get('fishId') # referred as fishId in stock_release.json, but actually qualityId
        fish_env_affinity_id = sr.get('fishEnvId') # referred as fishEnvId in stock_release.json
        
        # Lookup Release Info
        release_info = fish_release_config.get(str(release_id))
        if not release_info:
            # print(f"Warning: Release ID {release_id} not found in fish_release.json")
            continue
            
        # Lookup Fish Quality Info
        fish_info = basic_fish_quality_config.get(str(fish_quality_id))
        species_id = fish_info.get('species', -1) if fish_info else -1
            
        row = {
            'stockId': stock_id,
            'releaseId': release_id,
            'qualityId': fish_quality_id,
            'envAffinityId': fish_env_affinity_id, # Renamed from envId for clarity
            'speciesId': species_id,
            
            # Release Limits
            'weight_min': release_info.get('weightMin'),
            'weight_max': release_info.get('weightMax'),
            'len_min': release_info.get('lengthMin'),
            'len_max': release_info.get('lengthMax'),

            # Environment Coefficients (Added per request)
            'minEnvCoeff': release_info.get('minEnvCoeff', 0),
            'minAdaptCoeff': release_info.get('minAdaptCoeff', 0),
            
            # Debug/Display info
            'name': release_info.get('name'),
            'probWeight': release_info.get('probWeightIdeal')
        }
        rows.append(row)

stockFishesPd = pd.DataFrame(rows)
print(f"Created stockFishesPd with {len(stockFishesPd)} rows.")
if not stockFishesPd.empty:
    print(stockFishesPd.head().to_string())
    # print("\nColumn Types:")
    # print(stockFishesPd.dtypes)
else:
    print("DataFrame is empty. Check if stock_release.json maps correctly to the pond stock IDs.")

Loading Release and Quality configs...
Configs loaded.
Processing 1 unique Stock IDs associated with the current map.
Created stockFishesPd with 35 rows.
     stockId  releaseId  qualityId  envAffinityId  speciesId  weight_min  weight_max  len_min  len_max  minEnvCoeff  minAdaptCoeff                                name  probWeight
0  301030106     300500  101034430        1013390  101020063         150         450       26       37            0              0  Release_American_Shad_Young_sunset      250000
1  301030106     300510  101034090        1013050  101020010          50         200       16       26            0              0    Release_Brook_Trout_Young_sunset      100000
2  301030106     300520  101031007        1010066  101020010         200         350       26       32            0              0   Release_Brook_Trout_Common_sunset      100000
3  301030106     300530  101034450        1013410  101020003         150         450       28       40            0              0

In [7]:
# 继续进行后续数据关联 (Data Enrichment Phase II)

# 8. 环境亲和性关联 (Environment Affinity Lookup):
#    - 目标: 丰富鱼类的环境适应参数。
#    - 操作: 使用 `envAffinityId` (原 `fishEnvId`) 关联 `fish_env_affinity.json`。
#    - 提取关键属性 (Attributes Extraction):
#         - 基础ID关联: structId (结构), tempId (温度), layerId (水层), lightId (光照)。
#         - 诱鱼系数: baitCoeffGroup, baitTypeCoeffGroup, periodCoeffGroup (时段)。
#         - 适应性参数: 
#             - pressureSensitivity (气压敏感度)
#             - minAdaptLureRatio / maxAdaptLureRatio (路亚适应比例)
#             - maxAcceptLengthRatio (最大接受长度比)
#         - 衰减配置: underLengthDecayCoeff / overLengthDecayCoeff (体型偏离衰减)。

# 9. 结构体亲和性级联查找 (Structure Affinity Cascade):
#    - 目标: 获取具体的物理结构交互参数。
#    - 操作: 使用步骤 8 获得的 `structId`，查询 `struct_affinity.json`。
#    - 提取参数 (Parameters Extraction): 
#         - `List`: 包含 `structType` (结构类型) 和 `coeff` (系数) 的列表。

# 10. 温度亲和性级联查找 (Temperature Affinity Cascade):
#    - 目标: 获取鱼类对温度的敏感度配置。
#    - 操作: 使用步骤 8 获得的 `tempId`，查询 `temp_affinity.json`。
#    - 提取参数 (Parameters Extraction): 
#         - `temperatureFav`: 最适温度 (注意可能需要缩放，如 220 -> 22.0)。
#         - `tempAffectedRatio`: 温度影响比率。
#         - `tempThreshold`: 温度容忍阈值。

# 11. 水层亲和性级联查找 (Water Layer Affinity Cascade):
#    - 目标: 获取鱼类在不同水层的分布偏好。
#    - 操作: 使用步骤 8 获得的 `layerId`，查询 `water_layer_affinity.json`。
#    - 提取参数 (Parameters Extraction): 
#         - `List`: 包含 `layerType` (水层类型, 如上/中/下) 和 `coeff` (系数) 的列表。

In [8]:
# 9-11. 实现环境与亲和性级联查找 (Env & Affinity Cascade Lookup)

print("Loading Affinity Configs...")
# 1. 加载所有亲和性配置 (Load Configs)
with open(DATA_ROOT / 'fish_env_affinity.json', 'r', encoding='utf-8') as f:
    env_affinity_config = json.load(f)

with open(DATA_ROOT / 'struct_affinity.json', 'r', encoding='utf-8') as f:
    struct_affinity_config = json.load(f)

with open(DATA_ROOT / 'temp_affinity.json', 'r', encoding='utf-8') as f:
    temp_affinity_config = json.load(f)

with open(DATA_ROOT / 'water_layer_affinity.json', 'r', encoding='utf-8') as f:
    layer_affinity_config = json.load(f)
print("Affinity Configs loaded.")

# 2. 准备查找字典 (Prepare Lookup Dicts)
#    优化: 直接构建 id -> data 的快速查找字典，避免每次遍历 list
#    注意: JSON key通常是字符串, DataFrame中Id可能是int, 查找时需注意类型转换

def get_config_by_id(config_dict, target_id):
    """Safe lookup helper handling str/int key mismatch"""
    if target_id is None:
        return None
    return config_dict.get(str(target_id))

# 3. 扩展 DataFrame (Enrich DataFrame)
#    虽然可以使用 apply，但在列数较多且逻辑复杂时，迭代或列表推导式便于调试和错误处理
#    考虑到数据量不大 (几十到几百行)，直接遍历 row 更新字典列表然后重新创建 DF 也是一种清晰的方法
#    或者使用 apply + Series expand

def enrich_row(row):
    # Step 8: Env Affinity Lookup
    env_id = row.get('envAffinityId')
    env_info = get_config_by_id(env_affinity_config, env_id)
    
    extra_data = {}
    
    if env_info:
        # Extract basic Env IDs
        struct_id = env_info.get('structId')
        temp_id = env_info.get('tempId')
        layer_id = env_info.get('layerId')
        light_id = env_info.get('lightId')
        
        extra_data.update({
            'structId': struct_id,
            'tempId': temp_id,
            'layerId': layer_id,
            'lightId': light_id,
            # Coeffs
            'baitCoeffGroup': env_info.get('baitCoeffGroup'),
            'baitTypeCoeffGroup': env_info.get('baitTypeCoeffGroup'),
            'periodCoeffGroup': env_info.get('periodCoeffGroup'),
            # Adaptability Stats
            'pressureSensitivity': env_info.get('pressureSensitivity'),
            'minAdaptLureRatio': env_info.get('minAdaptLureRatio'),
            'maxAdaptLureRatio': env_info.get('maxAdaptLureRatio'),
            'maxAcceptLengthRatio': env_info.get('maxAcceptLengthRatio'),
            'underLengthDecayCoeff': env_info.get('underLengthDecayCoeff'),
            'overLengthDecayCoeff': env_info.get('overLengthDecayCoeff'),
        })
        
        # Step 9: Structure Affinity Cascade
        struct_info = get_config_by_id(struct_affinity_config, struct_id)
        if struct_info:
            extra_data['structList'] = struct_info.get('List') # raw list of {structType, coeff}
        
        # Step 10: Temperature Affinity Cascade
        temp_info = get_config_by_id(temp_affinity_config, temp_id)
        if temp_info:
            extra_data['temperatureFav'] = temp_info.get('temperatureFav')
            extra_data['tempAffectedRatio'] = temp_info.get('tempAffectedRatio')
            extra_data['tempThreshold'] = temp_info.get('tempThreshold')
            
        # Step 11: Water Layer Affinity Cascade
        layer_info = get_config_by_id(layer_affinity_config, layer_id)
        if layer_info:
            extra_data['layerList'] = layer_info.get('List') # raw list of {layerType, coeff}
            
    return pd.Series(extra_data)

# 应用扩展逻辑
print("Enriching DataFrame...")
if not stockFishesPd.empty:
    enriched_columns = stockFishesPd.apply(enrich_row, axis=1)
    
    # Concatenate original df with new columns
    stockFishesPd = pd.concat([stockFishesPd, enriched_columns], axis=1)
    
    print("Enrichment Complete.")
    print(f"New DataFrame Shape: {stockFishesPd.shape}")
    print(stockFishesPd[['qualityId','envAffinityId','temperatureFav', 'structId', 'tempId', 'layerList', 'structList', 'periodCoeffGroup']].head().to_string())
else:
    print("stockFishesPd is empty, skipping enrichment.")

Loading Affinity Configs...
Affinity Configs loaded.
Enriching DataFrame...
Enrichment Complete.
New DataFrame Shape: (35, 31)
   qualityId  envAffinityId  temperatureFav  structId   tempId                                                                                   layerList                                                                                                                                                                                                                                                                                                                                                                                                       structList  periodCoeffGroup
0  101034430        1013390             195   2011030  2021010  [{'layerType': 1, 'coeff': 1}, {'layerType': 2, 'coeff': 1}, {'layerType': 3, 'coeff': 1}]     [{'structType': 0, 'coeff': 1}, {'structType': 1, 'coeff': 0.3}, {'structType': 2, 'coeff': 0.3}, {'structType': 3, 'coeff': 0.3}, {'structT

In [9]:
# 12-14. 诱鱼与时段亲和性关联 (Bait & Period Affinity)

import json
import pandas as pd

print("Loading Bait/Period Configs...")
# Load JSONs
with open(DATA_ROOT / 'bait_affinity.json', 'r', encoding='utf-8') as f:
    bait_affinity_data = json.load(f)
with open(DATA_ROOT / 'bait_type_affinity.json', 'r', encoding='utf-8') as f:
    bait_type_affinity_data = json.load(f)
with open(DATA_ROOT / 'period_affinity.json', 'r', encoding='utf-8') as f:
    period_affinity_data = json.load(f)

# Helper to aggregate by group
# Transform {id: {data}} -> {group_id: [data_list]}
def aggregate_by_group(source_data, group_key_name):
    grouped = {}
    for item in source_data.values():
        grp_id = item.get(group_key_name)
        
        # Safe cast to string for consistent lookup key
        if grp_id is not None:
            grp_key = str(int(grp_id)) # int -> str to match potential int IDs
            if grp_key not in grouped:
                grouped[grp_key] = []
            grouped[grp_key].append(item)
    return grouped

print("Aggregating Groups...")
# Note: 'baitCoeffGroup' matches the field in fish_env_affinity
bait_groups = aggregate_by_group(bait_affinity_data, 'baitCoeffGroup') 
bait_type_groups = aggregate_by_group(bait_type_affinity_data, 'baitTypeCoeffGroup') 

# Note: In period_affinity.json, the key is 'periodGroup', but in fish_env it is 'periodCoeffGroup'
period_groups = aggregate_by_group(period_affinity_data, 'periodGroup')

# Enrich Wrapper
def enrich_bait_period(row):
    # Lookup Bait Group
    # row['baitCoeffGroup'] comes from fish_env_affinity, expected to be int or str
    b_val = row.get('baitCoeffGroup')
    if pd.notnull(b_val):
        b_grp = str(int(b_val))
        bait_list = bait_groups.get(b_grp, [])
    else:
        bait_list = []
    
    # Lookup Bait Type Group
    bt_val = row.get('baitTypeCoeffGroup')
    if pd.notnull(bt_val):
        bt_grp = str(int(bt_val))
        bait_type_list = bait_type_groups.get(bt_grp, [])
    else:
        bait_type_list = []
    
    # Lookup Period Group
    p_val = row.get('periodCoeffGroup')
    if pd.notnull(p_val):
        p_grp = str(int(p_val))
        period_list = period_groups.get(p_grp, [])
    else:
        period_list = []
    
    return pd.Series({
        'baitList': bait_list,          # detailed list of {baitId, coeff...}
        'baitTypeList': bait_type_list, # detailed list of {baitSubType, coeff...}
        'periodList': period_list       # detailed list of {periodId, activityFactor...}
    })

print("Enriching StockFishesPd with Bait/Period lists...")
if not stockFishesPd.empty:
    bp_columns = stockFishesPd.apply(enrich_bait_period, axis=1)
    
    # Concatenate
    # Drop existing if re-running to avoid dupe columns
    cols_to_drop = [c for c in ['baitList', 'baitTypeList', 'periodList'] if c in stockFishesPd.columns]
    if cols_to_drop:
         stockFishesPd = stockFishesPd.drop(columns=cols_to_drop)
            
    stockFishesPd = pd.concat([stockFishesPd, bp_columns], axis=1)
    
    print("Enrichment Complete.")
    print(f"DataFrame Shape: {stockFishesPd.shape}")
    
    # Sample Output
    # Only show limited info
    cols_check = ['qualityId', 'baitCoeffGroup', 'baitList', 'periodList']
    # Just print the first row nicely formatted
    first_row = stockFishesPd.iloc[0]
    print(f"Sample Row 0 - QualityID: {first_row['qualityId']}")
    print(f"BaitCoeffGroup: {first_row['baitCoeffGroup']}")
    print(f"BaitList Count: {len(first_row['baitList'])}")
    print(f"PeriodList Count: {len(first_row['periodList'])}")
else:
    print("DataFrame empty.")

Loading Bait/Period Configs...
Aggregating Groups...
Enriching StockFishesPd with Bait/Period lists...
Enrichment Complete.
DataFrame Shape: (35, 34)
Sample Row 0 - QualityID: 101034430
BaitCoeffGroup: 80000
BaitList Count: 1
PeriodList Count: 8


### 核心计算准备 1：亲和度配置数据构建 (Affinity DataFrame Construction)
按照 Technical Guide 要求，将配置数据构建为**可查找的 DataFrame** (行=Fish, 列=Feature)，并在进入计算与广播阶段前转换为稠密矩阵。

*   **StructAffinityDataFrame**: `Row: FishID, Col: StructTypeID` (用于 Gather 查找)
*   **LayerAffinityDataFrame**: `Row: FishID, Col: LayerTypeID`
*   **TempAffinityDataFrame**: `Row: FishID, Col: [Fav, Ratio, Threshold]`

In [10]:
import numpy as np
import pandas as pd

# 1. 定义常量
# Struct Constants (0-11 based on data_formula.md)
VALID_STRUCT_TYPES = list(range(12)) # 0..11
# Layer Constants (1-3 based on data_formula.md)
VALID_LAYER_TYPES = [1, 2, 3]

def build_dense_matrices(df):
    num_fishes = len(df)
    
    # ----------------------------------------------------
    # Quality ID Indexing
    # ----------------------------------------------------
    if 'qualityId' not in df.columns:
         print("Error: 'qualityId' not found in DataFrame columns!")
         raise KeyError("qualityId missing")
         
    fish_quality_ids = df['qualityId'].values
    
    # ----------------------------------------------------
    # 1. Temp Affinity DataFrame
    # ----------------------------------------------------
    print("Building Temp Affinity DataFrame...")
    temp_cols = ['qualityId', 'temperatureFav', 'tempAffectedRatio', 'tempThreshold']
    temp_df = df[temp_cols].copy()
    temp_df.set_index('qualityId', inplace=True)
    
    # Normalize/Scale Values
    temp_df['temperatureFav'] = temp_df['temperatureFav'] / 10.0
    temp_df['tempAffectedRatio'] = temp_df['tempAffectedRatio'] / 10000.0
    temp_df['tempThreshold'] = temp_df['tempThreshold'] / 10000.0
    
    if temp_df.isnull().values.any():
        temp_df.fillna(0, inplace=True)

    print("Temp DF Head (Indexed by QualityId):")
    print(temp_df.head())

    # ----------------------------------------------------
    # 2. Struct Affinity DataFrame (Named Columns: struct_0, struct_1...)
    # ----------------------------------------------------
    print("\nBuilding Struct Affinity DataFrame...")
    struct_rows = []
    struct_col_map = {t: f'struct_{t}' for t in VALID_STRUCT_TYPES}
    
    for idx, row in df.iterrows():
        s_list = row.get('structList')
        row_dict = {}
        if isinstance(s_list, list):
            for item in s_list:
                s_type = item.get('structType')
                coeff = item.get('coeff')
                if s_type in struct_col_map:
                    row_dict[struct_col_map[s_type]] = coeff
        struct_rows.append(row_dict)
    
    struct_df = pd.DataFrame(struct_rows, index=fish_quality_ids)
    
    # Ensure all valid struct columns exist
    struct_target_cols = [struct_col_map[t] for t in VALID_STRUCT_TYPES]
    struct_df = struct_df.reindex(columns=struct_target_cols, fill_value=0.0) 
    struct_df.fillna(0.0, inplace=True)
    
    print("Struct DF Head (Cols 0-5, Indexed by QualityId, Named Columns):")
    print(struct_df.iloc[:, :6].head())

    # ----------------------------------------------------
    # 3. Layer Affinity DataFrame (Named Columns: layer_1, layer_2...)
    # ----------------------------------------------------
    print("\nBuilding Layer Affinity DataFrame...")
    layer_rows = []
    layer_col_map = {t: f'layer_{t}' for t in VALID_LAYER_TYPES}
    
    for idx, row in df.iterrows():
        l_list = row.get('layerList')
        row_dict = {}
        if isinstance(l_list, list):
            for item in l_list:
                l_type = item.get('layerType')
                coeff = item.get('coeff')
                if l_type in layer_col_map:
                    row_dict[layer_col_map[l_type]] = coeff
        layer_rows.append(row_dict)
        
    layer_df = pd.DataFrame(layer_rows, index=fish_quality_ids)
    
    # Ensure all valid layer columns exist
    layer_target_cols = [layer_col_map[t] for t in VALID_LAYER_TYPES]
    layer_df = layer_df.reindex(columns=layer_target_cols, fill_value=0.0)
    layer_df.fillna(0.0, inplace=True)
    
    print("Layer DF Head (Indexed by QualityId, Named Columns):")
    print(layer_df.head())

    # ----------------------------------------------------
    # 4. Convert to Numpy (float16)
    # ----------------------------------------------------
    # Struct: Ensure strict order 0..11
    m_struct = struct_df[struct_target_cols].values.astype(np.float16)
    
    # Layer: Ensure strict order 1..3
    m_layer = layer_df[layer_target_cols].values.astype(np.float16)
    
    m_temp = temp_df.values.astype(np.float16)
    
    return m_struct, m_layer, m_temp, temp_df, struct_df, layer_df

print("Converting to Dense Matrices (via DataFrames)...")
if 'stockFishesPd' in locals():
    m_struct, m_layer, m_temp, _, _, _ = build_dense_matrices(stockFishesPd)

    print(f"\nFinal Struct Matrix: {m_struct.shape} (Cols: 0..11)")
    print(f"Final Layer Matrix:  {m_layer.shape} (Cols: 1..3)")
    print(f"Final Temp Params:   {m_temp.shape}")
else:
    print("Error: stockFishesPd is not defined. Run setup cells first.")

Converting to Dense Matrices (via DataFrames)...
Building Temp Affinity DataFrame...
Temp DF Head (Indexed by QualityId):
           temperatureFav  tempAffectedRatio  tempThreshold
qualityId                                                  
101034430            19.5            0.00105            0.0
101034090            19.5            0.00105            0.0
101031007            19.5            0.00105            0.0
101034450            19.5            0.00105            0.0
101034510            22.0            0.00100            0.0

Building Struct Affinity DataFrame...
Struct DF Head (Cols 0-5, Indexed by QualityId, Named Columns):
           struct_0  struct_1  struct_2  struct_3  struct_4  struct_5
101034430       1.0       0.3       0.3       0.3       0.3      0.30
101034090       0.0       1.0       0.0       0.0       0.0      0.00
101031007       0.0       1.0       0.0       0.0       0.0      0.00
101034450       0.0       1.0       0.0       0.0       0.0      0.00
10103

### 核心计算准备 1.5：时段亲和度配置构建 (Period Affinity DataFrame Construction)

构建 `df_period_affinity`，关联 Fish Release -> End -> Period Group -> Period Activity.
目标是得到每个 Fish Release 在每个时段 (PeriodId) 的活跃度系数。

In [11]:
# 1. Load Source JSONs
import json
import pandas as pd

# Load files
data_root = r"d:\fishinggame\precompute\data\1\1001"

with open(f"{data_root}\\fish_release.json", 'r', encoding='utf-8') as f:
    fish_release_data = json.load(f)

with open(f"{data_root}\\stock_release.json", 'r', encoding='utf-8') as f:
    stock_release_data = json.load(f)

with open(f"{data_root}\\fish_env_affinity.json", 'r', encoding='utf-8') as f:
    fish_env_data = json.load(f)

with open(f"{data_root}\\period_affinity.json", 'r', encoding='utf-8') as f:
    period_affinity_data = json.load(f)

# 2. Convert to DataFrames
df_fish_release = pd.DataFrame.from_dict(fish_release_data, orient='index')
df_stock_release = pd.DataFrame.from_dict(stock_release_data, orient='index')
df_fish_env = pd.DataFrame.from_dict(fish_env_data, orient='index')
df_period_affinity_list = pd.DataFrame.from_dict(period_affinity_data, orient='index')

# 3. Rename columns to avoid collisions and clarify semantics
df_fish_release = df_fish_release.rename(columns={'id': 'releaseId', 'name': 'fishName'})
df_stock_release = df_stock_release.rename(columns={'id': 'mappingId'}) # stock_release id is just a mapping id
df_fish_env = df_fish_env.rename(columns={'id': 'envId', 'name': 'envName', 'periodCoeffGroup': 'periodGroupLink'})
df_period_affinity_list = df_period_affinity_list.rename(columns={'id': 'periodEntryId'})

# 4. Perform Merges (Long Format Construction)

# Step 4a: Fish Release -> Stock Release (Link via releaseId)
df_merged_1 = pd.merge(
    df_fish_release[['releaseId', 'fishName']], 
    df_stock_release[['releaseId', 'fishEnvId']], 
    on='releaseId', 
    how='inner'
)

# Step 4b: -> Fish Env (Link via fishEnvId == envId)
df_merged_2 = pd.merge(
    df_merged_1,
    df_fish_env[['envId', 'periodGroupLink']],
    left_on='fishEnvId',
    right_on='envId',
    how='inner'
)

# Step 4c: -> Period Affinity (Link via periodGroupLink == periodGroup)
df_period_long = pd.merge(
    df_merged_2,
    df_period_affinity_list[['periodGroup', 'periodId', 'periodActivityFactor']],
    left_on='periodGroupLink',
    right_on='periodGroup',
    how='inner'
)

# -------------------------------------------------------------------------
# 5. Pivot to Wide Format (User Request: Named Columns)
# -------------------------------------------------------------------------

# Define Mapping (Based on data_formula.md)
period_id_to_name = {
    101060001: "period6_9",
    101060002: "period9_12",
    101060003: "period12_15",
    101060004: "period15_18",
    101060005: "period18_21",
    101060006: "period21_24",
    101060007: "period0_3",
    101060008: "period3_6"
}

# Create 'periodName' column
df_period_long['periodName'] = df_period_long['periodId'].map(period_id_to_name)

# Pivot: Index=[releaseId, fishName], Columns=periodName, Values=periodActivityFactor
df_period_wide = df_period_long.pivot_table(
    index=['releaseId', 'fishName'], 
    columns='periodName', 
    values='periodActivityFactor',
    fill_value=0 # Default to 0 if missing
)

# Reset index to make releaseId a column again (optional, depending on usage)
# df_period_wide.reset_index(inplace=True)

print(f"Fish Release Count: {len(df_fish_release)}")
print(f"Wide Period DF Shape: {df_period_wide.shape}")
print("\nWide Period DataFrame Head:")
display(df_period_wide.head())

# Check for a specific fish
sample_name = "Release_Alewife_Young_dew"
# Filter using index level if checking by name
if sample_name in df_period_wide.index.get_level_values('fishName'):
    print(f"\nSample Data for '{sample_name}':")
    display(df_period_wide.query(f"fishName == '{sample_name}'"))
else:
    print(f"\nSample fish '{sample_name}' not found.")

Fish Release Count: 419
Wide Period DF Shape: (419, 8)

Wide Period DataFrame Head:


Unnamed: 0_level_0,periodName,period0_3,period12_15,period15_18,period18_21,period21_24,period3_6,period6_9,period9_12
releaseId,fishName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
300010,Release_Alewife_Young_dew,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
300020,Release_Alewife_Common_dew,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
300030,Release_Alewife_Trophy_dew,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
300040,Release_Alewife_Unique_dew,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
300050,Release_Alewife_Apex_dew,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0



Sample Data for 'Release_Alewife_Young_dew':


Unnamed: 0_level_0,periodName,period0_3,period12_15,period15_18,period18_21,period21_24,period3_6,period6_9,period9_12
releaseId,fishName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
300010,Release_Alewife_Young_dew,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### 核心计算准备 2：加载体素数据 (Load Voxel Data)
读取 `Global.npy`，提取 Structural Slots 和 Depth Layer 信息。

*   **Data**: `[X, Z, C]`
*   **Target**: `struct_slots [X, Z, 3]`, `depth_layer [X, Z]` (+ Y extension)

In [12]:
# Load Voxel Data
# File Path: .../Fishing_1006001_Global.npy
# Based on map_data.json, this is a Dense 3D array [X, Y, Z].
print(f"Loading Voxel Data from: {test_path}")
voxel_data = np.load(test_path)

print(f"Loaded Voxel Data Shape: {voxel_data.shape}, dtype={voxel_data.dtype}")

# ------------------------------------------------------
# Voxel Data Interpretation (Bitmask)
# ------------------------------------------------------
# According to VoxelMapDataFormat.md, the int32 value is a Bitmask.
# Bit 0: Water, Bit 1: Grass, Bit 2: Stone, ...
# StructType IDs in 'struct_affinity.json' (0, 1, 2...) correspond to these Bit Indices.

# We will use the raw 3D bitmask array for calculation.
# struct_slots_map is essentially the voxel_data itself.
struct_bitmask_map = voxel_data.astype(np.int32)

# Depth/Layer info is DERIVED from Y coordinate or separate channel?
# Note: The Global.npy appears to only contain the bitmask (int32).
# If we need Depth Layer, we might need to compute it from the 'Water' bit distribution 
# or read a separate file if available. 
# For this DEMO, we will assume Layer 0 (Surface) to Layer N (Bottom) maps to Y indices.
# Or we define a simple dummy depth map based on (Y / MaxY).
dim_y = struct_bitmask_map.shape[1]
# Create normalized depth [0, 1] for temp calculation. 
# 0 = Bottom, 1 = Surface? Or inverse. Usually Surface is Top (High Y).
y_indices = np.arange(dim_y)
normalized_depth_map = col_vec = y_indices[None, :, None] / float(dim_y) # [1, Y, 1] broadcastable

print(f"Struct Bitmask Map Shape: {struct_bitmask_map.shape}")
print("Sample Bitmask Values:", np.unique(struct_bitmask_map.flatten())[:10])

Loading Voxel Data from: D:\fishinggame\ExportedData\Fishing_1006001_Dense_20260107_154037\Fishing_1006001_Global.npy
Loaded Voxel Data Shape: (134, 8, 134), dtype=int32
Struct Bitmask Map Shape: (134, 8, 134)
Sample Bitmask Values: [ 0  1  2  3  4  5  8  9 12 13]


### 核心计算 3：批量亲和度计算 (Data-Driven Batch Calculation)

1.  **AffStruct**: `StructMatrix` vs `StructSlotsMap` (Gather)
2.  **AffTemp**: `TempParams` vs `DepthMap` (Gaussian)
3.  **Synthesis**: `EnvCoeff`

In [None]:
# -------------------------------------------------------------------------
# RELOAD CONFIGS FOR PARAMETERIZED CALCULATION (Corrected Scaling)
# -------------------------------------------------------------------------

# 1. Load Constants
print("Searching for Constants in fish_env_data...")
const_key = None
# (Same logic as before)
TEMP_TOLERANCE_WIDTH = 6.0 

# 2. Load Pond/Map Params (Mocked for Proof)
POND_ID = 1001
BOTTOM_T = 10.0 
SURFACE_T = 25.0 

print(f"Using Params: SurfaceT={SURFACE_T}, BottomT={BOTTOM_T}, ToleranceWidth={TEMP_TOLERANCE_WIDTH}")

# 3. Base Weight
if 'probWeightIdeal' not in stockFishesPd.columns:
    stockFishesPd = pd.merge(
        stockFishesPd, 
        df_fish_release[['releaseId', 'probWeightIdeal']], 
        left_on='releaseId', 
        right_on='releaseId', 
        how='left'
    )
    stockFishesPd['probWeightIdeal'].fillna(0, inplace=True)

base_weight_vec = stockFishesPd['probWeightIdeal'].values.astype(np.float32)
print(f"Base Weight Vector Shape: {base_weight_vec.shape}")

# -------------------------------------------------------------------------
# RE-ROUTINE CORE CALCULATION
# -------------------------------------------------------------------------

# A. Struct Affinity (Reuse)
print("Re-calculating Struct Affinity...")
X, Y, Z_dim = struct_bitmask_map.shape
F = m_struct.shape[0]
S = m_struct.shape[1] 
aff_struct_final = np.zeros((X, Y, Z_dim, F), dtype=np.float16)
RELEVANT_BITS = 12 
for bit_idx in range(RELEVANT_BITS):
    mask = (struct_bitmask_map >> bit_idx) & 1
    if np.sum(mask) == 0: continue
    if bit_idx < S:
        coeffs = m_struct[:, bit_idx]
        updates = mask[..., np.newaxis] * coeffs[np.newaxis, np.newaxis, np.newaxis, :]
        aff_struct_final = np.maximum(aff_struct_final, updates)

# B. Temp Affinity (Parameterized & Fixed Scaling)
print("Re-calculating Temp Affinity (Parameterized & Fixed)...")
t_map = SURFACE_T + (BOTTOM_T - SURFACE_T) * normalized_depth_map
t_map_3d = np.tile(t_map, (X, 1, Z_dim)) 
t_map_4d = t_map_3d[..., np.newaxis]

t_fav = m_temp[:, 0]
# *** SCALING FIX ***
# Cell 12 applied /10000. Raw is ~10. We want ~1.0. 
# So correct by *1000 (Resulting in Raw / 10).
t_ratio = m_temp[:, 1] * 1000.0  

print(f"Sample t_ratio (Fixed): {t_ratio[:5]}")

# Formula: exp( - (T - Tfav)^2 / (Width * Ratio^2) )
diff_sq = (t_map_4d - t_fav) ** 2
denom = TEMP_TOLERANCE_WIDTH * (t_ratio ** 2)
denom[denom < 1e-5] = 1e-5

aff_temp_final = np.exp(- diff_sq / denom)
print(f"Aff Temp Mean: {np.mean(aff_temp_final)}")

# C. Synthesis
print("Calculating Final Synthesis (BaseWeight * Struct * Temp)...")

# EnvCoeff = Struct * Temp (Using implicit max(0) since positive)
env_coeff = aff_struct_final * aff_temp_final

# Final Weight = BaseWeight * EnvCoeff
final_weight_tensor = base_weight_vec[np.newaxis, np.newaxis, np.newaxis, :] * env_coeff

print(f"Final Weight Tensor Shape: {final_weight_tensor.shape}")
print(f"Max Weight: {np.max(final_weight_tensor)}")
print(f"Mean Weight: {np.mean(final_weight_tensor)}")

# Quick Histogram (Print counts)
non_zero_count = np.sum(final_weight_tensor > 0.01)
print(f"Voxels with Weight > 0.01: {non_zero_count} / {final_weight_tensor.size}")


Searching for Constants in fish_env_data...
Using Params: SurfaceT=25.0, BottomT=10.0, ToleranceWidth=50.0
Base Weight Vector Shape: (35,)
Re-calculating Struct Affinity...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  stockFishesPd['probWeightIdeal'].fillna(0, inplace=True)


Re-calculating Temp Affinity (Parameterized & Fixed)...
Sample t_ratio (Fixed): [1.05 1.05 1.05 1.05 1.  ]
Aff Temp Mean: 0.7017224427795581
Calculating Final Synthesis (BaseWeight * Struct * Temp)...
Final Weight Tensor Shape: (134, 8, 134, 35)
Max Weight: 249929.14836380148
Mean Weight: 995.7949859033392
Voxels with Weight > 0.01: 199787 / 5027680


In [14]:
# -----------------------------------------------------
# A. Struct Affinity Calculation (Bit-wise Iteration)
# -----------------------------------------------------
# Goal: affStruct[x, y, z, f] = Max over all PRESENT struct types
# Input:
#   struct_bitmask_map: [X, Y, Z] (int32)
#   m_struct (DataFrame Matrix): [F, S] (Rows: Fish, Cols: StructTypeID)

X, Y, Z_dim = struct_bitmask_map.shape
F = m_struct.shape[0]
S = m_struct.shape[1] # Max Struct Types (e.g., 30)

print(f"Calculating Struct Affinity for Grid [{X}x{Y}x{Z_dim}] and {F} Fish...")

# Initialize Result with zeros (or min affinity)
aff_struct_final = np.zeros((X, Y, Z_dim, F), dtype=np.float16)

# BITMASK_TO_STRUCT_ID Mapping
# Assuming Bit K corresponds to StructTypeID K for now.
# Iterate only relevant bits (e.g. 0 to 11)
RELEVANT_BITS = 12 

for bit_idx in range(RELEVANT_BITS):
    # 1. Create Boolean Mask for this Struct Type
    # Check if Bit is set: (Val >> bit) & 1
    mask = (struct_bitmask_map >> bit_idx) & 1
    # mask shape: [X, Y, Z], 0 or 1
    
    if np.sum(mask) == 0:
        continue # Skip if this feature never appears
        
    # 2. Get Affinity Coeffs for this Struct Type (for all fish)
    # m_struct[:, bit_idx] -> Shape [F]
    if bit_idx < S:
        coeffs = m_struct[:, bit_idx] # [F]
        
        # 3. Broadcast and Update Max
        # mask: [X, Y, Z, 1]
        # coeffs: [1, 1, 1, F]
        # update: [X, Y, Z, F]
        
        # Optim: multiplication by boolean mask
        # We want: where(mask, coeff, current_max)
        # But simple max(current, mask * coeff) works if coeff >= 0
        
        updates = mask[..., np.newaxis] * coeffs[np.newaxis, np.newaxis, np.newaxis, :]
        aff_struct_final = np.maximum(aff_struct_final, updates)

print(f"AffStruct Final Shape: {aff_struct_final.shape}")


# -----------------------------------------------------
# B. Temp Affinity Calculation (Using 3D/Y-based Depth)
# -----------------------------------------------------
# T_pixel = SurfaceT + (BottomT - SurfaceT) * Depth (0-1)
SURFACE_T = 25.0
BOTTOM_T = 10.0

# BroadCast Pre-calc: normalized_depth_map is [1, Y, 1]
# broadcast to [X, Y, Z]
t_map = SURFACE_T + (BOTTOM_T - SURFACE_T) * normalized_depth_map
t_map = np.tile(t_map, (X, 1, Z_dim)) # Expand to full grid if needed, or keep broadcast

t_map_expanded = t_map[..., np.newaxis] # [X, Y, Z, 1]

t_fav = m_temp[:, 0]
t_ratio = m_temp[:, 1]
WIDTH_CONST = 50.0 

diff_sq = (t_map_expanded - t_fav) ** 2
denom = WIDTH_CONST * (t_ratio ** 2)
denom[denom < 1e-5] = 1e-5

aff_temp_final = np.exp(- diff_sq / denom)
print(f"AffTemp Final Shape: {aff_temp_final.shape}")

# -----------------------------------------------------
# C. Synthesis
# -----------------------------------------------------
# EnvCoeff [X, Y, Z, F]
env_coeff_final = aff_struct_final * aff_temp_final
print(f"EnvCoeff Final Shape: {env_coeff_final.shape}")

Calculating Struct Affinity for Grid [134x8x134] and 35 Fish...
AffStruct Final Shape: (134, 8, 134, 35)
AffTemp Final Shape: (134, 8, 134, 35)
EnvCoeff Final Shape: (134, 8, 134, 35)
