In [None]:
import numpy as np
import json
import pandas as pd

### 链条式找配置，并组装紧密数组（每张地图只需做一次，可用于生成一串派生环境场）

In [6]:
import re
from pathlib import Path
import json

"""
数据处理步骤 (Metaphysical Data Processing Steps):

1.  **数据源与目标确立 (Source & Target Definition)**
    - Source: 位于 `precompute/data` 下的静态配置表 (JSON)，定义了游戏世界的逻辑结构。
    - Target: 位于 `ExportedData` 下的 Numpy 数组，代表了具体场景的物理数据 (Voxel/Grid)。

2.  **标识符提取 (Identifier Extraction)**
    - 从物理数据的文件名/路径中提取关键标识符 (SceneID / AssetID)。
    - 例如: 从 `Fishing_1006001_Global.npy` 提取 `1006001`。

3.  **逻辑映射建立 (Logical Mapping)**
    - 利用中间件/配置表 (`map_scene.json`) 将物理标识符 (AssetID) 映射回系统逻辑标识符 (MapID)。
    - 这一步是连接“即时演算数据”与“策划配置数据”的关键桥梁。

4.  **上下文关联与整合 (Context Association & Integration)**
    - 以 MapID 为锚点，级联查询关联的 Pond, Stock, Fish 配置。
    - 将分散的数据整合为适合并行计算的结构化格式 (Numpy/Pandas)。
"""

# Configuration: folders holding the static JSON tables and the exported arrays
DATA_ROOT = Path(r'D:\fishinggame\precompute\data\1\1001')
EXPORTED_DATA_ROOT = Path(r'D:\fishinggame\ExportedData')

# Read map_scene.json; its top-level keys are used as map ids below
with (DATA_ROOT / 'map_scene.json').open('r', encoding='utf-8') as f:
    map_scene = json.load(f)

# Reverse index assetId -> map_id.
# Entries whose assetId is missing or empty are left out of the index.
asset_to_map = {
    entry['assetId']: int(key)
    for key, entry in map_scene.items()
    if entry.get('assetId')
}
print(f'已加载 {len(map_scene)} 个地图配置，其中 {len(asset_to_map)} 个有 assetId')
print(f'assetId -> map_id 映射: {asset_to_map}')

已加载 9 个地图配置，其中 6 个有 assetId
assetId -> map_id 映射: {'1001001': 1001, '1002001': 1002, '1003001': 1003, '1004001': 1004, '1004002': 1008, '1006001': 1009}


In [7]:
def get_scene_id_from_path(npy_path: str) -> str:
    """Extract the scene_id from an exported .npy path.

    The scene_id is the run of digits following the first ``Fishing_`` found
    anywhere in the path, e.g. ``Fishing_1006001_Global.npy`` -> ``'1006001'``.

    Args:
        npy_path: Path of the exported array; it is converted with ``str()``
            before matching.

    Returns:
        The matched digits as a string.

    Raises:
        ValueError: If the path contains no ``Fishing_<digits>`` segment.
    """
    found = re.search(r'Fishing_(\d+)', str(npy_path))
    if found is not None:
        return found.group(1)
    raise ValueError(f'无法从路径中提取scene_id: {npy_path}')

def get_map_id_from_scene_id(scene_id: str) -> int:
    """Return the map_id registered for ``scene_id`` in ``asset_to_map``.

    Args:
        scene_id: Key to look up in the module-level ``asset_to_map`` dict.

    Returns:
        The value stored in ``asset_to_map`` for ``scene_id``.

    Raises:
        ValueError: If ``scene_id`` is not a key of ``asset_to_map``.
    """
    if scene_id not in asset_to_map:
        raise ValueError(f'找不到scene_id {scene_id} 对应的map_id')
    return asset_to_map[scene_id]

def get_map_id_from_npy_path(npy_path: str) -> int:
    """Resolve an exported .npy path straight to its map_id.

    Chains ``get_scene_id_from_path`` and ``get_map_id_from_scene_id``; any
    ``ValueError`` raised by either step propagates to the caller.
    """
    return get_map_id_from_scene_id(get_scene_id_from_path(npy_path))

# Worked example: resolve one exported file to its scene_id and map_id.
# `map_id` is read again by the pond lookup further down.
test_path = r'D:\fishinggame\ExportedData\Fishing_1006001_Dense_20260107_154037\Fishing_1006001_Global.npy'
scene_id = get_scene_id_from_path(test_path)
map_id = get_map_id_from_scene_id(scene_id)
print(
    f'文件路径: {test_path}',
    f'提取的 scene_id: {scene_id}',
    f'对应的 map_id: {map_id}',
    f'地图信息: {map_scene[str(map_id)]}',
    sep='\n',
)

文件路径: D:\fishinggame\ExportedData\Fishing_1006001_Dense_20260107_154037\Fishing_1006001_Global.npy
提取的 scene_id: 1006001
对应的 map_id: 1009
地图信息: {'id': 1009, 'name': 'map_base_6', 'desc': 106, 'assetId': '1006001', 'originOffsetX': 0, 'originOffsetY': 0, 'offsetX': 0, 'offsetY': 0, 'sizeX': 1000, 'sizeY': 1000, 'rotate': 0, 'mark': ''}


#### 对于局部池取stock
* 根据关联鱼塘的 `fishStockId`，去 `D:\fishinggame\precompute\data\1\1001\fish_stock.json` 当中查找对应的 Stock 配置。

In [8]:
# Load the pond and stock configuration tables
with (DATA_ROOT / 'fish_pond_list.json').open('r', encoding='utf-8') as f:
    fish_pond_list = json.load(f)

with (DATA_ROOT / 'fish_stock.json').open('r', encoding='utf-8') as f:
    fish_stock_config = json.load(f)

print(f"已加载 {len(fish_pond_list)} 个鱼塘配置")
print(f"已加载 {len(fish_stock_config)} 个 Stock 配置")

# Fetch the map_scene entry for map_id (map_scene is keyed by the string form)
current_map_info = map_scene.get(str(map_id))
if not current_map_info:
    raise ValueError(f"Found no map info for id {map_id}")

map_desc_id = current_map_info.get('desc')
print(f"\n当前地图: {current_map_info['name']} (ID: {map_id})")
print(f"关联的 Desc ID (用于对应 fish_pond_list.mapId): {map_desc_id}")

# Select the ponds whose mapId equals this map's 'desc' value
print(f"\n查找 mapId == {map_desc_id} 的鱼塘...")
related_ponds = [pond for pond in fish_pond_list.values() if pond.get('mapId') == map_desc_id]

if related_ponds:
    print(f"找到 {len(related_ponds)} 个关联鱼塘:")
else:
    print("警告: 未找到关联的鱼塘配置 (Pond)")

# The loop body never runs when related_ponds is empty
for pond in related_ponds:
    pond_id = pond.get('id')
    pond_name = pond.get('name')
    stock_id = pond.get('fishStockId')

    print(f"  - Pond: {pond_name} (ID: {pond_id}) -> Stock ID: {stock_id}")

    # fish_stock.json is queried with the string form of the stock id
    stock_info = fish_stock_config.get(str(stock_id))
    if not stock_info:
        print(f"    警告: 在 fish_stock.json 中未找到 Stock ID {stock_id}")
        continue
    print(f"    Stock 详情: Name={stock_info.get('name')}, ResetTime={stock_info.get('resetDayTime')}")

已加载 7 个鱼塘配置
已加载 6 个 Stock 配置

当前地图: map_base_6 (ID: 1009)
关联的 Desc ID (用于对应 fish_pond_list.mapId): 106

查找 mapId == 106 的鱼塘...
找到 1 个关联鱼塘:
  - Pond: Sunset_Stream (ID: 301020005) -> Stock ID: 301030106
    Stock 详情: Name=stock_sunset, ResetTime=05:00


In [9]:
# 顺着往下进行数据查找和组装numpy，供后面的计算使用。
# 大致思路为：
# 5. 遍历 Stock ID (Stock -> Release)：
#    - 从相关联的池塘中提取 Stock ID，查找其下属的所有 Release ID。
# 6. Eager Loading (Release -> Fish/Env):
#    - 对每个 Release ID，立即提取所需的全部配置信息，包括：
#      - 基础属性: qualityId (即原 fishId), weight/length ranges (min/max).
#      - 关键系数: minEnvCoeff, minAdaptCoeff.
#      - 关联元数据: speciesId, envAffinityId (即原 envId).
# 7. 组装 DataFrame (Assembly):
#    - 将上述所有提取的字段扁平化，组装成 Pandas DataFrame (`stockFishesPd`)。
#    - 每一行代表一个 Release 配置，为后续的概率计算和环境场生成做准备。

In [10]:
import pandas as pd

# Load the release / quality configuration tables
print("Loading Release and Quality configs...")
with open(DATA_ROOT / 'stock_release.json', 'r', encoding='utf-8') as f:
    stock_release_config = json.load(f)

with open(DATA_ROOT / 'fish_release.json', 'r', encoding='utf-8') as f:
    fish_release_config = json.load(f)

with open(DATA_ROOT / 'basic_fish_quality.json', 'r', encoding='utf-8') as f:
    basic_fish_quality_config = json.load(f)
print("Configs loaded.")

rows = []
# Release IDs referenced by stock_release.json but absent from fish_release.json.
# They produce no row; they are reported after the loop instead of vanishing silently.
skipped_release_ids = []

# 'related_ponds' is produced by the pond lookup cell above.
# If that cell has not been run, warn and continue with no stock IDs.
if 'related_ponds' not in locals():
    print("Warning: 'related_ponds' not found. Please ensure the previous cell is executed.")
    unique_stock_ids = set()
else:
    # Ponds with a missing/falsy fishStockId are ignored
    unique_stock_ids = set(pond.get('fishStockId') for pond in related_ponds if pond.get('fishStockId'))

print(f"Processing {len(unique_stock_ids)} unique Stock IDs associated with the current map.")

for stock_id in unique_stock_ids:
    # Find all releases for this stock.
    # Note: this scans every entry of stock_release_config per stock id, which is
    # acceptable for this precompute scope.
    stock_releases = [item for item in stock_release_config.values() if item.get('stockId') == stock_id]

    for sr in stock_releases:
        release_id = sr.get('releaseId')
        fish_quality_id = sr.get('fishId')  # called fishId in stock_release.json, but actually a qualityId
        fish_env_affinity_id = sr.get('fishEnvId')  # called fishEnvId in stock_release.json

        # Look up the release; without it there are no limits to record, so skip the row
        release_info = fish_release_config.get(str(release_id))
        if not release_info:
            skipped_release_ids.append(release_id)
            continue

        # Look up the fish quality entry; species falls back to -1 when it is missing
        fish_info = basic_fish_quality_config.get(str(fish_quality_id))
        species_id = fish_info.get('species', -1) if fish_info else -1

        row = {
            'stockId': stock_id,
            'releaseId': release_id,
            'qualityId': fish_quality_id,
            'envAffinityId': fish_env_affinity_id,  # renamed from envId for clarity
            'speciesId': species_id,

            # Release limits
            'weight_min': release_info.get('weightMin'),
            'weight_max': release_info.get('weightMax'),
            'len_min': release_info.get('lengthMin'),
            'len_max': release_info.get('lengthMax'),

            # Environment coefficients (default to 0 when absent)
            'minEnvCoeff': release_info.get('minEnvCoeff', 0),
            'minAdaptCoeff': release_info.get('minAdaptCoeff', 0),

            # Debug/display info
            'name': release_info.get('name'),
            'probWeight': release_info.get('probWeightIdeal')
        }
        rows.append(row)

# One row per release that was found in fish_release.json
stockFishesPd = pd.DataFrame(rows)
print(f"Created stockFishesPd with {len(stockFishesPd)} rows.")
if skipped_release_ids:
    print(
        f"Warning: skipped {len(skipped_release_ids)} release(s) not found in "
        f"fish_release.json: {skipped_release_ids}"
    )
if not stockFishesPd.empty:
    print(stockFishesPd.head().to_string())
    print("\nColumn Types:")
    print(stockFishesPd.dtypes)
else:
    print("DataFrame is empty. Check if stock_release.json maps correctly to the pond stock IDs.")

Loading Release and Quality configs...
Configs loaded.
Processing 1 unique Stock IDs associated with the current map.
Created stockFishesPd with 35 rows.
     stockId  releaseId  qualityId  envAffinityId  speciesId  weight_min  weight_max  len_min  len_max  minEnvCoeff  minAdaptCoeff                                name  probWeight
0  301030106     300500  101034430        1013390  101020063         150         450       26       37            0              0  Release_American_Shad_Young_sunset      250000
1  301030106     300510  101034090        1013050  101020010          50         200       16       26            0              0    Release_Brook_Trout_Young_sunset      100000
2  301030106     300520  101031007        1010066  101020010         200         350       26       32            0              0   Release_Brook_Trout_Common_sunset      100000
3  301030106     300530  101034450        1013410  101020003         150         450       28       40            0              0

In [11]:
# 继续进行后续数据关联 (Data Enrichment Phase II)

# 8. 环境亲和性关联 (Environment Affinity Lookup):
#    - 目标: 丰富鱼类的环境适应参数。
#    - 操作: 使用 `envAffinityId` (原 `fishEnvId`) 关联 `fish_env_affinity.json`。
#    - 提取关键属性 (Attributes Extraction):
#         - 基础ID关联: structId (结构), tempId (温度), layerId (水层), lightId (光照)。
#         - 诱鱼系数: baitCoeffGroup, baitTypeCoeffGroup, periodCoeffGroup (时段)。
#         - 适应性参数: 
#             - pressureSensitivity (气压敏感度)
#             - minAdaptLureRatio / maxAdaptLureRatio (路亚适应比例)
#             - maxAcceptLengthRatio (最大接受长度比)
#         - 衰减配置: underLengthDecayCoeff / overLengthDecayCoeff (体型偏离衰减)。

# 9. 结构体亲和性级联查找 (Structure Affinity Cascade):
#    - 目标: 获取具体的物理结构交互参数。
#    - 操作: 使用步骤 8 获得的 `structId`，查询 `struct_affinity.json`。
#    - 提取参数 (Parameters Extraction): 
#         - `List`: 包含 `structType` (结构类型) 和 `coeff` (系数) 的列表。

# 10. 温度亲和性级联查找 (Temperature Affinity Cascade):
#    - 目标: 获取鱼类对温度的敏感度配置。
#    - 操作: 使用步骤 8 获得的 `tempId`，查询 `temp_affinity.json`。
#    - 提取参数 (Parameters Extraction): 
#         - `temperatureFav`: 最适温度 (注意可能需要缩放，如 220 -> 22.0)。
#         - `tempAffectedRatio`: 温度影响比率。
#         - `tempThreshold`: 温度容忍阈值。

# 11. 水层亲和性级联查找 (Water Layer Affinity Cascade):
#    - 目标: 获取鱼类在不同水层的分布偏好。
#    - 操作: 使用步骤 8 获得的 `layerId`，查询 `water_layer_affinity.json`。
#    - 提取参数 (Parameters Extraction): 
#         - `List`: 包含 `layerType` (水层类型, 如上/中/下) 和 `coeff` (系数) 的列表。

In [12]:
# Steps 8-11: environment & affinity cascade lookup

print("Loading Affinity Configs...")
# 1. Load every affinity table used by the cascade
with (DATA_ROOT / 'fish_env_affinity.json').open('r', encoding='utf-8') as f:
    env_affinity_config = json.load(f)

with (DATA_ROOT / 'struct_affinity.json').open('r', encoding='utf-8') as f:
    struct_affinity_config = json.load(f)

with (DATA_ROOT / 'temp_affinity.json').open('r', encoding='utf-8') as f:
    temp_affinity_config = json.load(f)

with (DATA_ROOT / 'water_layer_affinity.json').open('r', encoding='utf-8') as f:
    layer_affinity_config = json.load(f)
print("Affinity Configs loaded.")

# 2. 准备查找字典 (Prepare Lookup Dicts)
#    优化: 直接构建 id -> data 的快速查找字典，避免每次遍历 list
#    注意: JSON key通常是字符串, DataFrame中Id可能是int, 查找时需注意类型转换

def get_config_by_id(config_dict, target_id):
    """Look up ``target_id`` in a config dict whose keys are strings.

    The id is converted with ``str()`` before the lookup, so an ``int`` id
    finds a string key. If that lookup misses and the id is a float holding an
    integral value (e.g. ``1013390.0``, which is what pandas yields for an
    integer column that also contains missing values), the lookup is retried
    with the integer form (``'1013390'``).

    Args:
        config_dict: Mapping with string keys, queried through ``.get``.
        target_id: The id to look up; ``None`` means "no id".

    Returns:
        The stored entry, or ``None`` when ``target_id`` is ``None`` or no key
        matches (NaN and non-integral floats never match the retry).
    """
    if target_id is None:
        return None

    entry = config_dict.get(str(target_id))
    if entry is not None or not isinstance(target_id, float):
        return entry

    # Float fallback: 1013390.0 -> '1013390'. NaN/inf are not integral, so they
    # fall through to None.
    if target_id.is_integer():
        return config_dict.get(str(int(target_id)))
    return None

# 3. 扩展 DataFrame (Enrich DataFrame)
#    虽然可以使用 apply，但在列数较多且逻辑复杂时，迭代或列表推导式便于调试和错误处理
#    考虑到数据量不大 (几十到几百行)，直接遍历 row 更新字典列表然后重新创建 DF 也是一种清晰的方法
#    或者使用 apply + Series expand

def enrich_row(row):
    """Collect the environment-affinity attributes for one stockFishesPd row.

    Looks up the row's ``envAffinityId`` in ``env_affinity_config`` (step 8) and
    then cascades through ``struct_affinity_config`` (step 9),
    ``temp_affinity_config`` (step 10) and ``layer_affinity_config`` (step 11).

    The returned Series always carries the same labels in the same order. Any
    value whose lookup failed is NaN, so ``DataFrame.apply`` produces a stable
    set of columns even when some (or all) rows cannot be resolved.

    Args:
        row: A row object exposing ``.get('envAffinityId')``, as passed by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        pd.Series with the env-affinity fields, ``structList``, the temperature
        fields and ``layerList``.
    """
    missing = float('nan')
    env_fields = (
        # Basic env IDs
        'structId', 'tempId', 'layerId', 'lightId',
        # Coefficient groups
        'baitCoeffGroup', 'baitTypeCoeffGroup', 'periodCoeffGroup',
        # Adaptability stats
        'pressureSensitivity', 'minAdaptLureRatio', 'maxAdaptLureRatio',
        'maxAcceptLengthRatio', 'underLengthDecayCoeff', 'overLengthDecayCoeff',
    )
    temp_fields = ('temperatureFav', 'tempAffectedRatio', 'tempThreshold')

    # Pre-fill every output label so the schema does not depend on which lookups succeed
    extra_data = dict.fromkeys(
        env_fields + ('structList',) + temp_fields + ('layerList',),
        missing,
    )

    # Step 8: env affinity lookup
    env_info = get_config_by_id(env_affinity_config, row.get('envAffinityId'))
    if not env_info:
        return pd.Series(extra_data)

    for field in env_fields:
        extra_data[field] = env_info.get(field)

    # Step 9: structure affinity cascade
    struct_info = get_config_by_id(struct_affinity_config, extra_data['structId'])
    if struct_info:
        extra_data['structList'] = struct_info.get('List')  # raw list of {structType, coeff}

    # Step 10: temperature affinity cascade
    temp_info = get_config_by_id(temp_affinity_config, extra_data['tempId'])
    if temp_info:
        for field in temp_fields:
            extra_data[field] = temp_info.get(field)

    # Step 11: water layer affinity cascade
    layer_info = get_config_by_id(layer_affinity_config, extra_data['layerId'])
    if layer_info:
        extra_data['layerList'] = layer_info.get('List')  # raw list of {layerType, coeff}

    return pd.Series(extra_data)

# Apply the enrichment to every row of stockFishesPd
print("Enriching DataFrame...")
if not stockFishesPd.empty:
    enriched_columns = stockFishesPd.apply(enrich_row, axis=1)

    # Drop any columns a previous run of this cell already added, so re-running
    # the cell replaces them instead of appending duplicate column names.
    stockFishesPd = pd.concat(
        [
            stockFishesPd.drop(columns=enriched_columns.columns, errors='ignore'),
            enriched_columns,
        ],
        axis=1,
    )

    print("Enrichment Complete.")
    print(f"New DataFrame Shape: {stockFishesPd.shape}")
    # Preview only the columns that exist; a failed lookup may leave some out
    preview_columns = [
        col
        for col in ['qualityId', 'envAffinityId', 'temperatureFav', 'structId', 'tempId', 'layerList']
        if col in stockFishesPd.columns
    ]
    print(stockFishesPd[preview_columns].head().to_string())
else:
    print("stockFishesPd is empty, skipping enrichment.")

Loading Affinity Configs...
Affinity Configs loaded.
Enriching DataFrame...
Enrichment Complete.
New DataFrame Shape: (35, 31)
   qualityId  envAffinityId  temperatureFav  structId   tempId                                                                                   layerList
0  101034430        1013390             195   2011030  2021010  [{'layerType': 1, 'coeff': 1}, {'layerType': 2, 'coeff': 1}, {'layerType': 3, 'coeff': 1}]
1  101034090        1013050             195   2010940  2020920  [{'layerType': 1, 'coeff': 1}, {'layerType': 2, 'coeff': 1}, {'layerType': 3, 'coeff': 1}]
2  101031007        1010066             195   2010940  2020920  [{'layerType': 1, 'coeff': 1}, {'layerType': 2, 'coeff': 1}, {'layerType': 3, 'coeff': 1}]
3  101034450        1013410             195   2011040  2021020  [{'layerType': 1, 'coeff': 1}, {'layerType': 2, 'coeff': 1}, {'layerType': 3, 'coeff': 1}]
4  101034510        1013470             220   2011060  2021040  [{'layerType': 1, 'coeff': 1}, {'l