# 处理非结构化及半结构化数据文件

File() 函数也可以直接处理非结构化、半结构化的数据文件以及数据库数据，它是 Open_Entry 更底层的实现。

## 初始化环境

In [None]:
from spdm.data.File import File
from spdm.utils.logger import logger
from pathlib import Path

## SpDM 处理Python中非结构化数据

In [3]:
# Open the GEQdsk equilibrium file read-only; keep both the raw read result
# (doc) and its dumped form (eq_test), which later cells reuse.
with File("./data/g070754.05000", format="GEQdsk", mode="r") as geqdsk_file:
    doc = geqdsk_file.read()
    eq_test = doc.dump()

In [4]:
#### eq_test is a Python dict whose keys follow the IMAS IDS organisational structure
eq_test.keys()

dict_keys(['wall', 'equilibrium'])

In [None]:
#### Inspect the data stored under "equilibrium"
eq_test["equilibrium"].keys()

dict_keys(['time', 'vacuum_toroidal_field', 'time_slice'])

## SpDM 访问Python中半结构化数据

### **NameList**文件

In [5]:
# Example: GENRAY's input file genray.dat, which is in NameList format.
with File("./data/genray.dat", format="namelist", mode="r") as oid:
    # read() alone yields an entry; per the original author's note this only
    # establishes a link and does not pull the data back yet.
    output_entry = oid.read()
    # dump() on a fresh read brings all of the data back.
    output_data = oid.read().dump()
# Log the top-level keys of the namelist.
logger.info(output_data.keys())
# Fetch the value of a single child node via child(), without reading the
# whole tree back.
logger.info(output_entry.child("main_lobes").__value__)

[0;37m2023-11-15 14:18:53,280 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/utils/sp_export.py:66:sp_load_module: Load module spdm.plugins.data.plugin_namelist[0m
[0;34m2023-11-15 14:18:53,356 [    spdm]     INFO: dict_keys(['main_lobes', 'genr', 'tokamak', 'wave', 'scatnper', 'dispers', 'numercl', 'output', 'plasma', 'species', 'varden', 'denprof', 'tpopprof', 'vflprof', 'zprof', 'tprof', 'grill', 'rz_launch_grill_tab', 'eccone', 'dentab', 'dentab_nonuniform_line', 'temtab', 'temtab_nonuniform_line', 'tpoptab', 'tpoptab_nonuniform_line', 'vflowtab', 'vflowtab_nonuniform_line', 'zeftab', 'zeftab_nonuniform_line', 'read_diskf', 'emission', 'ox', 'adj_nml', 'edge_prof_nml', 'lsc_approach_nml', 'edctsctab', 'jpartsctab'])[0m
[0;34m2023-11-15 14:18:53,357 [    spdm]     INFO: OrderedDict([('total_grills', 5),
             ('main_grills', 4),
             ('bt_direction', 1),
             ('power_fraction_factor', 0.8248)])[0m




In [6]:
# Log the top-level keys of the namelist data loaded by the previous cell.
namelist_keys = output_data.keys()
logger.info(namelist_keys)
# Look up one child node through the entry (no full-tree read needed, per the
# original note) and log its value.
main_lobes_entry = output_entry.child("main_lobes")
logger.info(main_lobes_entry.__value__)

[0;34m2023-11-15 14:19:06,575 [    spdm]     INFO: dict_keys(['main_lobes', 'genr', 'tokamak', 'wave', 'scatnper', 'dispers', 'numercl', 'output', 'plasma', 'species', 'varden', 'denprof', 'tpopprof', 'vflprof', 'zprof', 'tprof', 'grill', 'rz_launch_grill_tab', 'eccone', 'dentab', 'dentab_nonuniform_line', 'temtab', 'temtab_nonuniform_line', 'tpoptab', 'tpoptab_nonuniform_line', 'vflowtab', 'vflowtab_nonuniform_line', 'zeftab', 'zeftab_nonuniform_line', 'read_diskf', 'emission', 'ox', 'adj_nml', 'edge_prof_nml', 'lsc_approach_nml', 'edctsctab', 'jpartsctab'])[0m
[0;34m2023-11-15 14:19:06,580 [    spdm]     INFO: OrderedDict([('total_grills', 5),
             ('main_grills', 4),
             ('bt_direction', 1),
             ('power_fraction_factor', 0.8248)])[0m


In [None]:
#### Writing the namelist format is not supported yet: per the original note,
#### the snippet below fails with "NotImplementedError: TODO: NAMELISTFile.write",
#### so it is left commented out.
#### NOTE(review): DATA_INPUT is not defined in any visible cell, and the target
#### name ends in .h5 although format="namelist" — confirm before re-enabling.
# with File(f"{DATA_INPUT}/test-eq.h5", mode="w", format="namelist") as oid:
#     oid.write(eq_test)

### **netCDF**文件


In [7]:
# Example: GENRAY's profile input file genray_profs_in.nc, which is in netCDF
# format.
with File("./data/genray_profs_in.nc", format="NetCDf", mode="r") as oid:
    # read() alone yields an entry; per the original author's note this only
    # establishes a link and does not pull the data back yet.
    output_entry = oid.read()
    # dump() on a fresh read brings all of the data back.
    output_data = oid.read().dump()
# Log the top-level keys of the netCDF data.
logger.info(output_data.keys())
# Log the .data payload of the "dmass" variable.
logger.info(output_data["dmass"].data)
# Fetching a single child node via child(), without reading the whole tree
# back (left disabled in the original):
# logger.info(output_entry.child("dmass").__value__)

[0;37m2023-11-15 14:19:21,349 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/utils/sp_export.py:66:sp_load_module: Load module spdm.plugins.data.plugin_netcdf[0m
[0;37m2023-11-15 14:19:21,373 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/plugins/data/plugin_netcdf.py:149:open: Open NetCDF File ./data/genray_profs_in.nc mode=Mode.read[0m
[0;34m2023-11-15 14:19:21,379 [    spdm]     INFO: dict_keys(['charge', 'dmass', 'en', 'eqdsk_name', 'nj', 'nspecgr', 'r', 'temp', 'zeff', 'title'])[0m
[0;34m2023-11-15 14:19:21,380 [    spdm]     INFO: [1.00000000e+00 1.83619995e+03 3.67239990e+03 2.20343994e+04][0m


In [8]:
#### Write eq_test out as a .nc (netCDF) file, then read it back and log the
#### result to check the round trip.
with File("./data/test-eq.nc", mode="w", format="netcdf") as oid:
    oid.write(eq_test)
with File("./data/test-eq.nc", format="NetCDf", mode="r") as oid:
    test_data = oid.read().dump()
    logger.info(test_data.keys())
    logger.info(test_data["wall"])

[0;37m2023-11-15 14:19:32,717 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/plugins/data/plugin_netcdf.py:149:open: Open NetCDF File ./data/test-eq.nc mode=Mode.create|write[0m
[0;37m2023-11-15 14:19:32,741 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/plugins/data/plugin_netcdf.py:149:open: Open NetCDF File ./data/test-eq.nc mode=Mode.read[0m
[0;34m2023-11-15 14:19:32,745 [    spdm]     INFO: dict_keys(['wall', 'equilibrium'])[0m
[0;34m2023-11-15 14:19:32,746 [    spdm]     INFO: {'description_2d': {'0': {'limiter': {'unit': {'0': {'outline': {'r': array([1.35838, 1.35838, 1.35838, 1.36314, 1.4371 , 1.43721, 1.44164,
       1.4297 , 1.39169, 1.39151, 1.37929, 1.399  , 1.43623, 1.45919,
       1.47791, 1.54494, 1.61204, 1.63506, 1.66054, 1.66519, 1.6955 ,
       1.70668, 1.71388, 1.714  , 1.73649, 1.76324, 1.80199, 1.80237,
       1.93972, 1.97063, 2.07495, 2.1771 , 2.27925, 2.35   , 2.35   ,
       2.35   , 2.27925, 2.1771 , 2.07495, 1.

### **HDF5**文件

In [9]:
# Example: testall.h5, which is an HDF5-format file.
with File("./data/testall.h5", format="HDF5", mode="r") as oid:
    # read() alone yields an entry; per the original author's note this only
    # establishes a link and does not pull the data back yet.
    output_entry = oid.read()
    # dump() on a fresh read brings all of the data back.
    output_data = oid.read().dump()
# Log the top-level keys of the HDF5 data.
logger.info(output_data.keys())
# Log the "global_quantities" group under "coherent_wave".
logger.info(output_data["coherent_wave"]["global_quantities"])

# Fetching a single child node via child(), without reading the whole tree
# back (left disabled in the original):
# logger.info(output_entry.child("coherent_wave.current_tor").__value__)

[0;37m2023-11-15 14:19:43,508 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/utils/sp_export.py:66:sp_load_module: Load module spdm.plugins.data.plugin_hdf5[0m
[0;37m2023-11-15 14:19:43,511 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/plugins/data/plugin_hdf5.py:197:open: Open HDF5 File ./data/testall.h5 mode=Mode.read[0m
[0;34m2023-11-15 14:19:43,518 [    spdm]     INFO: dict_keys(['coherent_wave'])[0m
[0;34m2023-11-15 14:19:43,519 [    spdm]     INFO: {'current_tor': 461821.31203568814,
 'frequency': 4600000000.0,
 'power': 19375694355948.902}[0m


In [10]:
#### Write eq_test out as an HDF5 file, then read it back and log the result
#### to check the round trip.
with File("./data/test-eq.hdf5", mode="w", format="HDF5") as oid:
    oid.write(eq_test)
with File("./data/test-eq.hdf5", format="HDF5", mode="r") as oid:
    test_data = oid.read().dump()
    logger.info(test_data.keys())
    logger.info(test_data["wall"])

[0;37m2023-11-15 14:19:47,011 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/plugins/data/plugin_hdf5.py:197:open: Open HDF5 File ./data/test-eq.hdf5 mode=Mode.create|write[0m
[0;37m2023-11-15 14:19:47,034 [    spdm]    DEBUG: /home/salmon/workspace/fytok/SpDM/python/spdm/plugins/data/plugin_hdf5.py:197:open: Open HDF5 File ./data/test-eq.hdf5 mode=Mode.read[0m
[0;34m2023-11-15 14:19:47,044 [    spdm]     INFO: dict_keys(['equilibrium', 'wall'])[0m
[0;34m2023-11-15 14:19:47,046 [    spdm]     INFO: {'description_2d': [{'limiter': {'unit': [{'outline': {'r': array([1.35838, 1.35838, 1.35838, 1.36314, 1.4371 , 1.43721, 1.44164,
       1.4297 , 1.39169, 1.39151, 1.37929, 1.399  , 1.43623, 1.45919,
       1.47791, 1.54494, 1.61204, 1.63506, 1.66054, 1.66519, 1.6955 ,
       1.70668, 1.71388, 1.714  , 1.73649, 1.76324, 1.80199, 1.80237,
       1.93972, 1.97063, 2.07495, 2.1771 , 2.27925, 2.35   , 2.35   ,
       2.35   , 2.27925, 2.1771 , 2.07495, 1.9728 , 1.97063

注：其他半结构化数据格式（如 YAML、JSON 等）的访问方式与此类似。