In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error

# 计算不同星表之间的MAE等指标
需要先对星表进行交叉匹配，这里读取交叉匹配后的数据

# 1. SpecTE-GALAH星表

In [15]:
# Load the cross-matched SpecTE / GALAH table from a relative path.
path = r'../match/match_SpecTE-GALAHdr4.csv'
match_GALAH_SpecTE_catalog=pd.read_csv(path)

In [16]:
# Print column names, non-null counts and dtypes of the matched table.
match_GALAH_SpecTE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51763 entries, 0 to 51762
Data columns (total 63 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   GALAH_sobject_id     51763 non-null  float64
 1   GALAH_RA             51763 non-null  float64
 2   GALAH_Dec            51763 non-null  float64
 3   GALAH_Teff[K]        51763 non-null  float64
 4   GALAH_Logg           51763 non-null  float64
 5   GALAH_RV             51763 non-null  float64
 6   GALAH_FeH            51763 non-null  float64
 7   GALAH_MgH            51653 non-null  float64
 8   GALAH_SiH            51597 non-null  float64
 9   GALAH_KH             51706 non-null  float64
 10  GALAH_CaH            51740 non-null  float64
 11  GALAH_TiH            51739 non-null  float64
 12  GALAH_CrH            51738 non-null  float64
 13  GALAH_NiH            51750 non-null  float64
 14  GALAH_CH             51738 non-null  float64
 15  GALAH_OH             51526 non-null 

In [17]:
# Alias the matched table under a generic name so this cell can be reused as-is.
df = match_GALAH_SpecTE_catalog

# Column pairs to compare; the two lists must line up position by position.
GALAH_catalog_label = [
    'GALAH_Teff[K]', 'GALAH_Logg', 'GALAH_RV',
    'GALAH_FeH', 'GALAH_MgH', 'GALAH_SiH', 'GALAH_KH', 'GALAH_CaH',
    'GALAH_TiH', 'GALAH_CrH', 'GALAH_NiH', 'GALAH_CH', 'GALAH_OH',
    'GALAH_AlH', 'GALAH_MnH', 'GALAH_NaH', 'GALAH_VH',
]
SpecTE_catalog_label = [
    'Teff[K]', 'Logg', 'RV',
    'FeH', 'MgH', 'SiH', 'KH', 'CaH',
    'TiH', 'CrH', 'NiH', 'CH', 'OH',
    'AlH', 'MnH', 'NaH', 'VH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for ref_col, est_col in zip(GALAH_catalog_label, SpecTE_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[ref_col, est_col]].dropna()

    # Signed residual: GALAH minus SpecTE.
    residual = paired[ref_col] - paired[est_col]

    results.append([
        est_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[ref_col], paired[est_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


   Parameter  diff_mean    diff_std        MAE
0    Teff[K]  19.460700  155.528897  85.742087
1       Logg   0.019141    0.219635   0.118555
2         RV  -0.123528   10.964424   4.392765
3        FeH  -0.003704    0.107662   0.066694
4        MgH   0.014644    0.108672   0.074570
5        SiH  -0.000472    0.102236   0.073388
6         KH  -0.031369    0.231162   0.169824
7        CaH  -0.000564    0.115830   0.080776
8        TiH   0.107884    0.133933   0.136042
9        CrH   0.144493    0.163358   0.175293
10       NiH  -0.011352    0.119071   0.074378
11        CH   0.000927    0.182991   0.126759
12        OH   0.006881    0.259169   0.187213
13       AlH  -0.076222    0.190808   0.150047
14       MnH  -0.026443    0.136941   0.095558
15       NaH   0.132113    0.191129   0.185109
16        VH   0.164462    0.170693   0.191414


In [18]:
# Write the summary table to CSV without the index column.
# Assumes ./result_MAE/ already exists; nothing visible in this notebook creates it.
results_df.to_csv('./result_MAE/GALAH_SpecTE_results.csv', index=False)

# 2. GALAH-APOGEE

In [21]:
# Load the cross-matched GALAH / APOGEE table and print its column summary.
# NOTE(review): this is an absolute, machine-specific Windows path, whereas the other
# sections read from ../match/ — confirm where this file should live.
path = r'F:\My_trial\Fits_preprocessed\other_catalog\match_GALAH_apogee.csv'
match_GALAH_apogee_catalog=pd.read_csv(path)
match_GALAH_apogee_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20977 entries, 0 to 20976
Data columns (total 38 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   GALAH_starid    20977 non-null  object 
 1   GALAH_RA        20977 non-null  float64
 2   GALAH_Dec       20977 non-null  float64
 3   GALAH_Teff[K]   20977 non-null  float64
 4   GALAH_Logg      20977 non-null  float64
 5   GALAH_FeH       20977 non-null  float64
 6   GALAH_MgH       20613 non-null  float64
 7   GALAH_SiH       20844 non-null  float64
 8   GALAH_KH        19621 non-null  float64
 9   GALAH_CaH       20403 non-null  float64
 10  GALAH_TiH       20483 non-null  float64
 11  GALAH_CrH       20787 non-null  float64
 12  GALAH_NiH       19666 non-null  float64
 13  GALAH_CH        7945 non-null   float64
 14  GALAH_OH        19797 non-null  float64
 15  GALAH_AlH       20602 non-null  float64
 16  GALAH_MnH       20809 non-null  float64
 17  ASPCAP_starid   20977 non-null 

In [22]:
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_GALAH_apogee_catalog

# Paired columns; both lists list the parameters in the same order.
GALAH_catalog_label = [
    'GALAH_Teff[K]', 'GALAH_Logg', 'GALAH_FeH', 'GALAH_MgH', 'GALAH_SiH',
    'GALAH_KH', 'GALAH_CaH', 'GALAH_TiH', 'GALAH_CrH', 'GALAH_NiH',
    'GALAH_CH', 'GALAH_OH', 'GALAH_AlH', 'GALAH_MnH',
]
ASPCAP_catalog_label = [
    'ASPCAP_Teff[K]', 'ASPCAP_Logg', 'ASPCAP_FeH', 'ASPCAP_MgH', 'ASPCAP_SiH',
    'ASPCAP_KH', 'ASPCAP_CaH', 'ASPCAP_TiH', 'ASPCAP_CrH', 'ASPCAP_NiH',
    'ASPCAP_CH', 'ASPCAP_OH', 'ASPCAP_AlH', 'ASPCAP_MnH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for galah_col, aspcap_col in zip(GALAH_catalog_label, ASPCAP_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[galah_col, aspcap_col]].dropna()

    # Signed residual: GALAH minus ASPCAP.
    residual = paired[galah_col] - paired[aspcap_col]

    results.append([
        aspcap_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[galah_col], paired[aspcap_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


         Parameter  diff_mean    diff_std        MAE
0   ASPCAP_Teff[K]   0.086246  109.034836  72.228294
1      ASPCAP_Logg  -0.026931    0.133338   0.090275
2       ASPCAP_FeH  -0.020208    0.105276   0.072803
3       ASPCAP_MgH  -0.019688    0.137432   0.100732
4       ASPCAP_SiH  -0.031290    0.102146   0.080092
5        ASPCAP_KH  -0.031112    0.252438   0.174172
6       ASPCAP_CaH   0.040445    0.144833   0.108957
7       ASPCAP_TiH   0.085806    0.232145   0.182271
8       ASPCAP_CrH   0.051695    0.266992   0.179080
9       ASPCAP_NiH  -0.030732    0.135470   0.102430
10       ASPCAP_CH   0.087263    0.193296   0.142837
11       ASPCAP_OH   0.042217    0.232229   0.170631
12      ASPCAP_AlH  -0.001976    0.156198   0.117927
13      ASPCAP_MnH  -0.025265    0.166360   0.111607


In [11]:
# Write the GALAH-vs-ASPCAP summary table to CSV without the index column.
# NOTE(review): this is the only save in the notebook that targets ./分析结果/ instead of
# ./result_MAE/ — confirm whether the different output directory is intentional.
results_df.to_csv('./分析结果/GALAH_ASPCAP_results.csv', index=False)

# 3. DDPayne-GALAH

In [14]:
# Load the cross-matched DDPayne / GALAH table and print its column summary.
path = r'../match/match_DDPayne-GALAHdr4.csv'
match_GALAH_DDPayne_catalog=pd.read_csv(path)
match_GALAH_DDPayne_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29290 entries, 0 to 29289
Data columns (total 43 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   SPECID            29290 non-null  object 
 1   RA                29290 non-null  float64
 2   DEC               29290 non-null  float64
 3   SNR_G             29290 non-null  float64
 4   DDPayne_Teff[K]   29290 non-null  float64
 5   DDPayne_Logg      29290 non-null  float64
 6   DDPayne_RV        29290 non-null  float64
 7   DDPayne_FeH       29290 non-null  float64
 8   DDPayne_MgH       29290 non-null  float64
 9   DDPayne_SiH       29290 non-null  float64
 10  DDPayne_CaH       29290 non-null  float64
 11  DDPayne_TiH       29290 non-null  float64
 12  DDPayne_CrH       29290 non-null  float64
 13  DDPayne_NiH       29290 non-null  float64
 14  DDPayne_CH        29290 non-null  float64
 15  DDPayne_NH        29290 non-null  float64
 16  DDPayne_OH        29290 non-null  float6

In [16]:
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_GALAH_DDPayne_catalog

# Reference (from a pasted .info() dump, columns 4-19): the DDPayne columns are
# Teff[K], Logg, RV, FeH, MgH, SiH, CaH, TiH, CrH, NiH, CH, NH, OH, AlH, MnH, NaH.
# DDPayne_NH is left out of the comparison because GALAH has no N/H.

# Paired columns; both lists list the parameters in the same order.
GALAH_catalog_label = [
    'GALAH_Teff[K]', 'GALAH_Logg', 'GALAH_RV', 'GALAH_FeH', 'GALAH_MgH',
    'GALAH_SiH', 'GALAH_CaH', 'GALAH_TiH', 'GALAH_CrH', 'GALAH_NiH',
    'GALAH_CH', 'GALAH_OH', 'GALAH_AlH', 'GALAH_MnH', 'GALAH_NaH',
]
DDPayne_catalog_label = [
    'DDPayne_Teff[K]', 'DDPayne_Logg', 'DDPayne_RV', 'DDPayne_FeH', 'DDPayne_MgH',
    'DDPayne_SiH', 'DDPayne_CaH', 'DDPayne_TiH', 'DDPayne_CrH', 'DDPayne_NiH',
    'DDPayne_CH', 'DDPayne_OH', 'DDPayne_AlH', 'DDPayne_MnH', 'DDPayne_NaH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for galah_col, ddpayne_col in zip(GALAH_catalog_label, DDPayne_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[galah_col, ddpayne_col]].dropna()

    # Signed residual: GALAH minus DDPayne.
    residual = paired[galah_col] - paired[ddpayne_col]

    results.append([
        ddpayne_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[galah_col], paired[ddpayne_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


          Parameter  diff_mean    diff_std        MAE
0   DDPayne_Teff[K]   5.221130  113.689074  75.468410
1      DDPayne_Logg   0.027362    0.212480   0.132405
2        DDPayne_RV   5.358325    9.856935   7.146824
3       DDPayne_FeH   0.037010    0.115374   0.083264
4       DDPayne_MgH   0.015773    0.128911   0.089875
5       DDPayne_SiH   0.105196    0.177270   0.147898
6       DDPayne_CaH   0.001482    0.139142   0.096817
7       DDPayne_TiH   0.053935    0.159795   0.112075
8       DDPayne_CrH   0.032767    0.150460   0.101890
9       DDPayne_NiH   0.026948    0.135297   0.094446
10       DDPayne_CH   0.158601    0.179530   0.201313
11       DDPayne_OH  -0.076719    0.385768   0.297846
12      DDPayne_AlH   0.078520    0.235759   0.176779
13      DDPayne_MnH   0.017209    0.206385   0.128848
14      DDPayne_NaH   0.044817    0.234612   0.169202


In [17]:
# Write the GALAH-vs-DDPayne summary table to CSV without the index column.
results_df.to_csv('./result_MAE/GALAH_DDPayne_results.csv', index=False)

# 4. DDPayne-APOGEE

In [18]:
# Load the cross-matched DDPayne / APOGEE table and print its column summary.
path = r'../match/match_DDPayne-APOGEE.csv'
match_APOGEE_DDPayne_catalog=pd.read_csv(path)
match_APOGEE_DDPayne_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 115134 entries, 0 to 115133
Data columns (total 46 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   SPECID           115134 non-null  object 
 1   RA               115134 non-null  float64
 2   DEC              115134 non-null  float64
 3   SNR_G            115134 non-null  float64
 4   DDPayne_Teff[K]  115134 non-null  float64
 5   DDPayne_Logg     115134 non-null  float64
 6   DDPayne_RV       115134 non-null  float64
 7   DDPayne_FeH      115134 non-null  float64
 8   DDPayne_MgH      115134 non-null  float64
 9   DDPayne_SiH      115134 non-null  float64
 10  DDPayne_CaH      115134 non-null  float64
 11  DDPayne_TiH      115134 non-null  float64
 12  DDPayne_CrH      115134 non-null  float64
 13  DDPayne_NiH      115134 non-null  float64
 14  DDPayne_CH       115134 non-null  float64
 15  DDPayne_NH       115134 non-null  float64
 16  DDPayne_OH       115134 non-null  floa

In [20]:
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_APOGEE_DDPayne_catalog

# Reference (from a pasted .info() dump, columns 21-42): the ASPCAP columns are
# RA, Dec, RV, Teff[K], Logg, FeH, CH, CIH, NH, OH, NaH, MgH, AlH, SiH, SH, KH,
# CaH, TiH, VH, CrH, MnH, NiH.

# Paired columns; both lists list the parameters in the same order.
APOGEE_catalog_label = [
    'ASPCAP_Teff[K]', 'ASPCAP_Logg', 'ASPCAP_RV', 'ASPCAP_FeH',
    'ASPCAP_MgH', 'ASPCAP_SiH', 'ASPCAP_CaH', 'ASPCAP_TiH',
    'ASPCAP_CrH', 'ASPCAP_NiH', 'ASPCAP_CH', 'ASPCAP_NH',
    'ASPCAP_OH', 'ASPCAP_AlH', 'ASPCAP_MnH', 'ASPCAP_NaH',
]
DDPayne_catalog_label = [
    'DDPayne_Teff[K]', 'DDPayne_Logg', 'DDPayne_RV', 'DDPayne_FeH',
    'DDPayne_MgH', 'DDPayne_SiH', 'DDPayne_CaH', 'DDPayne_TiH',
    'DDPayne_CrH', 'DDPayne_NiH', 'DDPayne_CH', 'DDPayne_NH',
    'DDPayne_OH', 'DDPayne_AlH', 'DDPayne_MnH', 'DDPayne_NaH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for aspcap_col, ddpayne_col in zip(APOGEE_catalog_label, DDPayne_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[aspcap_col, ddpayne_col]].dropna()

    # Signed residual: ASPCAP minus DDPayne.
    residual = paired[aspcap_col] - paired[ddpayne_col]

    results.append([
        ddpayne_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[aspcap_col], paired[ddpayne_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


          Parameter  diff_mean    diff_std        MAE
0   DDPayne_Teff[K]  -3.267898  117.447371  68.815192
1      DDPayne_Logg   0.001207    0.235851   0.134399
2        DDPayne_RV   5.053632    8.986913   6.936358
3       DDPayne_FeH   0.027440    0.098975   0.065759
4       DDPayne_MgH   0.019262    0.127965   0.088673
5       DDPayne_SiH   0.031351    0.179158   0.125096
6       DDPayne_CaH   0.031156    0.115433   0.084142
7       DDPayne_TiH  -0.009619    0.169938   0.112188
8       DDPayne_CrH  -0.011003    0.190328   0.119602
9       DDPayne_NiH   0.013855    0.125808   0.081023
10       DDPayne_CH   0.156316    0.133138   0.173531
11       DDPayne_NH   0.015273    0.177664   0.114093
12       DDPayne_OH   0.060733    0.209924   0.155149
13      DDPayne_AlH   0.036802    0.219170   0.153165
14      DDPayne_MnH   0.024074    0.207893   0.125655
15      DDPayne_NaH  -0.045814    0.356743   0.248386


In [21]:
# Write the ASPCAP-vs-DDPayne summary table to CSV without the index column.
results_df.to_csv('./result_MAE/ASPCAP_DDPayne_results.csv', index=False)

# 5. StarGRUNet和SpecTE联查

In [5]:
# Load the three-way SpecTE / StarGRU / APOGEE matched table and print its summary.
path = r'../match/match_SpecTE-StarGRU-APOGEE.csv'
match_SpecTE_StarGRU_APOGEE_catalog=pd.read_csv(path)
match_SpecTE_StarGRU_APOGEE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142019 entries, 0 to 142018
Columns: 133 entries, obsid to Separation
dtypes: float64(127), int64(4), object(2)
memory usage: 144.1+ MB


In [None]:

# List every column name of the three-way matched table, one per line.
columns = match_SpecTE_StarGRU_APOGEE_catalog.columns
for column_name in columns:
    print(column_name)

## StarGRU-ASPCAP

In [11]:
# StarGRUNet vs ASPCAP on the three-way matched table.
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_SpecTE_StarGRU_APOGEE_catalog

# Paired columns; both lists list the parameters in the same order.
StarGRUNet_catalog_label = [
    'Teff[K]_2', 'Logg_2', 'FeH_2', 'MgH_2',
    'SiH_2', 'SH_2', 'KH_2', 'CaH_2',
    'TiH_2', 'CrH_2', 'NiH_2', 'CH_2',
    'NH_2', 'OH_2', 'AlH_2', 'MnH_2',
]
ASPCAP_catalog_label = [
    'ASPCAP_Teff[K]_2', 'ASPCAP_Logg_2', 'ASPCAP_FeH_2', 'ASPCAP_MgH_2',
    'ASPCAP_SiH_2', 'ASPCAP_SH_2', 'ASPCAP_KH_2', 'ASPCAP_CaH_2',
    'ASPCAP_TiH_2', 'ASPCAP_CrH_2', 'ASPCAP_NiH_2', 'ASPCAP_CH_2',
    'ASPCAP_NH_2', 'ASPCAP_OH_2', 'ASPCAP_AlH_2', 'ASPCAP_MnH_2',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for aspcap_col, stargru_col in zip(ASPCAP_catalog_label, StarGRUNet_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[aspcap_col, stargru_col]].dropna()

    # Signed residual: ASPCAP minus StarGRUNet.
    residual = paired[aspcap_col] - paired[stargru_col]

    results.append([
        stargru_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[aspcap_col], paired[stargru_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


    Parameter  diff_mean    diff_std         MAE
0   Teff[K]_2  18.713020  166.398981  118.814556
1      Logg_2   0.017079    0.270694    0.165584
2       FeH_2  -0.000332    0.105143    0.057979
3       MgH_2   0.003712    0.090981    0.052603
4       SiH_2   0.003196    0.110924    0.060939
5        SH_2  -0.001969    0.128758    0.080865
6        KH_2   0.002181    0.153606    0.090440
7       CaH_2   0.005112    0.114553    0.060762
8       TiH_2  -0.000162    0.176522    0.104327
9       CrH_2  -0.007344    0.220973    0.128345
10      NiH_2  -0.001691    0.103921    0.061702
11       CH_2   0.000914    0.113846    0.066041
12       NH_2  -0.004668    0.192369    0.115286
13       OH_2   0.001598    0.119169    0.075563
14      AlH_2   0.001378    0.134659    0.075087
15      MnH_2  -0.000354    0.133301    0.074684


In [12]:
# Write the StarGRUNet-vs-ASPCAP summary table to CSV without the index column.
results_df.to_csv('./result_MAE/StarGRUNet_ASPCAP_results.csv', index=False)

## SpecTE-ASPCAP

In [19]:
# SpecTE vs ASPCAP on the three-way matched table.
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_SpecTE_StarGRU_APOGEE_catalog

# Paired columns; both lists list the parameters in the same order.
SpecTE_catalog_label = [
    'Teff[K]_1', 'Logg_1', 'RV', 'FeH_1', 'MgH_1',
    'SiH_1', 'SH_1', 'KH_1', 'CaH_1',
    'TiH_1', 'CrH_1', 'NiH_1', 'CH_1',
    'NH_1', 'OH_1', 'AlH_1', 'MnH_1', 'NaH', 'VH',
]
ASPCAP_catalog_label = [
    'ASPCAP_Teff[K]_1', 'ASPCAP_Logg_1', 'ASPCAP_RV_1', 'ASPCAP_FeH_1', 'ASPCAP_MgH_1',
    'ASPCAP_SiH_1', 'ASPCAP_SH_1', 'ASPCAP_KH_1', 'ASPCAP_CaH_1',
    'ASPCAP_TiH_1', 'ASPCAP_CrH_1', 'ASPCAP_NiH_1', 'ASPCAP_CH_1',
    'ASPCAP_NH_1', 'ASPCAP_OH_1', 'ASPCAP_AlH_1', 'ASPCAP_MnH_1', 'ASPCAP_NaH_1', 'ASPCAP_VH_1',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for aspcap_col, specte_col in zip(ASPCAP_catalog_label, SpecTE_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[aspcap_col, specte_col]].dropna()

    # Signed residual: ASPCAP minus SpecTE.
    residual = paired[aspcap_col] - paired[specte_col]

    results.append([
        specte_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[aspcap_col], paired[specte_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


    Parameter  diff_mean   diff_std        MAE
0   Teff[K]_1   0.421453  79.037952  40.967451
1      Logg_1  -0.000522   0.140154   0.075560
2          RV  -0.034773   8.983694   3.688715
3       FeH_1   0.000370   0.052872   0.032086
4       MgH_1   0.000291   0.054051   0.034600
5       SiH_1   0.000361   0.055472   0.035309
6        SH_1   0.000297   0.100200   0.066362
7        KH_1  -0.000249   0.123940   0.072213
8       CaH_1   0.000727   0.064898   0.039684
9       TiH_1   0.001125   0.142634   0.086466
10      CrH_1   0.000021   0.193043   0.110413
11      NiH_1   0.000225   0.061407   0.038378
12       CH_1   0.000400   0.069021   0.043070
13       NH_1   0.000181   0.163050   0.091977
14       OH_1   0.000434   0.088587   0.057320
15      AlH_1   0.000294   0.077035   0.048648
16      MnH_1   0.000710   0.079259   0.048009
17        NaH   0.001115   0.329509   0.203229
18         VH   0.000024   0.229702   0.148207


In [20]:
# Save the SpecTE-vs-ASPCAP summary computed on the three-way
# SpecTE / StarGRU / APOGEE matched table.
# It gets its own file name because the full SpecTE-APOGEE section of this notebook
# also writes ./result_MAE/SpecTE_ASPCAP_results.csv; sharing that name would make
# one of the two summaries silently overwrite the other.
results_df.to_csv('./result_MAE/SpecTE_ASPCAP_results_common.csv', index=False)

# 6. StarGRU-APOGEE

In [None]:
# Load the cross-matched StarGRU / APOGEE table and print its column summary.
path = r'../match/match_StarGRU-APOGEE.csv'
match_StarGRU_APOGEE_catalog=pd.read_csv(path)
match_StarGRU_APOGEE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211250 entries, 0 to 211249
Data columns (total 64 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   col1                 211250 non-null  int64  
 1   Unnamed: 0           211250 non-null  int64  
 2   combined_obsid       211250 non-null  int64  
 3   combined_snrg        211250 non-null  float64
 4   combined_ra          211250 non-null  float64
 5   combined_dec         211250 non-null  float64
 6   Teff[K]              211250 non-null  float64
 7   Logg                 211250 non-null  float64
 8   CH                   211250 non-null  float64
 9   NH                   211250 non-null  float64
 10  OH                   211250 non-null  float64
 11  MgH                  211250 non-null  float64
 12  AlH                  211250 non-null  float64
 13  SiH                  211250 non-null  float64
 14  SH                   211250 non-null  float64
 15  KH               

In [4]:
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_StarGRU_APOGEE_catalog

# Paired columns; both lists list the parameters in the same order.
StarGRU_catalog_label = [
    'Teff[K]', 'Logg', 'FeH', 'MgH',
    'SiH', 'SH', 'KH', 'CaH',
    'TiH', 'CrH', 'NiH', 'CH',
    'NH', 'OH', 'AlH', 'MnH',
]
ASPCAP_catalog_label = [
    'ASPCAP_Teff[K]', 'ASPCAP_Logg', 'ASPCAP_FeH', 'ASPCAP_MgH',
    'ASPCAP_SiH', 'ASPCAP_SH', 'ASPCAP_KH', 'ASPCAP_CaH',
    'ASPCAP_TiH', 'ASPCAP_CrH', 'ASPCAP_NiH', 'ASPCAP_CH',
    'ASPCAP_NH', 'ASPCAP_OH', 'ASPCAP_AlH', 'ASPCAP_MnH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for aspcap_col, stargru_col in zip(ASPCAP_catalog_label, StarGRU_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[aspcap_col, stargru_col]].dropna()

    # Signed residual: ASPCAP minus StarGRU.
    residual = paired[aspcap_col] - paired[stargru_col]

    results.append([
        stargru_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[aspcap_col], paired[stargru_col]),
    ])

# Collect the rows into a table, print it, then save it to CSV without the index.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)

results_df.to_csv('./result_MAE/StarGRU_ASPCAP_results.csv', index=False)

   Parameter  diff_mean    diff_std         MAE
0    Teff[K]  20.489817  167.715412  120.275275
1       Logg   0.020573    0.270868    0.162999
2        FeH   0.000347    0.105550    0.058063
3        MgH   0.004470    0.091424    0.052757
4        SiH   0.004040    0.111589    0.061065
5         SH  -0.001110    0.129356    0.081501
6         KH   0.002463    0.153433    0.090311
7        CaH   0.005701    0.114991    0.060900
8        TiH  -0.000023    0.177843    0.105889
9        CrH  -0.006672    0.223746    0.130721
10       NiH  -0.000996    0.103600    0.061814
11        CH   0.001656    0.114163    0.066205
12        NH  -0.004773    0.192501    0.116323
13        OH   0.002425    0.119407    0.076049
14       AlH   0.002704    0.135274    0.075421
15       MnH  -0.000064    0.133388    0.074887


# 7. SpecTE-APOGEE

In [6]:
# Load the cross-matched SpecTE / APOGEE table and print its column summary.
path = r'../match/match_SpecTE-APOGEE.csv'
match_SpecTE_APOGEE_catalog=pd.read_csv(path)
match_SpecTE_APOGEE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 242144 entries, 0 to 242143
Data columns (total 68 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   obsid                242144 non-null  int64  
 1   ra                   242144 non-null  float64
 2   dec                  242144 non-null  float64
 3   snrg                 242144 non-null  float64
 4   Teff[K]              242144 non-null  float64
 5   Teff[K]_uncertainty  242144 non-null  float64
 6   Logg                 242144 non-null  float64
 7   Logg_uncertainty     242144 non-null  float64
 8   RV                   242144 non-null  float64
 9   RV_uncertainty       242144 non-null  float64
 10  FeH                  242144 non-null  float64
 11  FeH_uncertainty      242144 non-null  float64
 12  MgH                  242144 non-null  float64
 13  MgH_uncertainty      242144 non-null  float64
 14  SiH                  242144 non-null  float64
 15  SiH_uncertainty  

In [8]:
# Generic alias so the comparison loop below does not depend on the catalog name.
df = match_SpecTE_APOGEE_catalog

# Paired columns; both lists list the parameters in the same order.
SpecTE_catalog_label = [
    'Teff[K]', 'Logg', 'RV', 'FeH', 'MgH',
    'SiH', 'SH', 'KH', 'CaH',
    'TiH', 'CrH', 'NiH', 'CH',
    'NH', 'OH', 'AlH', 'MnH', 'NaH', 'VH',
]
ASPCAP_catalog_label = [
    'ASPCAP_Teff[K]', 'ASPCAP_Logg', 'ASPCAP_RV', 'ASPCAP_FeH', 'ASPCAP_MgH',
    'ASPCAP_SiH', 'ASPCAP_SH', 'ASPCAP_KH', 'ASPCAP_CaH',
    'ASPCAP_TiH', 'ASPCAP_CrH', 'ASPCAP_NiH', 'ASPCAP_CH',
    'ASPCAP_NH', 'ASPCAP_OH', 'ASPCAP_AlH', 'ASPCAP_MnH', 'ASPCAP_NaH', 'ASPCAP_VH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for aspcap_col, specte_col in zip(ASPCAP_catalog_label, SpecTE_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[aspcap_col, specte_col]].dropna()

    # Signed residual: ASPCAP minus SpecTE.
    residual = paired[aspcap_col] - paired[specte_col]

    results.append([
        specte_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[aspcap_col], paired[specte_col]),
    ])

# Collect the rows into a table, print it, then save it to CSV without the index.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)
results_df.to_csv('./result_MAE/SpecTE_ASPCAP_results.csv', index=False)

   Parameter  diff_mean   diff_std        MAE
0    Teff[K]   0.505303  81.048271  41.484998
1       Logg   0.000388   0.142814   0.074933
2         RV   0.010002   9.215413   3.714131
3        FeH   0.000549   0.053610   0.031834
4        MgH   0.000505   0.054662   0.034476
5        SiH   0.000562   0.056004   0.035196
6         SH   0.000855   0.102803   0.067684
7         KH  -0.000023   0.124448   0.072088
8        CaH   0.000601   0.065554   0.039751
9        TiH   0.000803   0.144432   0.087805
10       CrH   0.000749   0.197763   0.113800
11       NiH   0.000530   0.062225   0.038493
12        CH   0.000508   0.069851   0.043195
13        NH   0.000506   0.165583   0.093608
14        OH   0.000458   0.090713   0.058521
15       AlH   0.000769   0.077377   0.048551
16       MnH   0.000694   0.081052   0.048228
17       NaH   0.001655   0.334843   0.208176
18        VH  -0.000598   0.231875   0.150235


# 8. SpecTE-APOGEE（20 ≤ snrg < 30 子样本）

In [9]:
# Load the cross-matched SpecTE / APOGEE table and print its column summary.
# NOTE(review): this reads the same file, into the same variable, as the load cell of
# the preceding SpecTE-APOGEE section; the re-read looks redundant when the notebook
# is run top to bottom.
path = r'../match/match_SpecTE-APOGEE.csv'
match_SpecTE_APOGEE_catalog=pd.read_csv(path)
match_SpecTE_APOGEE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 242144 entries, 0 to 242143
Data columns (total 68 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   obsid                242144 non-null  int64  
 1   ra                   242144 non-null  float64
 2   dec                  242144 non-null  float64
 3   snrg                 242144 non-null  float64
 4   Teff[K]              242144 non-null  float64
 5   Teff[K]_uncertainty  242144 non-null  float64
 6   Logg                 242144 non-null  float64
 7   Logg_uncertainty     242144 non-null  float64
 8   RV                   242144 non-null  float64
 9   RV_uncertainty       242144 non-null  float64
 10  FeH                  242144 non-null  float64
 11  FeH_uncertainty      242144 non-null  float64
 12  MgH                  242144 non-null  float64
 13  MgH_uncertainty      242144 non-null  float64
 14  SiH                  242144 non-null  float64
 15  SiH_uncertainty  

In [10]:
# Keep only the rows whose snrg value lies in the half-open interval [20, 30).
# (A stray bare `match_SpecTE_APOGEE_catalog` expression that had no effect was removed.)
snrg = match_SpecTE_APOGEE_catalog['snrg']
filtered_match_SpecTE_APOGEE_catalog = match_SpecTE_APOGEE_catalog[(snrg >= 20) & (snrg < 30)]

# Last expression of the cell: display (rows, columns) of the subsample.
filtered_match_SpecTE_APOGEE_catalog.shape

(20062, 68)

In [12]:
# Generic alias; here it points at the snrg-filtered subsample.
df = filtered_match_SpecTE_APOGEE_catalog

# Paired columns; both lists list the parameters in the same order.
SpecTE_catalog_label = [
    'Teff[K]', 'Logg', 'RV', 'FeH', 'MgH',
    'SiH', 'SH', 'KH', 'CaH',
    'TiH', 'CrH', 'NiH', 'CH',
    'NH', 'OH', 'AlH', 'MnH', 'NaH', 'VH',
]
ASPCAP_catalog_label = [
    'ASPCAP_Teff[K]', 'ASPCAP_Logg', 'ASPCAP_RV', 'ASPCAP_FeH', 'ASPCAP_MgH',
    'ASPCAP_SiH', 'ASPCAP_SH', 'ASPCAP_KH', 'ASPCAP_CaH',
    'ASPCAP_TiH', 'ASPCAP_CrH', 'ASPCAP_NiH', 'ASPCAP_CH',
    'ASPCAP_NH', 'ASPCAP_OH', 'ASPCAP_AlH', 'ASPCAP_MnH', 'ASPCAP_NaH', 'ASPCAP_VH',
]

# One summary row per parameter: [name, mean(diff), std(diff), MAE].
results = []
for aspcap_col, specte_col in zip(ASPCAP_catalog_label, SpecTE_catalog_label):
    # Keep only the rows where both catalogs report a value.
    paired = df[[aspcap_col, specte_col]].dropna()

    # Signed residual: ASPCAP minus SpecTE.
    residual = paired[aspcap_col] - paired[specte_col]

    results.append([
        specte_col,
        residual.mean(axis=0),
        residual.std(axis=0),
        mean_absolute_error(paired[aspcap_col], paired[specte_col]),
    ])

# Collect the rows into a table and print it.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

print(results_df)


   Parameter  diff_mean   diff_std        MAE
0    Teff[K]   0.863836  80.268217  43.976534
1       Logg  -0.000498   0.138367   0.084786
2         RV  -0.196748  11.079629   4.303674
3        FeH   0.002051   0.056956   0.038054
4        MgH   0.000739   0.059504   0.040334
5        SiH   0.000811   0.061894   0.041782
6         SH   0.001427   0.116270   0.076774
7         KH   0.000471   0.123173   0.075846
8        CaH   0.001231   0.065289   0.042991
9        TiH   0.001536   0.137513   0.084320
10       CrH   0.001636   0.189375   0.109738
11       NiH   0.000836   0.065502   0.043781
12        CH   0.001332   0.077331   0.050789
13        NH   0.001666   0.146794   0.085986
14        OH   0.001929   0.094018   0.061608
15       AlH   0.001383   0.084044   0.055230
16       MnH   0.002474   0.082667   0.053947
17       NaH   0.003014   0.323226   0.203061
18        VH  -0.002566   0.245842   0.160337


In [13]:
# Write the summary table for the snrg-filtered subsample to CSV without the index column.
results_df.to_csv('./result_MAE/SpecTE_ASPCAP_results_20-30.csv', index=False)