In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error

# 计算不同星表之间的MAE等指标
需要先对星表进行交叉匹配，这里读取交叉匹配后的数据

# GALAH星表

In [15]:
# GALAH catalogue: read the cross-matched table stored at
# ../match/match_SpecTE-GALAHdr4.csv (path is relative to the notebook's working directory).
# Presumably this is the SpecTE x GALAH DR4 cross-match, judging by the file name — TODO confirm.
path = r'../match/match_SpecTE-GALAHdr4.csv'
match_GALAH_SpecTE_catalog=pd.read_csv(path)

In [16]:
# Summarise the loaded table: column names, non-null counts and dtypes.
match_GALAH_SpecTE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51763 entries, 0 to 51762
Data columns (total 63 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   GALAH_sobject_id     51763 non-null  float64
 1   GALAH_RA             51763 non-null  float64
 2   GALAH_Dec            51763 non-null  float64
 3   GALAH_Teff[K]        51763 non-null  float64
 4   GALAH_Logg           51763 non-null  float64
 5   GALAH_RV             51763 non-null  float64
 6   GALAH_FeH            51763 non-null  float64
 7   GALAH_MgH            51653 non-null  float64
 8   GALAH_SiH            51597 non-null  float64
 9   GALAH_KH             51706 non-null  float64
 10  GALAH_CaH            51740 non-null  float64
 11  GALAH_TiH            51739 non-null  float64
 12  GALAH_CrH            51738 non-null  float64
 13  GALAH_NiH            51750 non-null  float64
 14  GALAH_CH             51738 non-null  float64
 15  GALAH_OH             51526 non-null 

In [17]:
# Alias the catalogue under a common name so the comparison code can be reused.
df = match_GALAH_SpecTE_catalog

# One [parameter, diff_mean, diff_std, mae] row is appended per compared label pair.
results = []

# Column names to compare; the two lists are paired by position and must
# therefore list the same parameters in the same order.
GALAH_catalog_label = ['GALAH_Teff[K]', 'GALAH_Logg', 'GALAH_RV',
                       'GALAH_FeH', 'GALAH_MgH', 'GALAH_SiH', 'GALAH_KH', 'GALAH_CaH',
                       'GALAH_TiH', 'GALAH_CrH', 'GALAH_NiH', 'GALAH_CH', 'GALAH_OH',
                       'GALAH_AlH', 'GALAH_MnH', 'GALAH_NaH', 'GALAH_VH']
SpecTE_catalog_label = ['Teff[K]', 'Logg', 'RV', 'FeH', 'MgH', 'SiH', 'KH', 'CaH',
                        'TiH', 'CrH', 'NiH', 'CH', 'OH', 'AlH', 'MnH', 'NaH', 'VH']

# zip() silently stops at the shorter list, so a forgotten entry would drop
# parameters from the report without any error. Fail loudly instead.
assert len(GALAH_catalog_label) == len(SpecTE_catalog_label), (
    "GALAH and SpecTE label lists must have the same length"
)

# Compute diff_mean, diff_std and MAE for every label pair.
for galah_label, specte_label in zip(GALAH_catalog_label, SpecTE_catalog_label):

    # Keep only the rows where both columns hold a value.
    valid_data = df[[galah_label, specte_label]].dropna()

    # Mean and standard deviation of the (GALAH - SpecTE) difference.
    diff = valid_data[galah_label] - valid_data[specte_label]
    diff_std = diff.std()
    diff_mean = diff.mean()

    # Mean absolute error between the two columns.
    mae = mean_absolute_error(valid_data[galah_label], valid_data[specte_label])

    # The row is keyed by the SpecTE column name.
    results.append([specte_label, diff_mean, diff_std, mae])

# Collect the per-parameter rows into a DataFrame.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

# Print the result table.
print(results_df)


   Parameter  diff_mean    diff_std        MAE
0    Teff[K]  19.460700  155.528897  85.742087
1       Logg   0.019141    0.219635   0.118555
2         RV  -0.123528   10.964424   4.392765
3        FeH  -0.003704    0.107662   0.066694
4        MgH   0.014644    0.108672   0.074570
5        SiH  -0.000472    0.102236   0.073388
6         KH  -0.031369    0.231162   0.169824
7        CaH  -0.000564    0.115830   0.080776
8        TiH   0.107884    0.133933   0.136042
9        CrH   0.144493    0.163358   0.175293
10       NiH  -0.011352    0.119071   0.074378
11        CH   0.000927    0.182991   0.126759
12        OH   0.006881    0.259169   0.187213
13       AlH  -0.076222    0.190808   0.150047
14       MnH  -0.026443    0.136941   0.095558
15       NaH   0.132113    0.191129   0.185109
16        VH   0.164462    0.170693   0.191414


In [18]:
# Write the current results_df to CSV without the row index.
# NOTE(review): presumably the ./result_MAE directory must already exist — confirm,
# and make sure results_df still holds the GALAH-vs-SpecTE table when this cell runs.
results_df.to_csv('./result_MAE/GALAH_SpecTE_results.csv', index=False)

# GALAH_ASPCAP星表

In [21]:
# Read the GALAH x APOGEE match table (file name: match_GALAH_apogee.csv) and
# summarise its columns, non-null counts and dtypes.
# NOTE(review): this is an absolute path on a local F: drive, unlike the relative
# ../match/ paths used by the other load cells in this notebook, so the cell only
# runs on a machine where that exact path exists — consider relocating the file.
path = r'F:\My_trial\Fits_preprocessed\other_catalog\match_GALAH_apogee.csv'
match_GALAH_apogee_catalog=pd.read_csv(path)
match_GALAH_apogee_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20977 entries, 0 to 20976
Data columns (total 38 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   GALAH_starid    20977 non-null  object 
 1   GALAH_RA        20977 non-null  float64
 2   GALAH_Dec       20977 non-null  float64
 3   GALAH_Teff[K]   20977 non-null  float64
 4   GALAH_Logg      20977 non-null  float64
 5   GALAH_FeH       20977 non-null  float64
 6   GALAH_MgH       20613 non-null  float64
 7   GALAH_SiH       20844 non-null  float64
 8   GALAH_KH        19621 non-null  float64
 9   GALAH_CaH       20403 non-null  float64
 10  GALAH_TiH       20483 non-null  float64
 11  GALAH_CrH       20787 non-null  float64
 12  GALAH_NiH       19666 non-null  float64
 13  GALAH_CH        7945 non-null   float64
 14  GALAH_OH        19797 non-null  float64
 15  GALAH_AlH       20602 non-null  float64
 16  GALAH_MnH       20809 non-null  float64
 17  ASPCAP_starid   20977 non-null 

In [22]:
# Alias the catalogue under a common name so the comparison code can be reused.
df = match_GALAH_apogee_catalog

# One [parameter, diff_mean, diff_std, mae] row is appended per compared label pair.
results = []

# Column names to compare; the two lists are paired by position and must
# therefore list the same parameters in the same order.
GALAH_catalog_label = ['GALAH_Teff[K]', 'GALAH_Logg', 'GALAH_FeH', 'GALAH_MgH',
                       'GALAH_SiH', 'GALAH_KH', 'GALAH_CaH', 'GALAH_TiH',
                       'GALAH_CrH', 'GALAH_NiH', 'GALAH_CH', 'GALAH_OH',
                       'GALAH_AlH', 'GALAH_MnH']
ASPCAP_catalog_label = ['ASPCAP_Teff[K]', 'ASPCAP_Logg', 'ASPCAP_FeH', 'ASPCAP_MgH',
                        'ASPCAP_SiH', 'ASPCAP_KH', 'ASPCAP_CaH', 'ASPCAP_TiH',
                        'ASPCAP_CrH', 'ASPCAP_NiH', 'ASPCAP_CH', 'ASPCAP_OH',
                        'ASPCAP_AlH', 'ASPCAP_MnH']

# zip() silently stops at the shorter list, so a forgotten entry would drop
# parameters from the report without any error. Fail loudly instead.
assert len(GALAH_catalog_label) == len(ASPCAP_catalog_label), (
    "GALAH and ASPCAP label lists must have the same length"
)

# Compute diff_mean, diff_std and MAE for every label pair.
for galah_label, aspcap_label in zip(GALAH_catalog_label, ASPCAP_catalog_label):

    # Keep only the rows where both columns hold a value.
    valid_data = df[[galah_label, aspcap_label]].dropna()

    # Mean and standard deviation of the (GALAH - ASPCAP) difference.
    diff = valid_data[galah_label] - valid_data[aspcap_label]
    diff_std = diff.std()
    diff_mean = diff.mean()

    # Mean absolute error between the two columns.
    mae = mean_absolute_error(valid_data[galah_label], valid_data[aspcap_label])

    # The row is keyed by the ASPCAP column name.
    results.append([aspcap_label, diff_mean, diff_std, mae])

# Collect the per-parameter rows into a DataFrame.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

# Print the result table.
print(results_df)


         Parameter  diff_mean    diff_std        MAE
0   ASPCAP_Teff[K]   0.086246  109.034836  72.228294
1      ASPCAP_Logg  -0.026931    0.133338   0.090275
2       ASPCAP_FeH  -0.020208    0.105276   0.072803
3       ASPCAP_MgH  -0.019688    0.137432   0.100732
4       ASPCAP_SiH  -0.031290    0.102146   0.080092
5        ASPCAP_KH  -0.031112    0.252438   0.174172
6       ASPCAP_CaH   0.040445    0.144833   0.108957
7       ASPCAP_TiH   0.085806    0.232145   0.182271
8       ASPCAP_CrH   0.051695    0.266992   0.179080
9       ASPCAP_NiH  -0.030732    0.135470   0.102430
10       ASPCAP_CH   0.087263    0.193296   0.142837
11       ASPCAP_OH   0.042217    0.232229   0.170631
12      ASPCAP_AlH  -0.001976    0.156198   0.117927
13      ASPCAP_MnH  -0.025265    0.166360   0.111607


In [11]:
# Write the current results_df to CSV without the row index.
# NOTE(review): this cell targets ./分析结果/ while the other save cells in this
# notebook write to ./result_MAE/ — confirm which directory is intended, and that
# the directory exists before running.
results_df.to_csv('./分析结果/GALAH_ASPCAP_results.csv', index=False)

# StarGRUNet星表

In [5]:
# Read the data: load ../match/match_SpecTE-StarGRU-APOGEE.csv (path is relative to
# the notebook's working directory) and summarise its columns and dtypes.
# Presumably this is the three-way SpecTE / StarGRU / APOGEE cross-match,
# judging by the file name — TODO confirm.
path = r'../match/match_SpecTE-StarGRU-APOGEE.csv'
match_SpecTE_StarGRU_APOGEE_catalog=pd.read_csv(path)
match_SpecTE_StarGRU_APOGEE_catalog.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142019 entries, 0 to 142018
Columns: 133 entries, obsid to Separation
dtypes: float64(127), int64(4), object(2)
memory usage: 144.1+ MB


In [None]:

# Print every column name of the cross-matched table, one per line.
columns = match_SpecTE_StarGRU_APOGEE_catalog.columns
for column_name in columns:
    print(column_name)

## StarGRU-ASPCAP

In [11]:
# StarGRUNet

# Alias the catalogue under a common name so the comparison code can be reused.
df = match_SpecTE_StarGRU_APOGEE_catalog

# One [parameter, diff_mean, diff_std, mae] row is appended per compared label pair.
results = []

# Column names to compare; the two lists are paired by position and must
# therefore list the same parameters in the same order.
StarGRUNet_catalog_label = ['Teff[K]_2', 'Logg_2', 'FeH_2', 'MgH_2',
                            'SiH_2', 'SH_2', 'KH_2', 'CaH_2',
                            'TiH_2', 'CrH_2', 'NiH_2', 'CH_2',
                            'NH_2', 'OH_2', 'AlH_2', 'MnH_2']
ASPCAP_catalog_label = ['ASPCAP_Teff[K]_2', 'ASPCAP_Logg_2', 'ASPCAP_FeH_2', 'ASPCAP_MgH_2',
                        'ASPCAP_SiH_2', 'ASPCAP_SH_2', 'ASPCAP_KH_2', 'ASPCAP_CaH_2',
                        'ASPCAP_TiH_2', 'ASPCAP_CrH_2', 'ASPCAP_NiH_2', 'ASPCAP_CH_2',
                        'ASPCAP_NH_2', 'ASPCAP_OH_2', 'ASPCAP_AlH_2', 'ASPCAP_MnH_2']

# zip() silently stops at the shorter list, so a forgotten entry would drop
# parameters from the report without any error. Fail loudly instead.
assert len(ASPCAP_catalog_label) == len(StarGRUNet_catalog_label), (
    "ASPCAP and StarGRUNet label lists must have the same length"
)

# Compute diff_mean, diff_std and MAE for every label pair.
for aspcap_label, stargrunet_label in zip(ASPCAP_catalog_label, StarGRUNet_catalog_label):

    # Keep only the rows where both columns hold a value.
    valid_data = df[[aspcap_label, stargrunet_label]].dropna()

    # Mean and standard deviation of the (ASPCAP - StarGRUNet) difference.
    diff = valid_data[aspcap_label] - valid_data[stargrunet_label]
    diff_std = diff.std()
    diff_mean = diff.mean()

    # Mean absolute error between the two columns.
    mae = mean_absolute_error(valid_data[aspcap_label], valid_data[stargrunet_label])

    # The row is keyed by the StarGRUNet column name.
    results.append([stargrunet_label, diff_mean, diff_std, mae])

# Collect the per-parameter rows into a DataFrame.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

# Print the result table.
print(results_df)


    Parameter  diff_mean    diff_std         MAE
0   Teff[K]_2  18.713020  166.398981  118.814556
1      Logg_2   0.017079    0.270694    0.165584
2       FeH_2  -0.000332    0.105143    0.057979
3       MgH_2   0.003712    0.090981    0.052603
4       SiH_2   0.003196    0.110924    0.060939
5        SH_2  -0.001969    0.128758    0.080865
6        KH_2   0.002181    0.153606    0.090440
7       CaH_2   0.005112    0.114553    0.060762
8       TiH_2  -0.000162    0.176522    0.104327
9       CrH_2  -0.007344    0.220973    0.128345
10      NiH_2  -0.001691    0.103921    0.061702
11       CH_2   0.000914    0.113846    0.066041
12       NH_2  -0.004668    0.192369    0.115286
13       OH_2   0.001598    0.119169    0.075563
14      AlH_2   0.001378    0.134659    0.075087
15      MnH_2  -0.000354    0.133301    0.074684


In [12]:
# Write the current results_df to CSV without the row index.
# NOTE(review): presumably the ./result_MAE directory must already exist — confirm,
# and make sure results_df still holds the StarGRUNet-vs-ASPCAP table when this cell runs.
results_df.to_csv('./result_MAE/StarGRUNet_ASPCAP_results.csv', index=False)

## SpecTE-ASPCAP

In [19]:
# Alias the catalogue under a common name so the comparison code can be reused.
df = match_SpecTE_StarGRU_APOGEE_catalog

# One [parameter, diff_mean, diff_std, mae] row is appended per compared label pair.
results = []

# Column names to compare; the two lists are paired by position and must
# therefore list the same parameters in the same order.
SpecTE_catalog_label = ['Teff[K]_1', 'Logg_1', 'RV', 'FeH_1', 'MgH_1',
                        'SiH_1', 'SH_1', 'KH_1', 'CaH_1',
                        'TiH_1', 'CrH_1', 'NiH_1', 'CH_1',
                        'NH_1', 'OH_1', 'AlH_1', 'MnH_1', 'NaH', 'VH']
ASPCAP_catalog_label = ['ASPCAP_Teff[K]_1', 'ASPCAP_Logg_1', 'ASPCAP_RV_1', 'ASPCAP_FeH_1', 'ASPCAP_MgH_1',
                        'ASPCAP_SiH_1', 'ASPCAP_SH_1', 'ASPCAP_KH_1', 'ASPCAP_CaH_1',
                        'ASPCAP_TiH_1', 'ASPCAP_CrH_1', 'ASPCAP_NiH_1', 'ASPCAP_CH_1',
                        'ASPCAP_NH_1', 'ASPCAP_OH_1', 'ASPCAP_AlH_1', 'ASPCAP_MnH_1', 'ASPCAP_NaH_1', 'ASPCAP_VH_1']

# zip() silently stops at the shorter list, so a forgotten entry would drop
# parameters from the report without any error. Fail loudly instead.
assert len(ASPCAP_catalog_label) == len(SpecTE_catalog_label), (
    "ASPCAP and SpecTE label lists must have the same length"
)

# Compute diff_mean, diff_std and MAE for every label pair.
for aspcap_label, specte_label in zip(ASPCAP_catalog_label, SpecTE_catalog_label):

    # Keep only the rows where both columns hold a value.
    valid_data = df[[aspcap_label, specte_label]].dropna()

    # Mean and standard deviation of the (ASPCAP - SpecTE) difference.
    diff = valid_data[aspcap_label] - valid_data[specte_label]
    diff_std = diff.std()
    diff_mean = diff.mean()

    # Mean absolute error between the two columns.
    mae = mean_absolute_error(valid_data[aspcap_label], valid_data[specte_label])

    # The row is keyed by the SpecTE column name.
    results.append([specte_label, diff_mean, diff_std, mae])

# Collect the per-parameter rows into a DataFrame.
results_df = pd.DataFrame(results, columns=['Parameter', 'diff_mean', 'diff_std', 'MAE'])

# Print the result table.
print(results_df)


    Parameter  diff_mean   diff_std        MAE
0   Teff[K]_1   0.421453  79.037952  40.967451
1      Logg_1  -0.000522   0.140154   0.075560
2          RV  -0.034773   8.983694   3.688715
3       FeH_1   0.000370   0.052872   0.032086
4       MgH_1   0.000291   0.054051   0.034600
5       SiH_1   0.000361   0.055472   0.035309
6        SH_1   0.000297   0.100200   0.066362
7        KH_1  -0.000249   0.123940   0.072213
8       CaH_1   0.000727   0.064898   0.039684
9       TiH_1   0.001125   0.142634   0.086466
10      CrH_1   0.000021   0.193043   0.110413
11      NiH_1   0.000225   0.061407   0.038378
12       CH_1   0.000400   0.069021   0.043070
13       NH_1   0.000181   0.163050   0.091977
14       OH_1   0.000434   0.088587   0.057320
15      AlH_1   0.000294   0.077035   0.048648
16      MnH_1   0.000710   0.079259   0.048009
17        NaH   0.001115   0.329509   0.203229
18         VH   0.000024   0.229702   0.148207


In [20]:
# Write the current results_df to CSV without the row index.
# NOTE(review): presumably the ./result_MAE directory must already exist — confirm,
# and make sure results_df still holds the SpecTE-vs-ASPCAP table when this cell runs.
results_df.to_csv('./result_MAE/SpecTE_ASPCAP_results.csv', index=False)