In [13]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Star import kept in its original position: moving it after the skfda imports
# could let names exported by `functions` shadow BSplineBasis / FDataBasis.
from functions import *
from skfda.representation.basis import BSplineBasis
from skfda.representation.basis import FDataBasis

In [None]:
# Fit each candidate model once and collect the fitted estimators by name.
# Every fit uses the same basis/split settings:
#   n_basis=4, order=3, test_size=0, seed=[1], ite=0
data_tpts, data_x, data_y = data_deal()

# fos_B with model_type='l' -> stored below as 'lin_model'
output_l = fos_B(
    data_x, data_y, data_tpts,
    n_basis=4, order=3,
    test_size=0, seed=[1], ite=0,
    model_type='l', n_neighbors=5,
)

# fos_B with model_type='k' -> stored below as 'knn_model'
output_k = fos_B(
    data_x, data_y, data_tpts,
    n_basis=4, order=3,
    test_size=0, seed=[1], ite=0,
    model_type='k', n_neighbors=5,
)

output_f = GBDT_B(
    data_x, data_y, data_tpts,
    n_basis=4, order=3,
    test_size=0, seed=[1], ite=0,
)

output_x = XGB_B(
    data_x, data_y, data_tpts,
    n_basis=4, order=3,
    test_size=0, seed=[1], ite=0,
    params=None,
)

# Booster settings handed to FBboost as its third positional argument.
model_params = dict(eta=0.1, max_depth=6, objective='reg:squarederror')
output_m = FBboost(
    data_x, data_y, model_params, data_tpts,
    n_basis=4, order=3,
    num_rounds=200, loss_type=1,
    test_size=0, seed=[1],
    ite=0, silent=True,
)

# Insertion order matters: later cells iterate this dict and pair each entry
# with a colour / line style by position.
model_dic = {
    'lin_model': output_l['model'],
    'knn_model': output_k['model'],
    'gbdt_model': output_f['model'],
    'xgb_model': output_x['model'],
    'mbr_model': output_m['model'],
}
model_dic

In [None]:
# Excel sheet name read by data_deal_test (passed as `sheet_name`); also used
# as the first part of the saved figure's file name.
file = 'STT100'
# Name of the response column; data_deal_test divides it by 100 to build the
# 'Yield' column. Also used as the second part of the saved figure's file name.
Yield = 'Sethylene%'
def data_deal_test(Yield, sheet_name=None):
    """Load one sheet of the test workbook and build the feature/curve tables.

    Also draws the extracted curves on a single figure as a visual check.

    Parameters
    ----------
    Yield : str
        Name of the response column. Its values are divided by 100 and stored
        in a new ``'Yield'`` column.
    sheet_name : str, optional
        Excel sheet to read. When omitted, the notebook-level ``file`` variable
        is used, which is what this function always did before.

    Returns
    -------
    data_tpts
        The notebook-level ``data_tpts`` object, returned unchanged (it is not
        computed in this function; see the note above the ``return``).
    data_x : pandas.DataFrame
        The columns listed in ``X_name``, one row per distinct ``ID``
        (first occurrence), with a fresh 0..n-1 index.
    data_y : pandas.DataFrame
        Result of ``format_Curves`` on the 0..300 grid, with every value
        ``<= 0`` replaced by NaN.
    """
    if sheet_name is None:
        sheet_name = file  # fall back to the notebook-level sheet name

    # .copy(): new columns are assigned below, so work on an independent frame
    # rather than on a slice of the frame returned by read_excel.
    mto_df = pd.read_excel('...', sheet_name=sheet_name).iloc[:, 6:].copy()
    col_x = list(mto_df.columns[0:13])

    # Create the new ID column: the counter increases by one at every row whose
    # combination of the first 13 columns has not been seen before, so
    # consecutive repeats of a combination share one ID.
    is_repeat = mto_df[col_x].duplicated()
    mto_df['ID'] = (~is_repeat).cumsum()
    mto_df['Yield'] = mto_df[Yield] / 100

    Tmin, Tmax = 0, 300
    Grids = np.arange(Tmin, Tmax + 1, 1)
    use_ID = [1]
    # NOTE(review): the frame is NOT filtered to `use_ID`, so `data_x` below can
    # contain more IDs than the curves built for `use_ID` - confirm intended.
    use_mto_df = mto_df
    print(use_mto_df)

    df_Curves = format_Curves(use_mto_df, use_ID, Grids)
    df_Curves[df_Curves <= 0] = np.nan  # non-positive values are treated as missing
    data_y = df_Curves

    # Visual check: every curve on one axes, plotted against column position.
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    for i in range(len(data_y)):
        curve = data_y.iloc[i, :].tolist()
        ax.plot(range(len(curve)), curve)
    # Axis decoration does not depend on the curve, so it is applied once.
    # NOTE(review): `{1}` is a literal, so the title is always "Curve 1".
    ax.set_title(f'Curve {1}')
    ax.set_xlim(0, 300)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    plt.tight_layout()
    plt.show()

    X_name = ['Modification','AS','A/T','FDSi','Largest Ring sizes','MDa',
              'MDb','MDc','Mdi','CD','crystal size(μm)','reaction temp(°C)',
              'WHSV(h-1)']

    X_df = use_mto_df.drop_duplicates(subset='ID', keep='first')
    data_x = X_df[X_name].reset_index(drop=True)

    # NOTE(review): `data_tpts` is never assigned in this function; it resolves
    # to the notebook-level variable created by the training cell, so that cell
    # must have run first. `Grids` is probably what should be returned here -
    # confirm that both describe the same time grid before changing it.
    return data_tpts, data_x, data_y

# Predict the test curves with every fitted model and score each with R^2.
# Results:
#   pred_dic[key] -> DataFrame of predicted curves (same columns as test_y)
#   R2_dic[key]   -> pooled R^2 over all non-NaN observations
data_tpts, test_x, test_y = data_deal_test(Yield)

# Models that output basis coefficients are all expanded on the same
# 4-function, order-3 B-spline basis, so it is built once.
coef_basis = BSplineBasis(
    domain_range=(0.0, data_tpts[-1]),
    n_basis=4,
    order=3)

pred_dic = {}
R2_dic = {}
for key, model in model_dic.items():
    if key == 'lin_model':
        y_pred_b = model.predict(test_x)
    elif key == 'knn_model':
        y_pred_b = model.predict(test_x.to_numpy())
    elif key in ('gbdt_model', 'xgb_model'):
        y_pred_c = model.predict(test_x)
        y_pred_b = FDataBasis(coef_basis, y_pred_c)
    elif key == 'mbr_model':
        # NOTE(review): `xgb` is not imported in the visible cells; presumably
        # it is re-exported by `from functions import *` - confirm.
        dtest = xgb.DMatrix(test_x)
        # `model` is iterated as a collection of boosters; each one's
        # prediction becomes one column after the transpose.
        c_pred = np.array([m.predict(dtest) for m in model]).T
        y_pred_b = FDataBasis(coef_basis, c_pred)
    else:
        # Previously an unknown key silently reused the prediction from the
        # preceding iteration (or raised NameError on the first one).
        raise ValueError(f"No prediction rule for model key {key!r}")

    # Evaluate on the time grid and flatten to (n_samples, n_time_points).
    y_pred = y_pred_b.to_grid(data_tpts).data_matrix
    y_pred = y_pred.reshape(-1, len((y_pred[0])))

    # DataFrame arithmetic aligns on index labels. Reuse test_y's index when
    # the row counts agree so the residuals pair row-for-row instead of
    # turning into NaN (which would be summed as 0 and report R^2 = 1).
    row_index = test_y.index if len(test_y) == len(y_pred) else None
    y_pred_df = pd.DataFrame(y_pred, index=row_index, columns=test_y.columns)

    y_test_obs = test_y
    numerator = np.nansum((y_test_obs - y_pred_df)**2)
    denominator = np.nansum((y_test_obs - np.nanmean(y_test_obs))**2)
    # NumPy float division by zero yields inf/nan with a warning rather than
    # raising, so the degenerate case is tested explicitly; 0 is the same
    # fallback value the original try/except intended.
    if np.isfinite(denominator) and denominator > 0:
        R2_score = 1 - (numerator / denominator)
    else:
        R2_score = 0
    R2_dic[key] = R2_score
    pred_dic[key] = y_pred_df

In [None]:
# Plot the first observed test curve against the first predicted curve of every
# model, then save the figure as STT_test/<file>_<Yield>.png.
c_list = ['#EAB67A', '#6E8FB2', '#B38EBB', '#3CA222', '#EF4968']
ls_list = ['-', '-', ':', '-.', '-']
key_map = {'lin_model': 'LIN model', 'gbdt_model': 'GBDT model', 'knn_model': 'KNN model', 'xgb_model': 'XGB model', 'mbr_model':'MBR-XGB model'}
# Relabel predictions with display names; keys that are already display names
# map to themselves, so re-running this cell is harmless.
pred_dic = {key_map.get(k, k): v for k, v in pred_dic.items()}

fig, ax = plt.subplots(figsize=(10, 8), dpi=300)
ax.plot(data_tpts, test_y.iloc[0, :], c='black',
        label='Actual',
        linewidth=3)

pred_min = []
pred_max = []
for i, (key, pred) in enumerate(pred_dic.items()):
    curve = pred.iloc[0, :]
    # Cycle through the style lists so more than five models cannot raise
    # IndexError; the MBR-XGB curve is drawn thicker than the others.
    ax.plot(data_tpts, curve,
            c=c_list[i % len(c_list)], ls=ls_list[i % len(ls_list)],
            label=key,
            linewidth=3 if key == 'MBR-XGB model' else 2)
    # NaN-aware extremes: builtin min/max are unreliable when NaN is present.
    pred_min.append(np.nanmin(curve))
    pred_max.append(np.nanmax(curve))
# NOTE(review): labels are set on every line but no legend is drawn - confirm
# whether a legend is intentionally omitted.

ax.set_xticks([5, 100, 200, 300])
ax.tick_params(axis='both', labelsize=25)

if pred_min:
    lo, hi = min(pred_min), max(pred_max)
    # Same limits as `lo*0.5` / `hi*1.35` for positive values, but the padding
    # still moves outward when an extreme is negative, so no curve is clipped.
    y_min = lo - 0.5 * abs(lo)
    y_max = hi + 0.35 * abs(hi)
    ax.set_ylim(y_min, y_max)

ax.set_xlabel('TOS (min)', fontsize=28)
# NOTE(review): the label is hard-coded to propylene while `Yield` (used in the
# output file name below) is set independently - confirm they agree.
ax.set_ylabel('Propylene selectivity', fontsize=28)

# savefig does not create missing directories.
os.makedirs('STT_test', exist_ok=True)
fig.savefig(f"STT_test/{file}_{Yield}.png", bbox_inches='tight')
plt.show()