In [None]:
import tensorflow as tf
import keras
from keras import optimizers
from keras import regularizers
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.callbacks import TensorBoard
from keras.constraints import max_norm
from keras.models import Sequential 
from keras.layers import Dense 
from keras.layers import Dropout 
from keras.models import Model
from keras.layers import BatchNormalization
from keras.wrappers.scikit_learn import KerasClassifier 
from keras.wrappers.scikit_learn import KerasRegressor
from keras.constraints import maxnorm 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from xgboost import plot_importance
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn import ensemble
from sklearn.tree import ExtraTreeRegressor
from sklearn import svm
from sklearn import neighbors
from sklearn import tree
from sklearn.impute import SimpleImputer
import keras_metrics as km
from keras.callbacks import EarlyStopping 
from sklearn.metrics import roc_curve, auc

# Early-stopping callback shared by the fit calls in the cells below: it watches
# `val_loss` with a patience of 20 epochs and stays silent (verbose=0).
early_stopping=keras.callbacks.EarlyStopping(
 monitor="val_loss", 
 patience=20, 
 verbose=0, 
 mode="auto"
)
%matplotlib
###########loading data##########
# The CSV is decoded as GBK; the labels listed below must match its header exactly.
fdata=pd.read_csv('database_filled.csv',encoding="gbk")
# Keep 20 descriptor columns (positions 0-19) followed by the label column 'FE95' (position 20).
raw_data=fdata.loc[:,[                      
                      'Ionization Potential',#0
                      'Electronegativity',#1
                      'Number of d electrons',#2
                      'ZIF or MOF Derived',#3
                      'Carbon Nanofiber/Nanotubes',#4
                      'Carbon Black Derived',#5
                      'Biomass or other Organic Derived',#6 
                      'Main Transition Metal Content (wt. %)',#7
                      'Nitrogen Cotent (wt. %)',#8
                      'Metal-N Coordination Number (XAS)',#9    
                      'Pyridinic N Ratio',#10
                      'Pyrrolic N Ratio',#11
                      'Raman ID/IG Ratio',#12
                      'BET Surface Area (m2/g)',#13
                      'Pyrolysis Temperature (°C)',#14
                      'Pyrolysis Time (h)',#15
                      'Rising Rate (°C min-1)',#16
                      'Electrolyte Concentration (M)',#17
                      'Catalyst Loading (mg cm-2)',#18
                      'Electrolyte pH',#19
                      'FE95',#20
                        ]]

###########data standardization##########
# Column-wise (x - mean) / std over every selected column, the 'FE95' column included.
# NOTE(review): mean and std are taken over all rows, i.e. before the train/test split
# further down, so held-out rows contribute to the scaling - confirm this is intended.
# NOTE(review): a column with zero spread would be divided by zero here - presumably
# none exists in the data; verify.
standardized_data = (raw_data-np.mean(raw_data,axis=0))/np.std(raw_data,axis=0)

###########model inputs (standardized descriptors) and target (unscaled 'FE95')##########
raw_input=standardized_data.iloc[:,0:20]
raw_output=raw_data.iloc[:,20]
###########fix random seed for reproducibility##########
# In the visible cells this value is only passed to train_test_split.
seed=1222
###########train test splitting##########
# 10 % of the rows are held out as the test set.
X_train, X_test, y_train, y_test = train_test_split(raw_input, raw_output, test_size=.1,random_state=seed)
# Unstandardized descriptor / label columns taken straight from raw_data.
raw_input_global=raw_data.iloc[:,0:20]
raw_output_global=raw_data.iloc[:,20]
###########helper functions for the later hyper-parameter optimization##########
def evaluate(pre_2,real_2):
    """Score predictions against reference values.

    Parameters
    ----------
    pre_2 : array-like
        Predicted values; a column vector of shape (n, 1) is accepted as well.
    real_2 : array-like
        Reference values with the same number of elements as ``pre_2``.

    Returns
    -------
    tuple
        ``(rmse, r)`` - the root-mean-square error and the Pearson correlation
        coefficient rounded to three decimals.
    """
    # Flatten both inputs so that pd.Series below always receives 1-D data.
    pre_2=np.asarray(pre_2,dtype=float).ravel()
    real_2=np.asarray(real_2,dtype=float).ravel()
    # The RMSE is computed inline: no `rmse` helper is defined or imported in the
    # visible part of this file, so calling one would raise NameError.
    root_mean_sq_err=float(np.sqrt(np.mean((pre_2-real_2)**2)))
    pearson_r=pd.Series(pre_2).corr(pd.Series(real_2))
    return root_mean_sq_err, round(pearson_r, 3)
def compare(list_name,limit):
    """Return 1 when every element of ``list_name`` is strictly below ``limit``, else 0.

    An empty ``list_name`` yields 1.
    """
    # Evaluate every comparison first (no short-circuit), then reduce to a 0/1 flag.
    below_limit=[value < limit for value in list_name]
    return 1 if all(below_limit) else 0
def generate_arrays_from_file(path):
    """Endlessly yield ``(inputs, targets)`` dict pairs parsed from a text file.

    The file at ``path`` is re-opened and read line by line on every pass of the
    outer loop, so the generator never terminates on its own.

    NOTE(review): `process_line` is not defined in the visible part of this file;
    it is expected to turn one line into a ``(x1, x2, y)`` triple - confirm where
    it comes from.
    """
    while True:
        with open(path) as handle:
            for record in handle:
                # One line of the file -> two model inputs and one label.
                first_input, second_input, target = process_line(record)
                yield ({'input_1': first_input, 'input_2': second_input}, {'output': target})
def intergate(y_pred):
    """Binarize the first column of ``y_pred`` in place using a 0.5 cut-off.

    Prints the number of rows, then sets ``y_pred[i][0]`` to 1 where it is
    >= 0.5 and to 0 otherwise. The same (mutated) object is returned.
    """
    n_rows=y_pred.shape[0]
    print(n_rows)
    for row_idx in range(n_rows):
        y_pred[row_idx][0]=1 if y_pred[row_idx][0]>=0.5 else 0
    return y_pred
# Grid-search results collected by the loop further down, keyed by test-set accuracy.
accuracy={}
def auc_ANN(y_test,y_score,neurons1,epochs_number,dropout_rate,batch_size_number,reg,act):
    """Plot the ROC curve for one hyper-parameter combination and save it as a PNG.

    Parameters
    ----------
    y_test : array-like
        True labels. They are shifted by +1 below and the value 2 (i.e. an
        original label of 1) is treated as the positive class.
    y_score : array-like
        Predicted scores for the same samples.
    neurons1, epochs_number, dropout_rate, batch_size_number, reg, act
        Hyper-parameter values; only used to build the output file name.

    Side effects
    ------------
    Prints the AUC and writes 'ROC Curve of ... FE95 ANN.png' to the working
    directory. The figure is closed afterwards so that repeated calls (one per
    grid-search iteration) do not accumulate open figures.
    """
    y_test = y_test + 1
    y_score = y_score + 1
    # The thresholds returned by roc_curve are not needed.
    fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=2)
    roc_auc = auc(fpr, tpr)
    # Points of the chance diagonal drawn for reference.
    x_line=np.arange(0,1.01,0.01)
    y_line=np.arange(0,1.01,0.01)
    print('auc',roc_auc)
    fig=plt.figure()
    try:
        plt.plot(fpr, tpr, 'k--', label='ROC (AUC/area = {0:.2f})'.format(roc_auc), lw=2)
        plt.plot(x_line,y_line,c='red')
        # Pad the axis limits so the curve does not sit on the frame and stays easy to read.
        plt.xlim([-0.05, 1.05])
        plt.ylim([-0.05, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve of Artificial Neural Network')
        plt.legend(loc="lower right")
        plt.savefig('ROC Curve of %s %s %s %s %s %s FE95 ANN.png' %(neurons1,epochs_number,dropout_rate,batch_size_number,reg,act))
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

# Exhaustive grid search: every combination of layer width, dropout rate, batch size,
# L2 strength, activation and epoch budget trains a fresh two-hidden-layer network,
# which is then scored on the training and test split.
for neurons1 in [100,200,400,600,800]:
    for dropout_rate in [0,0.25,0.5]:
        for batch_size_number in [8,16,32]:
            for reg in [0,0.0001,0.001]:
                for act in ['sigmoid','tanh','relu','softsign']:
                    for epochs_number in range(100,800,100):
                        regularizer=keras.regularizers.l2(reg)
                        ###########keras ANN model construction##########
                        model = Sequential()
                        model.add(Dense(neurons1, input_dim=20, kernel_initializer='random_normal',
                                        bias_initializer='random_normal',activation=act,kernel_regularizer=regularizer))
                        model.add(Dropout(dropout_rate))
                        model.add(Dense(neurons1, input_dim=neurons1, kernel_initializer='random_normal',
                                        bias_initializer='random_normal',activation=act,kernel_regularizer=regularizer))
                        model.add(Dropout(dropout_rate))
                        model.add(Dense(1, input_dim=neurons1, activation='sigmoid'))
                        model.compile(loss='binary_crossentropy', optimizer='rmsprop',metrics=[km.f1_score(), km.binary_precision(), km.binary_recall()])
                        print('training...')
                        model.fit(X_train, y_train,verbose=0, batch_size=batch_size_number,epochs=epochs_number,validation_split=0.2,callbacks=[early_stopping])
                        print(neurons1,epochs_number,dropout_rate,batch_size_number,reg,act)
                        # Predict the test split once: the raw sigmoid outputs are kept as ROC
                        # scores and a copy is thresholded, because intergate() works in place.
                        # This also avoids `predict_proba`, which newer Keras releases no longer provide.
                        y_score=model.predict(X_test)
                        y_pred_test=intergate(y_score.copy())
                        y_pred_train=intergate(model.predict(X_train))
                        print(classification_report(y_train,y_pred_train))
                        print(classification_report(y_test,y_pred_test))
                        final_result=classification_report(y_test,y_pred_test,output_dict=True)
                        ac=final_result['accuracy']
                        # Several configurations can reach the same accuracy; collect them in a
                        # list per accuracy value instead of overwriting the previous entry.
                        accuracy.setdefault(ac,[]).append([neurons1,epochs_number,dropout_rate,batch_size_number,reg,act])
                        auc_ANN(y_test,y_score,neurons1,epochs_number,dropout_rate,batch_size_number,reg,act)
                        ####################################################################
                        # Drop the backend graph/session state before building the next model.
                        K.clear_session()


In [None]:
# Show the grid-search results collected in `accuracy`.
print(accuracy)

In [None]:
import eli5
def base_model():
    """Build and compile the fixed network that is handed to KerasClassifier.

    Layout: 20 inputs -> Dense(600, tanh) -> Dropout(0) -> Dense(600, tanh)
    -> Dropout(0) -> Dense(1, sigmoid). Compiled with binary cross-entropy,
    the 'rmsprop' optimizer and the accuracy metric.

    Returns
    -------
    The compiled Sequential model.
    """
    def hidden_layer(**extra):
        # Both hidden layers share everything except `input_dim`.
        return Dense(600, kernel_initializer='random_normal',
                     bias_initializer='random_normal', activation='tanh',
                     kernel_regularizer=keras.regularizers.l2(0), **extra)

    tmodel = Sequential([
        hidden_layer(input_dim=20),
        Dropout(0),
        hidden_layer(input_dim=600),
        Dropout(0),
        Dense(1, input_dim=600, activation='sigmoid'),
    ])
    tmodel.compile(loss='binary_crossentropy', optimizer='rmsprop',metrics=['accuracy'])
    return tmodel
from eli5.sklearn import PermutationImportance
# Wrap the network for scikit-learn/eli5. The epoch budget must be passed as `epochs`:
# the wrapper only forwards arguments that `fit` names, so the Keras-1 spelling
# `nb_epoch` would not set the number of epochs. 300 matches the stand-alone model below.
my_model = KerasClassifier(build_fn=base_model,epochs=300, batch_size=16, verbose= False)
my_model.fit(X_train, y_train,validation_split=0.2,callbacks=[early_stopping])
# Permutation importance with 10 shuffles per feature.
# NOTE(review): it is evaluated on the training split, not on held-out data - confirm this is intended.
perm = PermutationImportance(my_model, random_state=1,n_iter=10).fit(X_train,y_train)

In [None]:
# Permutation-importance table labelled with the descriptor column names (up to 100 rows).
eli5.show_weights(perm,feature_names=X_train.columns.tolist(),top=100)

In [None]:
# Same permutation-importance table, this time without passing feature_names.
eli5.show_weights(perm,top=100)

In [None]:
###########keras ANN model construction##########
# Stand-alone 20 -> 600 -> 600 -> 1 tanh network, trained directly with Keras
# (no scikit-learn wrapper); the SHAP cell below explains this model.
smodel = Sequential([
    Dense(600, input_dim=20, kernel_initializer='random_normal',
          bias_initializer='random_normal', activation='tanh',
          kernel_regularizer=keras.regularizers.l2(0)),
    Dropout(0),
    Dense(600, input_dim=600, kernel_initializer='random_normal',
          bias_initializer='random_normal', activation='tanh',
          kernel_regularizer=keras.regularizers.l2(0)),
    Dropout(0),
    Dense(1, input_dim=600, activation='sigmoid'),
])
smodel.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
print('training...')
# 20 % of the training rows are split off for the early-stopping monitor.
smodel.fit(
    X_train, y_train,
    verbose=0,
    batch_size=16,
    epochs=300,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
import shap
# %matplotlib
# Explain `smodel` on the standardized descriptors of all rows (train and test together).
SHAP_INPUT=standardized_data.iloc[:,0:20]
SHAP_OUTPUT=raw_data.iloc[:,20]
X_SHAP=SHAP_INPUT.values.astype(np.float32)
# y_SHAP is not used by any of the visible cells.
y_SHAP=SHAP_OUTPUT.values.astype(np.float32)
# The same array X_SHAP is passed to the explainer and then explained.
explainer = shap.GradientExplainer(smodel,X_SHAP)
shap_values = explainer.shap_values(X_SHAP)
# The cells below index shap_values[0]; printing the type shows what container was returned.
print(type(shap_values))

In [None]:
# 'dot' summary plot of the SHAP values against the standardized descriptors.
# NOTE(review): shap_values[0] assumes a list with one array per model output -
# confirm against the type printed in the previous cell for the installed shap version.
shap.summary_plot(shap_values[0], SHAP_INPUT,max_display=100,plot_type='dot')

In [None]:
# Mean absolute SHAP value, averaged over axis 1.
# NOTE(review): presumably axis 1 is the sample axis, leaving one importance per
# descriptor for output 0 - confirm the shape of shap_values.
global_importances = np.abs(shap_values).mean(1)
global_importances[0]