In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, plot_confusion_matrix

import shap

In [None]:
# Read the complete data set from the CSV file in the working directory.
DB = pd.read_csv(filepath_or_buffer='144612_PDB_Database.csv')

In [None]:
# Pull the four prediction targets out of the table in one pass.
y_static, y_dynamic, y_static_class, y_dynamic_class = (
    DB[column] for column in ('Static', 'Dynamic', 'Static Class', 'Dynamic Class')
)

# Every remaining (non-target) column is used as a model input.
X = DB.drop(columns=['Static', 'Dynamic', 'Static Class', 'Dynamic Class'])

In [None]:
def regression_plot(y_test, y_predict, limit=5):
    """Scatter predictions against actual values on a square plot with a y = x guide.

    Parameters
    ----------
    y_test : array-like
        Actual target values, drawn on the x axis.
    y_predict : array-like
        Predicted values, drawn on the y axis.
    limit : int, optional
        Both axes span ``[-limit, limit]`` with one tick per integer.
        Defaults to 5, the window this function always used. Points outside
        the window are not visible.

    Notes
    -----
    ``plt.rc`` changes the axis-label size globally, so figures drawn after
    this call inherit the larger labels too.
    """
    # Integer tick positions. Built with range() because the notebook's import
    # cell does not import numpy, so the previous np.arange call raised
    # NameError on every invocation.
    ticks = list(range(-limit, limit + 1))

    plt.figure(figsize=(5, 5), dpi=300)
    plt.rc('axes', labelsize=18)
    plt.scatter(y_test, y_predict, alpha=0.2, color='blue')
    plt.xlabel('Actual value')
    plt.ylabel('Predicted value')
    # Reference line on which a perfect prediction would fall.
    plt.plot([-limit, limit], [-limit, limit], color="red")
    plt.xlim(-limit, limit)
    plt.ylim(-limit, limit)
    plt.xticks(ticks)
    plt.yticks(ticks)
    plt.show()

In [None]:
# Regression experiment. The static target is active; swap in the commented
# alternatives below to run the dynamic target instead.
xgb = XGBRegressor(  # static_regression
    random_state=0,
    n_estimators=200,
    learning_rate=0.2,
    max_depth=6,
)
# xgb = XGBRegressor(random_state=0, n_estimators=200, learning_rate=0.2, max_depth=6)  # dynamic_regression

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_static, train_size=0.8, random_state=0
)
# X_train, X_test, y_train, y_test = train_test_split(X, y_dynamic, train_size=0.8, random_state=0)

models = [xgb]
models_n = ['XGB']

for clf_n, clf in zip(models_n, models):
    # Fit on the training part, then score on the held-out part.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Feature importances are available as clf.feature_importances_ if needed.

    mae = mean_absolute_error(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5  # root of the MSE
    r2 = r2_score(y_test, y_pred)

    print('Model: {} - MAE :{:.4f} | RMSE :{:.4f} | R2 :{:.4f}'.format(clf_n, mae, rmse, r2))
    regression_plot(y_test, y_pred)

In [None]:
# Classification experiment. The static class label is active; swap in the
# commented alternatives below to run the dynamic class label instead.
xgb = XGBClassifier(  # static_classification
    random_state=0,
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
)
# xgb = XGBClassifier(random_state=0, n_estimators=200, learning_rate=0.1, max_depth=6)  # dynamic_classification

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_static_class, train_size=0.8, random_state=0
)
# X_train, X_test, y_train, y_test = train_test_split(X, y_dynamic_class, train_size=0.8, random_state=0)

models = [xgb]
models_n = ['XGB']

# Empty result tables; nothing in this cell fills them.
PRED, ACC, PRECISION = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
RECALL, F1, FI = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

for clf_n, clf in zip(models_n, models):
    # Fit on the training part, then score on the held-out part.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    # Feature importances are available as clf.feature_importances_ if needed.

    # precision/recall/F1 use scikit-learn's default averaging mode.
    acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
    # this cell (and its import) only runs on older releases.
    plot_confusion_matrix(clf, X_test, y_test, cmap='Blues')
    plt.show()
    print('Model: {} - ACC :{:.4f} | PRECISION :{:.4f} | RECALL :{:.4f} | F1 :{:.4f}'.format(clf_n, acc, precision, recall, f1))

In [None]:
# SHAP summary plot for one model/target pair, fitted on the full data set.
#
# The estimator, the target it is fitted on and the output file name are all
# derived from `shap_run`, so they always agree. Previously the same estimator
# was fitted four times in a row: each fit replaced the one before it, so a
# regressor trained on the dynamic class labels ended up saved under the name
# 'static_cla.png'.
shap_runs = {
    # run name:    (estimator class, learning rate, target)
    'static_reg':  (XGBRegressor,  0.2, y_static),
    'dynamic_reg': (XGBRegressor,  0.2, y_dynamic),
    'static_cla':  (XGBClassifier, 0.1, y_static_class),
    'dynamic_cla': (XGBClassifier, 0.1, y_dynamic_class),
}
shap_run = 'static_cla'  # default keeps the file name this cell has always written

estimator_cls, learning_rate, y_shap = shap_runs[shap_run]
xgb = estimator_cls(random_state=0, n_estimators=200, learning_rate=learning_rate, max_depth=6)

# A single fit on all rows; no hold-out is needed for the explanation plot.
XGB = xgb.fit(X, y_shap)

explainer = shap.TreeExplainer(XGB)
shap_values = explainer.shap_values(X)

# show=False leaves the figure open so savefig captures the actual plot;
# with the default show=True the figure is displayed first and savefig then
# writes an empty canvas.
shap.summary_plot(shap_values, X, show=False)
plt.savefig('{}.png'.format(shap_run), dpi=300, bbox_inches='tight')
plt.show()