In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder, MinMaxScaler, RobustScaler, MaxAbsScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
from sklearn.utils.class_weight import compute_class_weight

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

from xgboost import XGBClassifier, XGBRegressor
import xgboost as xgb

# Loading data

In [21]:
features_num = [
    'Total_flux', 'Peak_flux', 
       'NUV_flux_corr', 'u_flux_corr', 'Bw_flux_corr', 'R_flux_corr',
       'I_flux_corr', 'z_flux_corr', 'y_flux_corr',
       'J_flux_corr', 'H_flux_corr', 'K_flux_corr', 'Ks_flux_corr',
       'ch1_flux_corr', 'ch2_flux_corr', 'ch3_flux_corr', 'ch4_flux_corr',
       'F_MIPS_24', 'F_PACS_100', 'F_PACS_160', 'F_SPIRE_250', 'F_SPIRE_350',
       'F_SPIRE_500', 'Z_BEST', 'r_rcs_flux_corr',
       'g_flux_corr', 'nb921_hsc_flux_corr'
    ]
y_column = "Classification"

classes = ['jet-mode radio AGN/low-excitation radio galaxy', 'quasar-like radio AGN / high-excitation radio galaxy', 
           'radio-quiet AGN', 'star-forming galaxy']

In [22]:
data = pd.read_csv("../../Data/Fangyou_data/Cleaned/combined_using_similar_columns.csv")

In [23]:
X = data[features_num]
y = data[np.append(y_column, ['Xray', 'Opt_spec', 'Extended_radio', 'IRAGN'])]

In [24]:
le = LabelEncoder()
labels = np.unique(y[y_column].astype(str))
y[y_column] = le.fit_transform(y[y_column])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[y_column] = le.fit_transform(y[y_column])


In [25]:
# Creating training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, stratify=y['Classification'], random_state=42)

y_train_flags = y_train[['Xray', 'Opt_spec', 'Extended_radio', 'IRAGN']]
y_test_flags = y_test[['Xray', 'Opt_spec', 'Extended_radio', 'IRAGN']]

y_train = y_train.drop(columns=['Xray', 'Opt_spec', 'Extended_radio', 'IRAGN'])
y_test = y_test.drop(columns=['Xray', 'Opt_spec', 'Extended_radio', 'IRAGN'])

# SHAP

In [27]:
model = XGBClassifier(#use_label_encoder=False, 
                      max_depth=10, 
                      #reg_alpha=5,   
                      #min_child_weight=0.5, 
                      reg_lambda=10,
                      #subsample=0.5,
                      eta=0.05, # Remember to reduce to 0.1 or 0.05 for better results
                      tree_method='hist', # exact is more precies, but this is much faster
                      objective='multi:softprob',
                      eval_metric =['merror', 'mlogloss'],
                      nthread=64,
                      n_estimators=700,
                      )

In [28]:
bst = model.fit(X_train, y_train, 
                eval_set=[
                    (X_train, y_train), 
                    #(X_test, y_test )
                ], 
                #early_stopping_rounds=50,
                verbose=True, 
                #sample_weight=classes_weights,
               ) #

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0]	validation_0-merror:0.11425	validation_0-mlogloss:1.31451
[1]	validation_0-merror:0.11061	validation_0-mlogloss:1.24944
[2]	validation_0-merror:0.10882	validation_0-mlogloss:1.18992
[3]	validation_0-merror:0.10745	validation_0-mlogloss:1.13511
[4]	validation_0-merror:0.10521	validation_0-mlogloss:1.08425
[5]	validation_0-merror:0.10463	validation_0-mlogloss:1.03742
[6]	validation_0-merror:0.10277	validation_0-mlogloss:0.99386
[7]	validation_0-merror:0.10198	validation_0-mlogloss:0.95332
[8]	validation_0-merror:0.10059	validation_0-mlogloss:0.91529
[9]	validation_0-merror:0.09934	validation_0-mlogloss:0.87967
[10]	validation_0-merror:0.09813	validation_0-mlogloss:0.84627
[11]	validation_0-merror:0.09705	validation_0-mlogloss:0.81508
[12]	validation_0-merror:0.09578	validation_0-mlogloss:0.78553
[13]	validation_0-merror:0.09463	validation_0-mlogloss:0.75768
[14]	validation_0-merror:0.09360	validation_0-mlogloss:0.73143
[15]	validation_0-merror:0.09288	validation_0-mlogloss:0.70669
[1

[130]	validation_0-merror:0.03153	validation_0-mlogloss:0.13236
[131]	validation_0-merror:0.03130	validation_0-mlogloss:0.13168
[132]	validation_0-merror:0.03096	validation_0-mlogloss:0.13089
[133]	validation_0-merror:0.03074	validation_0-mlogloss:0.13020
[134]	validation_0-merror:0.03046	validation_0-mlogloss:0.12942
[135]	validation_0-merror:0.03036	validation_0-mlogloss:0.12882
[136]	validation_0-merror:0.03008	validation_0-mlogloss:0.12813
[137]	validation_0-merror:0.02986	validation_0-mlogloss:0.12742
[138]	validation_0-merror:0.02967	validation_0-mlogloss:0.12668
[139]	validation_0-merror:0.02926	validation_0-mlogloss:0.12603
[140]	validation_0-merror:0.02912	validation_0-mlogloss:0.12541
[141]	validation_0-merror:0.02881	validation_0-mlogloss:0.12458
[142]	validation_0-merror:0.02850	validation_0-mlogloss:0.12387
[143]	validation_0-merror:0.02824	validation_0-mlogloss:0.12331
[144]	validation_0-merror:0.02802	validation_0-mlogloss:0.12266
[145]	validation_0-merror:0.02768	valida

[259]	validation_0-merror:0.01180	validation_0-mlogloss:0.07727
[260]	validation_0-merror:0.01161	validation_0-mlogloss:0.07700
[261]	validation_0-merror:0.01154	validation_0-mlogloss:0.07667
[262]	validation_0-merror:0.01142	validation_0-mlogloss:0.07638
[263]	validation_0-merror:0.01139	validation_0-mlogloss:0.07617
[264]	validation_0-merror:0.01129	validation_0-mlogloss:0.07586
[265]	validation_0-merror:0.01124	validation_0-mlogloss:0.07564
[266]	validation_0-merror:0.01112	validation_0-mlogloss:0.07531
[267]	validation_0-merror:0.01106	validation_0-mlogloss:0.07508
[268]	validation_0-merror:0.01098	validation_0-mlogloss:0.07482
[269]	validation_0-merror:0.01094	validation_0-mlogloss:0.07457
[270]	validation_0-merror:0.01084	validation_0-mlogloss:0.07428
[271]	validation_0-merror:0.01082	validation_0-mlogloss:0.07401
[272]	validation_0-merror:0.01072	validation_0-mlogloss:0.07376
[273]	validation_0-merror:0.01064	validation_0-mlogloss:0.07351
[274]	validation_0-merror:0.01064	valida

[388]	validation_0-merror:0.00399	validation_0-mlogloss:0.05100
[389]	validation_0-merror:0.00397	validation_0-mlogloss:0.05087
[390]	validation_0-merror:0.00395	validation_0-mlogloss:0.05074
[391]	validation_0-merror:0.00393	validation_0-mlogloss:0.05061
[392]	validation_0-merror:0.00388	validation_0-mlogloss:0.05047
[393]	validation_0-merror:0.00387	validation_0-mlogloss:0.05037
[394]	validation_0-merror:0.00381	validation_0-mlogloss:0.05021
[395]	validation_0-merror:0.00381	validation_0-mlogloss:0.05007
[396]	validation_0-merror:0.00380	validation_0-mlogloss:0.04992
[397]	validation_0-merror:0.00378	validation_0-mlogloss:0.04971
[398]	validation_0-merror:0.00375	validation_0-mlogloss:0.04954
[399]	validation_0-merror:0.00369	validation_0-mlogloss:0.04938
[400]	validation_0-merror:0.00364	validation_0-mlogloss:0.04923
[401]	validation_0-merror:0.00364	validation_0-mlogloss:0.04909
[402]	validation_0-merror:0.00363	validation_0-mlogloss:0.04894
[403]	validation_0-merror:0.00356	valida

[517]	validation_0-merror:0.00143	validation_0-mlogloss:0.03423
[518]	validation_0-merror:0.00143	validation_0-mlogloss:0.03413
[519]	validation_0-merror:0.00136	validation_0-mlogloss:0.03404
[520]	validation_0-merror:0.00136	validation_0-mlogloss:0.03397
[521]	validation_0-merror:0.00136	validation_0-mlogloss:0.03389
[522]	validation_0-merror:0.00134	validation_0-mlogloss:0.03378
[523]	validation_0-merror:0.00131	validation_0-mlogloss:0.03367
[524]	validation_0-merror:0.00125	validation_0-mlogloss:0.03357
[525]	validation_0-merror:0.00124	validation_0-mlogloss:0.03347
[526]	validation_0-merror:0.00124	validation_0-mlogloss:0.03340
[527]	validation_0-merror:0.00120	validation_0-mlogloss:0.03331
[528]	validation_0-merror:0.00119	validation_0-mlogloss:0.03321
[529]	validation_0-merror:0.00117	validation_0-mlogloss:0.03313
[530]	validation_0-merror:0.00119	validation_0-mlogloss:0.03304
[531]	validation_0-merror:0.00115	validation_0-mlogloss:0.03296
[532]	validation_0-merror:0.00112	valida

[646]	validation_0-merror:0.00031	validation_0-mlogloss:0.02410
[647]	validation_0-merror:0.00029	validation_0-mlogloss:0.02402
[648]	validation_0-merror:0.00028	validation_0-mlogloss:0.02395
[649]	validation_0-merror:0.00028	validation_0-mlogloss:0.02388
[650]	validation_0-merror:0.00028	validation_0-mlogloss:0.02383
[651]	validation_0-merror:0.00024	validation_0-mlogloss:0.02377
[652]	validation_0-merror:0.00024	validation_0-mlogloss:0.02371
[653]	validation_0-merror:0.00024	validation_0-mlogloss:0.02364
[654]	validation_0-merror:0.00024	validation_0-mlogloss:0.02358
[655]	validation_0-merror:0.00024	validation_0-mlogloss:0.02351
[656]	validation_0-merror:0.00024	validation_0-mlogloss:0.02345
[657]	validation_0-merror:0.00024	validation_0-mlogloss:0.02339
[658]	validation_0-merror:0.00024	validation_0-mlogloss:0.02334
[659]	validation_0-merror:0.00024	validation_0-mlogloss:0.02328
[660]	validation_0-merror:0.00024	validation_0-mlogloss:0.02322
[661]	validation_0-merror:0.00024	valida

In [26]:
model = XGBClassifier()
model.load_model("../../Supervised/Fangyou/Preprocessed_data/combined_xgboost.json")

XGBoostError: [21:50:36] ../include/xgboost/json.h:73: Invalid cast, from Integer to Boolean
Stack trace:
  [bt] (0) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x8d264) [0x2b72d3ebb264]
  [bt] (1) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0xbf379) [0x2b72d3eed379]
  [bt] (2) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x22fed8) [0x2b72d405ded8]
  [bt] (3) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x1960e5) [0x2b72d3fc40e5]
  [bt] (4) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x175c46) [0x2b72d3fa3c46]
  [bt] (5) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(+0x1b407d) [0x2b72d3fe207d]
  [bt] (6) /Users/users/karsten/.local/lib/python3.9/site-packages/xgboost/lib/libxgboost.so(XGBoosterLoadModel+0x5bf) [0x2b72d3ead85f]
  [bt] (7) /Software/users/modules/7/software/anaconda3/2021.11/lib/python3.9/lib-dynload/../../libffi.so.7(+0x69dd) [0x2b7290fd19dd]
  [bt] (8) /Software/users/modules/7/software/anaconda3/2021.11/lib/python3.9/lib-dynload/../../libffi.so.7(+0x6067) [0x2b7290fd1067]



In [None]:
import shap

shap.initjs()
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_train)

In [67]:
shap.plots.waterfall(exp[0])

IndexError: invalid index to scalar variable.

<Figure size 576x252 with 0 Axes>

In [74]:
shap.plots.waterfall(shap_values[0], shap_values.values[0], shap_values.data)

Exception: waterfall_plot requires a scalar base_values of the model output as the first parameter, but you have passed an array as the first parameter! Try shap.waterfall_plot(explainer.base_values[0], values[0], X[0]) or for multi-output models try shap.waterfall_plot(explainer.base_values[0], values[0][0], X[0]).

In [46]:
shap.plots.force(shap_values[0])

Exception: In v0.20 force_plot now requires the base value as the first parameter! Try shap.force_plot(explainer.expected_value, shap_values) or for multi-output models try shap.force_plot(explainer.expected_value[0], shap_values[0]).

In [11]:
from platform import python_version

print(python_version())

3.9.7
