In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
import tensorflow as tf
from models import CNN_Model

IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
Using TensorFlow backend.


In [2]:
# Rebuild the CNN in inference mode and restore its trained weights.
weights_path = 'trained_mvp_model_original_data.h5'

# Column names handed to CNN_Model as `exclude_cols`.
# NOTE(review): presumably these are dropped from the model input —
# confirm against models.CNN_Model.
exclude_cols = {
    # variant / transcript identifiers and labels
    'var_id', 'aaref', 'aaalt', 'target', 'Ensembl_transcriptid',
    'ref', 'alt', 'category', 'source', 'INFO', 'disease', 'genename',
    # genomic coordinates
    '#chr', 'pos(1-based)', 'hg19_chr', 'hg19_pos(1-based)',
    # allele-frequency and score columns
    'CADD_phred', '1000Gp3_AF', 'ExAC_AF', 'gnomad',
    'RVIS', 'mis_badness', 'MPC', 'REVEL', 'domino',
}

model = CNN_Model(
    name='res_HIS_original_data',
    fname='../data/mvp_input_data_cleaned.HIS.csv',
    f_out='../data/mvp_output_data_mode5.csv',
    weights_path=weights_path,
    exclude_cols=exclude_cols,
    train_flag=False,
    verbose=2,
)

# Build the network first, then load the saved weights into it.
model._init_model(verbose=False)
model.model.load_weights(weights_path)
print("Trained model loaded successfully!")






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where





Update your `Model` call to the Keras 2 API: `Model(inputs=Tensor("in..., outputs=Tensor("ac...)`
2025-04-24 00:54:09.454608: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2025-04-24 00:54:09.473002: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3110410000 Hz
2025-04-24 00:54:09.476735: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x71d5fc0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2025-04-24 00:54:09.476771: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2025-04-24 00:54:09.491929: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1





Trained model loaded successfully!


2025-04-24 00:54:10.045222: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:969] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-04-24 00:54:10.045400: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7285c20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-04-24 00:54:10.045415: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 2050, Compute Capability 8.6
2025-04-24 00:54:10.049340: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:969] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-04-24 00:54:10.049382: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: NVIDIA GeForce RTX 2050 major: 8 minor: 6 memoryClockRate(GHz): 1.245
pciBusID: 000

In [3]:
# Load data for SHAP analysis
df = pd.read_csv('../data/mvp_input_data_cleaned.HIS.csv')
feature_cols = [col for col in df.columns if col != 'target']
X = df[feature_cols].values

# Derive the feature count from the data instead of hard-coding it.
# With a fixed `reshape((-1, 54, 1, 1))`, a CSV with a different number of
# feature columns either fails with an opaque reshape error or, when the total
# element count happens to divide evenly, silently changes the sample count.
n_features = len(feature_cols)
model_n_features = model.model.input_shape[1]
if model_n_features is not None and model_n_features != n_features:
    raise ValueError(
        f"Feature count mismatch: data has {n_features} feature columns "
        f"but the model expects {model_n_features}."
    )

# Sample background and explanation data
np.random.seed(2025)
idx_bg = np.random.choice(X.shape[0], 100, replace=False)
idx_explain = np.random.choice(X.shape[0], 50, replace=False)

# The model is fed 4-D input: (samples, features, 1, 1).
X_bg = X[idx_bg].reshape((-1, n_features, 1, 1))
X_explain = X[idx_explain].reshape((-1, n_features, 1, 1))

# Initialize SHAP DeepExplainer
# global_variables_initializer() resets every variable in the session it runs
# in to its initial value. Running it after the weights were loaded can
# therefore discard them, so the weights are reloaded into this same session
# and the session is passed to SHAP explicitly. This guarantees the explainer
# evaluates the trained network rather than a freshly initialised one.
sess = tf.compat.v1.keras.backend.get_session()
sess.run(tf.compat.v1.global_variables_initializer())
with sess.as_default():
    model.model.load_weights(weights_path)
explainer = shap.DeepExplainer(model.model, X_bg, session=sess)
shap_values = explainer.shap_values(X_explain)

# SHAP values for plotting: drop the trailing singleton axes -> (samples, features)
X_explain_flat = X_explain.reshape((X_explain.shape[0], X_explain.shape[1]))
# shap_values[0] selects the attributions for the first model output.
shap_values_flat = shap_values[0].reshape((shap_values[0].shape[0], shap_values[0].shape[1]))





2025-04-24 00:54:20.843729: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:969] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-04-24 00:54:20.843787: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: NVIDIA GeForce RTX 2050 major: 8 minor: 6 memoryClockRate(GHz): 1.245
pciBusID: 0000:01:00.0
2025-04-24 00:54:20.843924: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcudart.so.10.0'; dlerror: libcudart.so.10.0: cannot open shared object file: No such file or directory
2025-04-24 00:54:20.843981: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcublas.so.10.0'; dlerror: libcublas.so.10.0: cannot open shared object file: No such file or directory
2025-04-24 00:54:20.844015: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could 

In [4]:
# SHAP beeswarm plot
# Show every feature: max_display follows the actual feature count instead of
# a hard-coded 54, so the plot stays complete if the input columns change.
# show=False leaves the figure open so it can be written to disk below.
shap.summary_plot(
    shap_values_flat,
    features=X_explain_flat,
    feature_names=feature_cols,
    max_display=len(feature_cols),
    show=False
)
# NOTE: 'origninal' is misspelled in the output filename; it is kept unchanged
# so anything that already references this path keeps working.
plt.savefig('../results/mvp_origninal_data_shap_beeswarm_summary.png', dpi=300, bbox_inches='tight')
print("SHAP beeswarm plot saved")


SHAP beeswarm plot saved


In [5]:
# Global importance bar chart: mean |SHAP| per feature across explained samples
mean_shap = np.mean(np.abs(shap_values_flat), axis=0)

# Ascending order, so the most important feature is drawn last by barh.
sorted_idx = np.argsort(mean_shap)
sorted_shap_values = mean_shap[sorted_idx]
sorted_feature_names = np.asarray(feature_cols)[sorted_idx]

# Tall canvas so each feature label gets its own row.
fig, ax = plt.subplots(figsize=(8, 18))
ax.barh(sorted_feature_names, sorted_shap_values, color='dodgerblue')
ax.set_xlabel('Mean(|SHAP value|)', fontsize=12)
ax.set_title('Global Feature Importance (SHAP, no direction)', fontsize=14)
ax.grid(True, axis='x', linestyle='--', alpha=0.6)
fig.tight_layout()
fig.savefig(
    '../results/mvp_origninal_data_shap_bar_plot_summary.png',
    dpi=300,
    bbox_inches='tight',
)
print("SHAP bar chart saved")

SHAP bar chart saved
