<a href="https://colab.research.google.com/github/Bjoyita/SHAP_MaterialsDesign/blob/main/ANN_PF_kappa_multitarget_GH.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# INSTALLING MATERIALS LIBRARIES
# A bare `pip install ...` line is not valid Python and only works when
# IPython's automagic is enabled. Invoking pip through the running interpreter
# works both inside a notebook kernel and as a plain script, and guarantees
# the packages land in the environment that will import them.
import subprocess
import sys

subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "pymatgen", "matminer"]
)

# LOADING DATA
# The CSV is expected next to the notebook; later cells read its 'Formula'
# column and keep its 'kappa' and 'PF' columns as targets.
DATA_FILE = 'Pf_kappa_RT_2.csv'
ds_RT = pd.read_csv(DATA_FILE)
ds_RT

#DATA CLEANING AND PREPROCESSING
from pymatgen.core.composition import Composition
from matminer.featurizers.composition import ElementFraction

# Featurizer used below to expand a 'Composition' column into one column per
# element; print its labels so the available feature names are visible.
ef = ElementFraction()
element_fraction_labels = ef.feature_labels()
print(element_fraction_labels)

ds_RT['Formula']
# Parse every formula string into a pymatgen Composition object.
Comp = [Composition(value) for value in ds_RT['Formula']]
Comp

# Attach the parsed compositions and let the featurizer append its
# per-element columns to the frame.
ds_RT['Composition'] = Comp
ds_RT = ef.featurize_dataframe(ds_RT, 'Composition')

# Keep only the columns that hold at least one non-zero entry, then discard
# the helper 'Composition' column.
has_nonzero = (ds_RT != 0).any(axis=0)
ds_RT = ds_RT.loc[:, has_nonzero]
ds_RT = ds_RT.drop(columns=['Composition'])
ds_RT.columns

# Fixed column layout: formula first, then the 49 element columns, then the
# two targets. Later cells slice the frame by position, so this order matters.
ELEMENT_COLUMNS = [
    'B', 'O', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'K', 'Ca',
    'Ti', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'Se', 'Sr',
    'Y', 'Zr', 'Nb', 'Mo', 'Ag', 'In', 'Sn', 'Sb', 'Te', 'I', 'Cs', 'Ba',
    'La', 'Ce', 'Nd', 'Sm', 'Gd', 'Tb', 'Dy', 'Ho', 'Yb', 'Hf', 'W', 'Au',
    'Tl', 'Pb', 'Bi',
]
TARGET_COLUMNS = ['kappa', 'PF']
ds_RT = ds_RT.reindex(columns=['Formula', *ELEMENT_COLUMNS, *TARGET_COLUMNS])
ds_RT

# Positional selection: column 0 is 'Formula', columns 1-49 are the element
# columns (features), and everything from column 50 on is a target.
feature_cols = slice(1, 50)
target_cols = slice(50, None)
X = ds_RT.iloc[:, feature_cols]
y = ds_RT.iloc[:, target_cols]

#SPLITTING X and y
from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# DATA SCALING: NORMALIZATION
from sklearn.preprocessing import MinMaxScaler

# Only the targets are rescaled. The scaler learns its range from the training
# targets and the same fitted scaler is then applied to the test targets.
sc = MinMaxScaler()
sc.fit(y_train)
y_train = sc.transform(y_train)
y_test = sc.transform(y_test)

#BUILDING THE ANN MODEL
from keras.models import Sequential
from keras.layers import Dense

# Two hidden ReLU layers of 25 units each on top of the 49 input features,
# followed by a linear 2-unit output layer (one unit per target column).
model = Sequential([
    Dense(25, input_dim=49, kernel_initializer='he_uniform', activation='relu'),
    Dense(25, kernel_initializer='he_uniform', activation='relu'),
    Dense(2),
])

# Mean absolute error loss, optimised with Adam.
model.compile(loss='mae', optimizer='adam')
model.summary()

# Train for 400 epochs and keep the returned History object for plotting.
history = model.fit(X_train, y_train, epochs=400, verbose='auto')

# Per-epoch metrics as a table, with the epoch index as an extra column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

# PLOTTING THE LOSS
import matplotlib.pyplot as plt
def plot_loss(history, *, y_max=0.5):
    """Plot the per-epoch training loss stored in a fit history.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            a ``'loss'`` sequence (as returned by ``model.fit``).
        y_max: Upper limit of the y-axis. Defaults to 0.5, the value that was
            previously hard-coded; raise it if the loss curve is clipped.
    """
    plt.plot(history.history['loss'], label='loss')
    plt.ylim([0, y_max])
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)

#EVALUATION
from sklearn.metrics import r2_score

# Predictions and loss on the held-out split. y_test is the scaled array
# produced by the MinMaxScaler, so both are in the scaled target space.
y_pred = model.predict(X_test)
test_loss = model.evaluate(X_test, y_test)

# Coefficient of determination between scaled targets and predictions.
r2_score(y_true=y_test, y_pred=y_pred)

#SHAP values: Interpretation of the model

# Install through the running interpreter instead of a bare `pip install`
# line, which is not valid Python and depends on IPython automagic.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "shap"])
import shap

# TRAINING DATA
# The full training feature table serves both as the explainer's background
# data and as the set of samples being explained.
explainer = shap.KernelExplainer(model, X_train, link='identity')
shap_values = explainer.shap_values(X_train)
display(shap_values)

class_names = y.columns

# SHAP's return type for multi-output models depends on the installed version:
# older releases give a list with one (samples, features) array per output,
# while SHAP >= 0.45 gives a single (samples, features, outputs) array.
# Normalise to the per-output list so that index 0 / 1 always selects a target
# (in the column order of `y`) rather than a sample.
if isinstance(shap_values, list):
    shap_per_target = shap_values
else:
    shap_per_target = [
        shap_values[..., k] for k in range(shap_values.shape[-1])
    ]

# Combined bar summary across both targets.
shap.summary_plot(shap_per_target, class_names = class_names, feature_names = X_train.columns)

#Plotting influence on kappa
shap.summary_plot(shap_per_target[0], X_train)

#Plotting influence on PF
shap.summary_plot(shap_per_target[1], X_train)

# ON TEST DATA
shap_values_test = explainer.shap_values(X_test)

# Older SHAP releases return a list with one (samples, features) array per
# output; SHAP >= 0.45 returns one (samples, features, outputs) array.
# Convert to the per-output list so indexing by 0 / 1 selects a target.
if isinstance(shap_values_test, list):
    shap_test_per_target = shap_values_test
else:
    shap_test_per_target = [
        shap_values_test[..., k] for k in range(shap_values_test.shape[-1])
    ]

shap.summary_plot(shap_test_per_target, class_names = class_names, feature_names = X_test.columns)
shap.summary_plot(shap_test_per_target[0], X_test)  #....on kappa
shap.summary_plot(shap_test_per_target[1], X_test)  #... on PF