# HEAP-DL wandb.ai Experiments

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import matplotlib.style as style
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

## Data Loading

In [None]:
DATA_PATH='data/clean/model_input.csv'

In [None]:
# Read only the columns used for modelling from the pipe-delimited input file.
model_columns = [
    "quarter", "duration", "rectype", "pos", "pat_age", "der_sex",
    "dispense_fee", "copay", "paid", "age_group", "bill_spec", "formulary",
]
df = pd.read_csv(DATA_PATH, delimiter="|", usecols=model_columns)

In [None]:
df.describe(include='all').T

In [None]:
# Print how many distinct values each loaded column holds.
for column_name in df.columns:
    distinct_count = len(df[column_name].unique())
    print("Column name {} and unique values are {}".format(column_name, distinct_count))

In [None]:
df.info()

In [None]:
# Total cost per row: paid + copay + dispense_fee, summed across the columns.
target_columns = ['paid', 'copay', 'dispense_fee']
df['target'] = df[target_columns].sum(axis=1)

### Categorical Features

In [None]:
cat_features=["quarter","duration","rectype","bill_spec", "pos","der_sex","age_group","formulary"]
out_feature="target"
cat_features

In [None]:
df["pos"] = df["pos"].astype("str")
df["age_group"] = df["age_group"].astype("str")

### Label Encoding of Categorical Features

In [None]:
from sklearn.preprocessing import LabelEncoder
# Fit one LabelEncoder per categorical column, replace the column with its
# integer codes, and keep the fitted encoder under the column name.
lbl_encoders = {}
for feature in cat_features:
    print(feature)
    encoder = LabelEncoder()
    df[feature] = encoder.fit_transform(df[feature])
    lbl_encoders[feature] = encoder

### Scaling Numerical & Target columns

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer # Try
from sklearn.preprocessing import RobustScaler
#  Need to determine the scaler
#scaler = MinMaxScaler(feature_range=(0, 1))

# Use a dedicated scaler per column. Re-fitting one shared scaler on `pat_age`
# overwrites the statistics learned from `target`, which makes it impossible to
# map model predictions back to the original cost scale afterwards.
target_scaler = RobustScaler()
pat_age_scaler = RobustScaler()
df['target'] = target_scaler.fit_transform(df['target'].values.reshape(-1, 1)).ravel()
df['pat_age'] = pat_age_scaler.fit_transform(df['pat_age'].values.reshape(-1, 1)).ravel()

# Backward-compatible alias: the original `scaler` ended up fitted on `pat_age`.
scaler = pat_age_scaler

# NOTE(review): both scalers are fitted on the full frame, i.e. before any
# train/test split is made further down in this notebook.
plt.title("Distribution of Charges")
sns.kdeplot(df['target'])

In [None]:
def find_outliers_claims(x):
    """Locate outliers in ``x`` using the 1.5 * IQR rule.

    Values strictly below ``Q1 - 1.5 * IQR`` or strictly above
    ``Q3 + 1.5 * IQR`` are reported.

    Args:
        x: Series-like object exposing ``.index`` and supporting boolean
            comparison and label lookup.

    Returns:
        A tuple ``(outlier_indices, outlier_values)`` of two lists: the index
        labels of the outliers and the corresponding values.
    """
    lower_quartile, upper_quartile = np.percentile(x, [25, 75])
    spread = upper_quartile - lower_quartile
    lower_fence = lower_quartile - 1.5 * spread
    upper_fence = upper_quartile + 1.5 * spread

    is_outlier = (x < lower_fence) | (x > upper_fence)
    outlier_indices = list(x.index[is_outlier])
    outlier_values = list(x[outlier_indices])
    return outlier_indices, outlier_values

In [None]:

### Stacking and Converting Into Tensors
# Earlier candidate column set, kept for reference:
#["der_sex","pos","diag_admit","diag1","diag2","diag3","ndc","rectype","tos_flag","pat_region","pat_state","prscbr_spec"]
# Stack the encoded categorical columns side by side (one column per feature).
# The model indexes its embeddings by column position, so this order must match
# the order used to compute the embedding sizes.
cat_features = np.stack(
    [
        df[_column]
        for _column in (
            'quarter', 'duration', 'rectype', 'bill_spec',
            'pos', 'der_sex', 'age_group', 'formulary',
        )
    ],
    axis=1,
)

cat_features

### Convert Category Features to Torch Tensors

In [None]:
### Convert numpy to Tensors
import torch
cat_features=torch.tensor(cat_features,dtype=torch.int64)
cat_features

In [None]:
df["target"] = df["target"].astype("float")
# `fillna` returns a new Series, so the result has to be assigned back;
# otherwise missing ages stay NaN and propagate into the continuous tensor.
df["pat_age"] = df["pat_age"].astype("float").fillna(0)

# Names of the continuous input columns, in the order they appear in `df`.
cont_features = [col for col in df.columns if col in ("pat_age",)]


### Stacking continuous variable to a tensor

In [None]:

# Stack the continuous columns into a (rows, n_continuous) float tensor.
cont_values = torch.tensor(
    np.stack([df[name].values for name in cont_features], axis=1),
    dtype=torch.float,
)
cont_values

### Dependent Feature 

In [None]:

y=torch.tensor(df['target'].values,dtype=torch.float).reshape(-1,1)
y

In [None]:
cat_features.shape,cont_values.shape,y.shape

#### Embedding Size For Categorical columns

In [None]:
# Number of distinct codes per categorical column, in the same column order
# that was used when stacking the categorical features.
cat_dims = [
    df[col].nunique(dropna=False)
    for col in ["quarter", "duration", "rectype", "bill_spec", "pos", "der_sex", "age_group", "formulary"]
]

In [None]:
cat_dims

In [None]:
# (cardinality, embedding width) per categorical column; the width is half the
# cardinality rounded up, capped at 50.
embedding_dim = [(n_levels, min(50, (n_levels + 1) // 2)) for n_levels in cat_dims]

In [None]:
embedding_dim

### Defining the Embedding Layers

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
embed_representation=nn.ModuleList([nn.Embedding(inp,out) for inp,out in embedding_dim])
embed_representation

In [None]:
# Look up each categorical column in its own embedding table.
embedding_val = [
    embed_layer(cat_features[:, column])
    for column, embed_layer in enumerate(embed_representation)
]

In [None]:
z = torch.cat(embedding_val, 1)
z

In [None]:

final_embed=z
final_embed.shape


HEAP-DL has four hidden layers with 50 neurons each. Every hidden layer applies a ReLU activation, followed by BatchNorm1d (which also acts as a regularization step) and a dropout rate of 0.2 during training. The categorical features are converted into feature embeddings, and a dropout of 0.2 is applied to the embeddings before they are passed to the hidden layers. The model was trained for 200 epochs and evaluated with the RMSE score. The predicted target variable is a transformed value of a derived numerical column: the sum of the claim paid amount, the pharmacy dispense fee (`dispense_fee`) and the patient copay, since these are assumed to be the direct cost for the payer as per the IQVIA Pharmetrics data dictionary and user guide.

### Create a Feed Forward Neural Network

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
class FeedForwardNN(nn.Module):
    """Feed-forward regressor over embedded categorical and continuous inputs.

    Each categorical column gets its own embedding table. The concatenated
    embeddings go through dropout, are joined with the batch-normalised
    continuous features and are fed through a stack of
    ``Linear -> ReLU -> BatchNorm1d -> Dropout`` blocks followed by a final
    linear output layer.
    """

    def __init__(self, embedding_dim, n_cont, out_sz, layers, p=0.5):
        """Build the network.

        Args:
            embedding_dim: Iterable of ``(num_embeddings, embedding_width)``
                pairs, one per categorical column, in column order.
            n_cont: Number of continuous input features.
            out_sz: Width of the output layer.
            layers: Sequence of hidden-layer widths. May be empty, in which
                case the output layer is applied directly to the inputs.
            p: Dropout probability used for the embeddings and every hidden
                block.
        """
        super().__init__()
        self.embeds = nn.ModuleList([nn.Embedding(inp, out) for inp, out in embedding_dim])
        self.emb_drop = nn.Dropout(p)
        self.bn_cont = nn.BatchNorm1d(n_cont)

        # Width of the first linear layer: all embedding widths + continuous features.
        n_in = sum(out for _, out in embedding_dim) + n_cont

        layerlist = []
        for width in layers:
            layerlist.extend([
                nn.Linear(n_in, width),
                nn.ReLU(inplace=True),
                nn.BatchNorm1d(width),  # also acts as a regulariser
                nn.Dropout(p),
            ])
            n_in = width

        # Size the head from the running width instead of `layers[-1]` so an
        # empty `layers` sequence does not raise IndexError.
        layerlist.append(nn.Linear(n_in, out_sz))

        self.layers = nn.Sequential(*layerlist)

    def forward(self, x_cat, x_cont):
        """Compute predictions.

        Args:
            x_cat: Integer tensor indexed as ``x_cat[:, i]`` for the i-th
                categorical column.
            x_cont: Float tensor of continuous features, concatenated with the
                embeddings along dimension 1.

        Returns:
            The output of the final linear layer.
        """
        embedded = [embed(x_cat[:, col]) for col, embed in enumerate(self.embeds)]
        x = self.emb_drop(torch.cat(embedded, 1))

        x_cont = self.bn_cont(x_cont)
        x = torch.cat([x, x_cont], 1)
        return self.layers(x)

### Model Initialization

In [None]:
torch.manual_seed(100)

### HEAP-DL Model Summary

In [None]:
import wandb
# Authenticate before starting a run; calling login() after init() has no
# effect on the run that init() already created.
wandb.login()
wandb.init(project='heap-dl-experiements', entity='heap-dl-team1745')

In [None]:
sweep_config = {
    'method': 'random'
    }

In [None]:
metric = {
    'name': 'loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric

In [None]:
# Discrete part of the sweep search space.
hidden_layer_keys = ('fc1_layer_size', 'fc2_layer_size', 'fc3_layer_size', 'fc4_layer_size')

parameters_dict = {'optimizer': {'values': ['adam', 'sgd','adamw','rmsprop']}}
# Every hidden layer is searched over the same two widths.
parameters_dict.update({key: {'values': [50, 100]} for key in hidden_layer_keys})
parameters_dict['dropout'] = {'values': [0.1,0.2,0.3, 0.4, 0.5]}
parameters_dict['loss_function'] = {'values': ['MSE']}

sweep_config['parameters'] = parameters_dict

In [None]:
parameters_dict.update({
    'epochs': {
        'values': [100,200,300,500]}
    })

In [None]:
import math

# Learning rate is sampled from a flat distribution between 0 and 0.1.
parameters_dict['learning_rate'] = {
    'distribution': 'uniform',
    'min': 0,
    'max': 0.1,
}
# Number of leading rows used per run (train + held-out share).
parameters_dict['batch_size'] = {
    'values': [25000, 50000,75000,100000,125000, 135000],
}
# Fraction of those rows that is held out from training.
parameters_dict['test_ratio'] = {
    'values': [0.10, 0.15, 0.20,0.25,0.30],
}

In [None]:
import pprint

pprint.pprint(sweep_config)

In [None]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn

def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer selected by name for ``network``.

    Args:
        network: Module whose ``parameters()`` are optimised.
        optimizer: One of ``"sgd"``, ``"adam"``, ``"adamw"`` or ``"rmsprop"``.
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer. SGD uses momentum 0.9.

    Raises:
        ValueError: If ``optimizer`` is not a supported name. (Previously the
            name string itself was returned and the failure surfaced only
            later, on the first optimizer call.)
    """
    factories = {
        "sgd": lambda params: optim.SGD(params, lr=learning_rate, momentum=0.9),
        "adam": lambda params: optim.Adam(params, lr=learning_rate),
        "adamw": lambda params: optim.AdamW(params, lr=learning_rate),
        "rmsprop": lambda params: optim.RMSprop(params, lr=learning_rate),
    }
    try:
        factory = factories[optimizer]
    except KeyError:
        raise ValueError(
            "Unsupported optimizer {!r}; expected one of {}".format(optimizer, sorted(factories))
        ) from None
    return factory(network.parameters())

def build_loss_function(network, loss):
    """Return the loss module selected by name.

    Args:
        network: Unused; kept so the signature matches the other builders.
        loss: Name of the loss. Only ``"MSE"`` is supported.

    Returns:
        An ``nn.MSELoss`` instance.

    Raises:
        ValueError: If ``loss`` is not a supported name (previously this ended
            in an ``UnboundLocalError``).
    """
    if loss == "MSE":
        return nn.MSELoss()
    raise ValueError("Unsupported loss function {!r}; expected 'MSE'".format(loss))

def build_network(fc1_layer_size,fc2_layer_size,fc3_layer_size,fc4_layer_size, dropout,embedding_dim, cont_features):
    """Build a four-hidden-layer ``FeedForwardNN`` with a single output.

    Args:
        fc1_layer_size, fc2_layer_size, fc3_layer_size, fc4_layer_size: Widths
            of the four hidden layers, in order.
        dropout: Dropout probability passed to the network as ``p``.
        embedding_dim: Embedding sizes passed through to ``FeedForwardNN``.
        cont_features: Collection of continuous features; only its length is
            used.

    Returns:
        The constructed ``FeedForwardNN``.
    """
    hidden_widths = [fc1_layer_size, fc2_layer_size, fc3_layer_size, fc4_layer_size]
    n_continuous = len(cont_features)
    return FeedForwardNN(embedding_dim, n_continuous, 1, hidden_widths, p=dropout)

def build_dataset(batch_size,test_ratio,categorical, continuos, y, return_test=False):
    """Slice the leading ``batch_size`` rows into a training part and a held-out part.

    Of the first ``batch_size`` rows, the last ``int(batch_size * test_ratio)``
    are held out and the rest form the training part.

    Args:
        batch_size: Number of leading rows to consider in total.
        test_ratio: Fraction of ``batch_size`` reserved for the held-out part.
        categorical: Sliceable categorical features.
        continuos: Sliceable continuous features.
        y: Sliceable targets.
        return_test: When True, also return the held-out slices. Defaults to
            False, which keeps the original three-element return value.

    Returns:
        ``(data_categorical, data_cont, y_data)`` for the training part, or
        ``(train_tuple, test_tuple)`` with the same layout when
        ``return_test`` is True.
    """
    test_size = int(batch_size * test_ratio)
    split = batch_size - test_size

    train_part = (categorical[:split], continuos[:split], y[:split])
    if not return_test:
        return train_part

    test_part = (
        categorical[split:batch_size],
        continuos[split:batch_size],
        y[split:batch_size],
    )
    return train_part, test_part

### Loss Function and Optimizer

In [None]:
sweep_id = wandb.sweep(sweep_config, project="heap-dl-experiements", entity='heap-dl-team1745')

In [None]:
# Fixed baseline split. Later cells (length check, evaluation, result table,
# attribution) read these module-level names, so the split must be defined here.
batch_size = 50000
test_size = int(batch_size * 0.15)
split_at = batch_size - test_size

# The first `split_at` rows are used for training ...
train_categorical = cat_features[:split_at]
train_cont = cont_values[:split_at]
y_train = y[:split_at]

# ... and the remaining rows up to `batch_size` are held out for testing.
test_categorical = cat_features[split_at:batch_size]
test_cont = cont_values[split_at:batch_size]
y_test = y[split_at:batch_size]

In [None]:
len(train_categorical),len(test_categorical),len(train_cont),len(test_cont),len(y_train),len(y_test)

In [None]:
def train(config=None):
    """Run one training trial inside a wandb run.

    Builds the network, optimizer, loss and training slice from the run's
    configuration, then does full-batch training for ``config.epochs`` epochs,
    logging the RMSE loss and the epoch number to wandb at every epoch.

    Args:
        config: Optional configuration passed to ``wandb.init``. When called
            by ``wandb.agent`` the values are set by the sweep controller.

    Returns:
        List of per-epoch RMSE training losses as Python floats.
    """
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, this config will be set by Sweep Controller
        config = wandb.config
        final_losses = []

        model = build_network(
            config.fc1_layer_size,
            config.fc2_layer_size,
            config.fc3_layer_size,
            config.fc4_layer_size,
            config.dropout,
            embedding_dim,
            cont_features,
        )
        optimizer = build_optimizer(model, config.optimizer, config.learning_rate)
        loss_function = build_loss_function(model, config.loss_function)
        train_categorical, train_cont, y_train = build_dataset(
            config.batch_size, config.test_ratio, cat_features, cont_values, y
        )

        model.train()
        for epoch in range(1, config.epochs + 1):
            optimizer.zero_grad()
            y_pred = model(train_categorical, train_cont)
            loss = torch.sqrt(loss_function(y_pred, y_train))  # RMSE
            loss.backward()
            optimizer.step()

            # Record a plain float: storing the tensor itself would keep each
            # epoch's autograd graph alive for the whole run.
            loss_value = loss.item()
            wandb.log({"loss": loss_value, "epoch": epoch})
            final_losses.append(loss_value)
            if epoch % 10 == 1:
                print("Epoch number: {} and the loss : {}".format(epoch, loss_value))

    return final_losses
            
                    

In [None]:
wandb.agent(sweep_id, train, count=150)

### HEAP-DL Model Training

In [None]:
# NOTE(review): this baseline training loop is disabled. While it stays
# commented out, nothing visible in this notebook assigns the module-level
# names `model`, `loss_function`, `epochs` or `final_losses` that the plotting,
# evaluation and model-saving cells below read -- confirm how those cells are
# meant to be run after the sweep.
# # Set Model in Training Mode
# model.train()
# epochs=200
# final_losses=[]
# for i in range(epochs):
#     i=i+1
#     y_pred=model(train_categorical,train_cont)
#     loss=torch.sqrt(loss_function(y_pred,y_train)) ### RMSE
#     wandb.log({"loss": loss})
#     final_losses.append(loss)
#     if i%10==1:
#         print("Epoch number: {} and the loss : {}".format(i,loss.item()))
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Convert every recorded loss to a plain float first: tensors that still
# require grad cannot be converted to NumPy by matplotlib. The x-axis is taken
# from the history length so it cannot disagree with a separate `epochs` value.
loss_history = [float(value) for value in final_losses]
plt.plot(range(len(loss_history)), loss_history)
plt.ylabel('RMSE Loss')
plt.xlabel('epoch');

#### Validate the Test Data
The HEAP-DL model achieved an **RMSE score of 7.74**.

In [None]:
# Switch to evaluation mode and score the held-out split without tracking gradients.
y_pred = ""
model.eval()
with torch.no_grad():
    y_pred = model(test_categorical, test_cont)
    test_mse = loss_function(y_pred, y_test)
    loss = torch.sqrt(test_mse)
print(f'RMSE: {loss}')

### Test Prediction and Difference for Visualization

In [None]:
# Put the held-out targets and the predictions side by side and add their difference.
data_verify = pd.DataFrame(y_test.tolist(), columns=["Test"])
data_predicted = pd.DataFrame(y_pred.tolist(), columns=["Prediction"])
final_output = pd.concat([data_verify, data_predicted], axis=1)
final_output['Difference'] = final_output['Test'].sub(final_output['Prediction'])
final_output.sample(3)

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Line chart comparing the held-out targets with the model predictions.
fig = go.Figure()
fig.add_trace(go.Scatter(x=final_output['Test'].index, y=final_output['Test'],
                         mode='lines',
                         name='Actual Value'))
fig.add_trace(go.Scatter(x=final_output['Prediction'].index, y=final_output['Prediction'],
                         mode='lines',
                         name='Test prediction'))

axis_font = dict(
    family='Rockwell',
    size=12,
    color='white',
)

fig.update_layout(
    xaxis=dict(
        showline=True,
        showgrid=True,
        showticklabels=False,
        linecolor='white',
        linewidth=1
    ),
    yaxis=dict(
        # `title.font` replaces the deprecated `titlefont` attribute.
        title=dict(
            text='Paid Amount',
            font=dict(axis_font),
        ),
        showline=True,
        showgrid=True,
        showticklabels=True,
        linecolor='white',
        linewidth=1,
        ticks='outside',
        tickfont=dict(axis_font),
    ),
    showlegend=True,
    template='plotly_dark'
)

# Chart heading, placed just above the top-left corner of the plot area.
annotations = [
    dict(xref='paper', yref='paper', x=0.0, y=1.05,
         xanchor='left', yanchor='bottom',
         text='HEAP-DL Results',
         font=dict(family='Rockwell',
                   size=26,
                   color='white'),
         showarrow=False)
]
fig.update_layout(annotations=annotations)

fig.show()

### Saving the Model with State Dictionaries

In [None]:
torch.save(model.state_dict(),'/data/workspace_files/model/heapdl-baseline.pt')

### Loading model and using prior defined Embedding dimensions

In [None]:
# (num_embeddings, embedding_width) per categorical column.
# NOTE(review): these presumably mirror the sizes used when the checkpoint was
# saved -- load_state_dict fails on any shape mismatch.
embs_size=[(4, 2), (5, 3), (6, 3), (64, 32), (43, 22), (3, 2), (10, 5), (5, 3)]


model1= FeedForwardNN(embs_size,len(cont_features),1,[50,50,50,50],p=0.2)
# map_location lets the checkpoint load on a CPU-only machine.
model1.load_state_dict(torch.load('/data/workspace_files/model/heapdl-baseline.pt', map_location='cpu'))
# A freshly built module starts in training mode; switch to eval so dropout is
# disabled and batch norm uses its running statistics in the attribution cells.
model1.eval()

In [None]:
model1

## Integrated Gradients using Captum
Reference: https://captum.ai/docs/introduction

In [None]:
# imports from captum library
from captum.attr import LayerConductance, LayerActivation, LayerIntegratedGradients
from captum.attr import IntegratedGradients, DeepLift, GradientShap, NoiseTunnel, FeatureAblation
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

In [None]:
lig = LayerIntegratedGradients(model1, model1.emb_drop)

In [None]:
# Integrated gradients scale the path gradients by (input - baseline), so
# passing the inputs themselves as baselines yields all-zero attributions.
# Use an all-zero reference instead: category code 0 for every categorical
# column and 0.0 for the continuous features.
# NOTE(review): code 0 is a real category after label encoding -- pick a more
# meaningful reference category if one exists.
baseline_categorical = torch.zeros_like(test_categorical)
baseline_cont = torch.zeros_like(test_cont)
attributions_start, delta_start = lig.attribute(inputs=(test_categorical,test_cont), baselines=(baseline_categorical,baseline_cont), return_convergence_delta=True)

In [None]:
ig_attr_test_sum = attributions_start.detach().numpy().sum(0)
ig_attr_test_norm_sum = ig_attr_test_sum / np.linalg.norm(ig_attr_test_sum, ord=1)
ig_attr_test_norm_sum.shape

In [None]:
# Sum attributions over the samples and L1-normalise them.
ig_attr_test_sum = attributions_start.detach().numpy().sum(0)
ig_attr_test_norm_sum = ig_attr_test_sum / np.linalg.norm(ig_attr_test_sum, ord=1)

# One bar per attributed dimension; derive the count from the data instead of
# hard-coding it so the plot stays correct if the embedding sizes change.
x_axis_data = np.arange(len(ig_attr_test_norm_sum))
#x_axis_data_labels = list(map(lambda idx: attributions_start[:,idx], x_axis_data))

# Read the weights from `model1`, the model the attributions were computed on.
lin_weight = model1.layers[4].weight[0].detach().numpy()
y_axis_lin_weight = lin_weight / np.linalg.norm(lin_weight, ord=1)

width = 0.3
legends = ['Int Grads']

# Font settings must be in place before the axes and their texts are created.
FONT_SIZE = 9
plt.rc('font', size=FONT_SIZE)            # fontsize of the text sizes
plt.rc('axes', titlesize=FONT_SIZE)       # fontsize of the axes title
plt.rc('axes', labelsize=FONT_SIZE)       # fontsize of the x and y labels
plt.rc('legend', fontsize=FONT_SIZE - 4)  # fontsize of the legend

plt.figure(figsize=(20, 10))

ax = plt.subplot()
ax.set_title('Comparing input feature importances across multiple algorithms and learned weights')
ax.set_ylabel('Attributions')

ax.bar(x_axis_data, ig_attr_test_norm_sum, width, align='center', alpha=0.8, color='#eb5e7c')
ax.autoscale_view()
plt.tight_layout()

ax.set_xticks(x_axis_data)
#ax.set_xticklabels(x_axis_data_labels)

plt.legend(legends, loc=3)
plt.show()