In [None]:
import dash_bootstrap_components as dbc
import dash
import dash_core_components as dcc 
import dash_html_components as html
import plotly.graph_objs as go
import plotly.express as px
import statsmodels.api as sm
import pandas as pd 
from datetime import datetime
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State
import joblib
import base64
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_model import ARIMA

In [None]:
import os
# Show the current working directory: the relative paths used in this notebook
# ('qrcode.png', 'data/...', 'model/...') are resolved against it.
os.getcwd()

In [None]:
# Welcome text for the home page: a heading followed by paragraphs, each
# preceded by two line breaks. Also the file name of the QR-code image.
image_filename = 'qrcode.png'
message = ["WELCOME!"]
for _paragraph in (
    "This project was performed as part of the Erdős Institute's Spring 2021 Data Science Bootcamp.",
    "The data were provided by CoverMyMeds.",
    "Created by Kavya Mukundan, Siqi Sun, Ruqiah Muhammad.",
):
    message.extend([html.Br(), html.Br(), _paragraph])
def b64_image(image_filename):
    """Read a file and return its contents as a base64 ``data:image/png`` URI.

    Parameters
    ----------
    image_filename : path of the file to read (opened in binary mode).

    Returns
    -------
    str : ``'data:image/png;base64,'`` followed by the base64 text of the bytes.
    """
    with open(image_filename, 'rb') as handle:
        encoded = base64.b64encode(handle.read())
    return 'data:image/png;base64,{}'.format(encoded.decode('utf-8'))

In [None]:
#eda analysis
#Read summary as a dataframe
df = pd.read_csv('data/summary.csv')

#List of features in Claims Data
features_c = ["bin", "drug", "reject_code", "pharmacy_claim_approved"]

#List of features in Prior Authorization Data
features_pa = ["correct_diagnosis", "tried_and_failed", "contraindication", "pa_approved"]


def _frequency_bar(frame, column, palette, stringify=True):
    """Build a bar chart of how often each value of ``frame[column]`` occurs.

    Parameters
    ----------
    frame : DataFrame holding ``column``.
    column : name of the feature to count; also used as the x-axis label.
    palette : list of colour names, one per bar category.
    stringify : when True the category values are converted with ``str`` so
        they are treated as discrete labels; when False the raw index of
        ``value_counts()`` is used.

    Returns
    -------
    The figure returned by ``px.bar``.
    """
    # Count once and reuse the result for x, y and colour
    # (value_counts() ignores missing values by default).
    counts = frame[column].value_counts()
    categories = counts.index.map(str) if stringify else counts.index
    return px.bar(
        frame,
        x=categories,
        y=counts,
        labels=dict(x=column, y="count"),
        color=categories,
        color_discrete_sequence=list(palette),
    )


#Create frequency bar graph for each feature in Claims Data
fig0 = _frequency_bar(df, "bin", ["red", "green", "blue", "goldenrod", "magenta"])
fig1 = _frequency_bar(df, "drug", ["red", "green", "blue"], stringify=False)
fig2 = _frequency_bar(df, "reject_code", ["red", "green", "blue"])
fig3 = _frequency_bar(df, "pharmacy_claim_approved", ["red", "green"])

#Create frequency bar graph for each feature in Prior Authorization Data
fig4 = _frequency_bar(df, "correct_diagnosis", ["red", "green"])
fig5 = _frequency_bar(df, "tried_and_failed", ["red", "green"])
fig6 = _frequency_bar(df, "contraindication", ["red", "green"])
fig7 = _frequency_bar(df, "pa_approved", ["red", "green"])

In [None]:
#pa pred analysis
# Claim-level records. update_WillYouNeedAPA filters this frame on 'bin', 'drug'
# and 'pharmacy_claim_approved' to look up a 'reject_code'.
claims = pd.read_csv('data/dim_claims.csv')

In [None]:
#pa volume analysis
# Week-of-year label ("%W", zero-padded string) for every row, parsed from 'date_val'.
df["week"] = df["date_val"].apply(lambda x: datetime.strptime(x, '%Y-%m-%d').strftime("%W"))
# Only rows with a recorded PA outcome are counted as PA requests.
df_filt = df.dropna(subset=["pa_approved"])

provider = df['bin'].unique()
drug = df['drug'].unique()
year = df['calendar_year'].unique()
week = df["week"].unique()

# One row per distinct (year, month, week) combination, labelled "<year>-<month>".
tdf = df_filt[["calendar_year", "calendar_month", "week"]].drop_duplicates()
tdf['year-month'] = tdf["calendar_year"].map(str)+'-'+tdf['calendar_month'].map(str)


def _volume_per_period(requests, periods, period_col):
    """Count PA requests per (calendar_year, ``period_col``).

    ``periods`` has one row per (year, month, week). Merely re-indexing it by
    (year, ``period_col``) keeps several rows for the same period: every week of
    a month in month mode, and both months of a straddling week in week mode.
    The same total would then be repeated in the series. This helper therefore
    keeps exactly one row per period, in chronological key order.

    Returns a frame with the remaining ``periods`` columns (including
    'year-month') plus a 'volume' column.
    """
    keys = ["calendar_year", period_col]
    counts = requests.groupby(keys).size().rename("volume").reset_index()
    per_period = (
        periods.drop_duplicates(subset=keys)
        .sort_values(keys)
        .merge(counts, on=keys, how="left")
    )
    return per_period.drop(columns=keys).reset_index(drop=True)


def _prepare_series(per_period, train_fraction=0.66):
    """Turn per-period volumes into the objects used for forecasting.

    Returns a tuple ``(tdf_n, df_log, df_log_shift, X, size, train, test, pred_future)``:
    the dated volume frame (indexed by its 'year-month' timestamps), the log
    volume, its first difference with the leading NaN dropped, the difference
    values as an array, the train/test split position, both halves of the
    split, and the list of dates taken for the test span.
    """
    dates = pd.Series([datetime.strptime(label, '%Y-%m') for label in per_period["year-month"]])
    tdf_n = pd.DataFrame({"year-month": dates, "volume": per_period["volume"]})
    tdf_n.index = tdf_n["year-month"]
    # Log-transform, then first-difference.
    df_log = np.log(tdf_n["volume"])
    df_log_shift = (df_log - df_log.shift()).dropna()
    X = df_log_shift.values
    size = int(len(X) * train_fraction)
    train, test = X[:size], X[size:]
    # NOTE(review): X[k] is the change between rows k and k+1 of tdf_n, while the
    # dates below are taken from rows size..len(X)-1 — confirm the intended alignment.
    pred_future = list(tdf_n["year-month"].iloc[size:len(X)])
    return tdf_n, df_log, df_log_shift, X, size, train, test, pred_future


#for mode week
# NOTE(review): weekly points are dated by their 'year-month' label, so several
# weeks share one month-start timestamp — confirm this is intended.
tdf_pred_w = _volume_per_period(df_filt, tdf, "week")
(tdf_n_w, df_log_w, df_log_shift_w, X_w,
 size_w, train_w, test_w, pred_future_w) = _prepare_series(tdf_pred_w)

#for mode month
tdf_pred_m = _volume_per_period(df_filt, tdf, "calendar_month")
(tdf_n_m, df_log_m, df_log_shift_m, X_m,
 size_m, train_m, test_m, pred_future_m) = _prepare_series(tdf_pred_m)

In [None]:
# Default period == 6: forecasts for this horizon are computed once here;
# update_volume_pred reuses them whenever the requested period is 6.
_DEFAULT_PERIOD = 6


def _extend_by_months(dates, n_months):
    """Return a copy of ``dates`` followed by ``n_months`` further entries,
    each one calendar month after the previous entry."""
    extended = list(dates)
    for _ in range(n_months):
        extended.append(extended[-1] + relativedelta(months=1))
    return extended


def _recursive_forecast(history, n_steps, build_model):
    """Forecast ``n_steps`` values one step at a time.

    At every step a model is built from ``history`` via ``build_model``, fitted,
    and asked for a one-step forecast. That forecast (not an observed value) is
    appended to ``history`` before the next step, so ``history`` is extended in
    place. Returns the list of forecasts.
    """
    forecasts = []
    for _ in range(n_steps):
        model_fit = build_model(history).fit(disp=-1)
        output = model_fit.forecast()
        yhat = float(output[0])
        forecasts.append(yhat)
        history.append(yhat)
    return forecasts


#for week
# NOTE(review): week mode also advances the dates one month per step — confirm.
pred_future_w6 = _extend_by_months(pred_future_w, _DEFAULT_PERIOD)
history_w = [x for x in train_w]
pred_w_6 = _recursive_forecast(
    history_w,
    len(pred_future_w6),
    lambda history: SARIMAX(history, order=(0,1,2), seasonal_order=(0, 1, 2, 12),
                            enforce_invertibility=False, enforce_stationarity=False),
)

#for month
pred_future_m6 = _extend_by_months(pred_future_m, _DEFAULT_PERIOD)
history_m = [x for x in train_m]
pred_m_6 = _recursive_forecast(
    history_m,
    len(pred_future_m6),
    lambda history: ARIMA(history, order=(1,0,1)),
)

In [None]:
# Dash application styled with the Bootstrap theme.
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

# Margins and padding applied to the page body.
CONTENT_STYLE = {"margin-left": "18rem", "margin-right": "2rem", "padding": "1rem 1rem"}

# (label, path) for every entry of the navigation bar, in display order.
_NAV_PAGES = [
    ("Home", "/"),
    ("EDA", "/eda"),
    ("PA approval prediction", "/pa_pred"),
    ("PA volume prediction", "/pa_volume"),
]
nav = dbc.Nav(
    [dbc.NavItem(dbc.NavLink(label, href=path, active="exact")) for label, path in _NAV_PAGES],
    pills=True,
    horizontal='center',
)

# Placeholder whose children are filled in by render_page_content.
content = html.Div(id="page-content", style=CONTENT_STYLE)

# Root layout: title, URL tracker, navigation bar, page body.
app.layout = html.Div([
    html.H1("MEDWISE: HELP GET THE MEDS YOU NEED", style={'textAlign': 'center'}),
    dcc.Location(id="url"),
    nav,
    content,
])

@app.callback(Output("page-content", "children"), [Input("url", "pathname")])
def render_page_content(pathname):
    """Return the layout to show in 'page-content' for the current URL path.

    Parameters
    ----------
    pathname : value of the 'url' Location's ``pathname`` property.

    Returns
    -------
    The matching page layout; ``None`` when no pathname is available; a short
    "404" message for any other, unrecognised path (previously such a path
    rendered an empty page with no explanation).
    """
    if pathname is None:
        return None
    # The layouts are module-level names assigned after this function is
    # defined, so they are looked up here, at call time.
    if pathname == "/":
        return home_layout
    elif pathname == "/eda":
        return eda_layout
    elif pathname == "/pa_pred":
        return pa_pred_layout
    elif pathname == "/pa_volume":
        return pa_volume_layout
    return html.Div([
        html.H3("404: Not found"),
        html.P(f"The pathname {pathname} was not recognised."),
    ])

# Home page: welcome text followed by the QR-code image embedded as a data URI.
_qr_code_img = html.Img(
    src=b64_image(image_filename),
    style={'height': '10%', 'width': '10%'},
)
home_layout = html.Div([html.P(message), _qr_code_img])

# EDA page: choose a data set; the 'type' Div is filled in by Update_type.
_dataset_radio = dcc.RadioItems(
    id='c_pa_type',
    options=[{'label': name, 'value': name}
             for name in ('Claims Data', 'Prior Authorization Data')],
    labelStyle={'display': 'block'},
)
eda_layout = html.Div([
    html.Label('Which data would you like to explore?'),
    html.Div([_dataset_radio]),
    html.Br(),
    html.Div(id='type', children=[]),
])

# PA approval page: choose claim vs. PA prediction; the 'Claim_PA_result' Div
# is filled in by Update_Claim_PA.
_claim_or_pa_radio = dcc.RadioItems(
    id='Claim_PA',
    options=[
        {'label': 'Check if the claim will get approved', 'value': 'Claim'},
        {'label': 'Check if the PA will get approved', 'value': 'PA'},
    ],
    value='Claim',
    labelStyle={'display': 'inline-block'},
)
pa_pred_layout = html.Div([
    html.Label('Please select one of the options below:'),
    _claim_or_pa_radio,
    html.Div(id='Claim_PA_result', children=[]),
])

# PA volume page: prediction mode, number of periods, and the forecast graph.
_mode_radio = dcc.RadioItems(
    id="mode",
    options=[{'label': mode_name, 'value': mode_name} for mode_name in ("week", "month")],
    value="week",
    labelStyle={'display': 'inline-block'},
)
_period_input = dcc.Input(id='period', type='number', value=6)
pa_volume_layout = html.Div([
    html.Label("Select prediction mode"),
    _mode_radio,
    html.Label("Select prediction period"),
    _period_input,
    html.Div([dcc.Graph(id='volume_pred')], style={'display': 'inline-block'}),
])

#eda analysis
##Create callbacks and function for feature selection in Claims and PA data
@app.callback(Output(component_id='type',component_property='children'),
              Input(component_id='c_pa_type',component_property='value'))
def Update_type(c_pa_type_value):
    """Build the feature picker and graph for the chosen data set.

    Parameters
    ----------
    c_pa_type_value : value of the 'c_pa_type' radio
        ('Claims Data', 'Prior Authorization Data', or None before a choice).

    Returns
    -------
    A Div holding a prompt, a RadioItems of features and a Graph; ``None``
    when the value is neither of the two data-set names.
    """
    prompt = 'Select a feature below to see its frequency.'

    if c_pa_type_value == 'Claims Data':
        return html.Div([
            html.Label(prompt),
            html.Div([
                dcc.RadioItems(
                    id='frequency_Claims',
                    options=[{'label': i, 'value': i} for i in features_c],
                )
            ]),
            html.Div([dcc.Graph(id="graph_Claims")],
                     style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),
        ])

    if c_pa_type_value == 'Prior Authorization Data':
        return html.Div([
            html.Label(prompt),
            html.Div([
                dcc.RadioItems(
                    id='frequency_PA',
                    options=[{'label': i, 'value': i} for i in features_pa],
                    value=features_pa[0],
                )
            ]),
            # features_pa[0] ('correct_diagnosis') is pre-selected, so its chart
            # (fig4) is shown from the start. The 'graph_PA' callback is
            # registered with prevent_initial_call=True and may not run when
            # these components are first inserted, which would otherwise leave
            # the graph empty next to an already-selected option.
            html.Div([dcc.Graph(id="graph_PA", figure=fig4)],
                     style={'width': '49%', 'display': 'inline-block', 'padding': '0 20'}),
        ])

    return None
      
##Create callbacks and function for feature figures for Claims data
@app.callback(Output(component_id="graph_Claims",component_property='figure'),
              Input(component_id='frequency_Claims',component_property='value'), prevent_initial_call=True)
def Update_graph(frequency_c_value):
    """Return the pre-built frequency figure for the selected Claims feature.

    Yields ``None`` when the value is not one of the four Claims features.
    """
    figure_for_feature = {
        'bin': fig0,
        'drug': fig1,
        'reject_code': fig2,
        'pharmacy_claim_approved': fig3,
    }
    return figure_for_feature.get(frequency_c_value)
    
##Create callbacks and function for feature figures for PA data
# NOTE: this function shares its name with the Claims-graph callback defined
# earlier in the file; the later definition replaces the module-level name,
# while each function is registered by its own @app.callback decorator.
@app.callback(Output(component_id="graph_PA",component_property='figure'),
              Input(component_id='frequency_PA',component_property='value'), prevent_initial_call=True)
def Update_graph(frequency_c_value):
    """Return the pre-built frequency figure for the selected PA feature.

    Yields ``None`` when the value is not one of the four PA features.
    """
    figure_for_feature = {
        'correct_diagnosis': fig4,
        'tried_and_failed': fig5,
        'contraindication': fig6,
        'pa_approved': fig7,
    }
    return figure_for_feature.get(frequency_c_value)
#pa pred analysis
@app.callback(
    Output(component_id='Claim_PA_result', component_property='children'),
    Input(component_id='Claim_PA', component_property='value'),
    State('Claim_PA_result', 'children'))
def Update_Claim_PA (Claim_PA_value,children):
    """Build the input form for the selected prediction type.

    Parameters
    ----------
    Claim_PA_value : 'Claim' or 'PA', from the 'Claim_PA' radio.
    children : current children of 'Claim_PA_result' (received as State, not used).

    Returns
    -------
    A Div with the drug/payer dropdowns (plus the three patient questions for
    'PA') and an empty result Div; ``None`` for any other value.
    """
    payers = ['417380', '999001', '417740', '417614']

    def drug_options():
        # Drug A / B / C, with the bare letter as the value.
        return [{'label': 'Drug ' + letter, 'value': letter} for letter in 'ABC']

    def narrow(component):
        # Wrap a dropdown so it takes a fifth of the page width.
        return html.Div([component], style={"width": "20%"})

    def yes_no(radio_id):
        # Inline Yes/No radio whose values are the strings '1' and '0'.
        return dcc.RadioItems(
            id=radio_id,
            options=[{'label': 'Yes', 'value': '1'}, {'label': 'No', 'value': '0'}],
            labelStyle={'display': 'inline-block'},
        )

    if Claim_PA_value == 'Claim':
        # The payer id itself is the option value.
        payer_options = [{'label': payer, 'value': payer} for payer in payers]
        return html.Div([
            html.Label("Select Drug Type"),
            narrow(dcc.Dropdown(id="Drug_Claim", options=drug_options())),
            html.Label("Select Payer"),
            narrow(dcc.Dropdown(id="Payer_Claim", options=payer_options)),
            html.Br(),
            html.Label("Prediction:"),
            html.Div(id='result_Claim'),
        ])

    if Claim_PA_value == 'PA':
        # Payers are encoded as 'C1'..'C4', in the same order as the labels.
        payer_options = [{'label': payer, 'value': 'C%d' % position}
                         for position, payer in enumerate(payers, start=1)]
        return html.Div([
            html.Label("Select Drug Type"),
            narrow(dcc.Dropdown(id="Drug_PA", options=drug_options())),
            html.Label("Select Payer"),
            narrow(dcc.Dropdown(id="Payer_PA", options=payer_options)),
            html.Label("Select Patient Information"),
            html.Label("Is the diagnosis correct?"),
            yes_no("Correct_Diagnosis"),
            html.Label("Did the patient try and fail alternate medication?"),
            yes_no("Tried_Failed"),
            html.Label("Did the patient show contraindication to this medicine?"),
            yes_no("Contraindication"),
            html.Br(),
            html.Label("Prediction:"),
            html.Div(id="result_PA"),
        ])

    return None
    

@app.callback(
    Output(component_id='result_Claim', component_property='children'),
    [Input(component_id='Drug_Claim', component_property='value'),
     Input(component_id='Payer_Claim', component_property='value')])
def update_WillYouNeedAPA(Drug_Claim_value, Payer_Claim_value):
    """Report the predicted claim-approval probability and whether a PA is likely needed.

    Parameters
    ----------
    Drug_Claim_value : selected drug ('A', 'B' or 'C'), or None.
    Payer_Claim_value : selected payer id as a string, or None.

    Returns
    -------
    str : an empty string until both selections are made; otherwise a message
    with the approval probability (percent, two decimals). Below 80 % the
    message says a PA will be needed and names a possible reject code taken
    from the first matching rejected row of ``claims``; at 80 % or above it
    says a PA may not be needed.
    """
    # Nothing to predict until both dropdowns have a value.
    if Drug_Claim_value is None or Payer_Claim_value is None:
        return ""

    # Indicator vector over these five labels: 1 where the label equals the
    # selected drug or payer, 0 elsewhere.
    X = np.array(['B','C','999001','417740','417614'])
    X1 = np.where((X==Drug_Claim_value)|(X==Payer_Claim_value), 1,0)
    proba = Claims_model.predict_proba(X1.reshape(1,-1))
    proba_1 = round((proba[0][1]*100),2)

    # Exactly 80 previously matched neither branch and produced an empty message.
    if proba_1 >= 80:
        return f'''You selected: {Drug_Claim_value} and {Payer_Claim_value}. Probability of your claim approval is {proba_1}%. 
                     You may not need a PA.'''

    reject_code = claims[(claims['bin'].astype(str)==Payer_Claim_value)&(claims['drug']==Drug_Claim_value)&(claims['pharmacy_claim_approved']==0)]['reject_code']
    codes = np.asarray(reject_code)
    # A drug/payer pair with no rejected claims used to raise IndexError here.
    code = codes[0] if codes.size else "unknown"

    return f'''You selected: {Drug_Claim_value} and {str(Payer_Claim_value)}. Probability of your claim approval is {proba_1}%. 
                 You will need a PA. Possible reject code {code}'''
    

@app.callback(
    Output(component_id='result_PA', component_property='children'),
    [Input(component_id='Drug_PA', component_property='value'),
     Input(component_id='Payer_PA', component_property='value'),
     Input(component_id ='Correct_Diagnosis', component_property='value'),
     Input(component_id ='Tried_Failed', component_property='value'),
     Input(component_id ='Contraindication', component_property='value')])
def update_WillPAGetApproved(Drug_PA_value, Payer_PA_value,Correct_Diagnosis_value,Tried_Failed_value,Contraindication_value):
    """Report the predicted PA-approval probability once every input is set.

    The three yes/no answers ('1'/'0') are concatenated into one key and the
    drug letter and payer code into another; the feature vector holds a 1 at
    each label equal to either key. Returns ``None`` while any input is unset.
    """
    selections = (Drug_PA_value, Payer_PA_value, Correct_Diagnosis_value,
                  Tried_Failed_value, Contraindication_value)
    if any(choice is None for choice in selections):
        return None

    feature_labels = np.asarray([
        '001', '010', '011', '100', '101', '110', '111',
        'AC2', 'AC3', 'AC4',
        'BC1', 'BC2', 'BC3', 'BC4',
        'CC1', 'CC2', 'CC3', 'CC4',
    ])
    answers_key = Correct_Diagnosis_value + Tried_Failed_value + Contraindication_value
    drug_payer_key = Drug_PA_value + Payer_PA_value

    indicators = np.where((feature_labels == answers_key) | (feature_labels == drug_payer_key), 1, 0)
    class_probabilities = PA_model.predict_proba(indicators.reshape(1, -1))
    approval_pct = round((class_probabilities[0][1] * 100), 2)

    return f'''The probability of the PA approval is {approval_pct}%'''
    
#pa volume analysis
def _walk_forward_forecast(train, n_steps, build_model):
    """Forecast ``n_steps`` values one at a time, feeding each forecast back.

    Starts from a copy of ``train``; at every step a model built by
    ``build_model(history)`` is fitted and its one-step forecast is appended
    to both the result and the history. Returns the list of forecasts.
    """
    history = list(train)
    forecasts = []
    for _ in range(n_steps):
        model_fit = build_model(history).fit(disp=-1)
        output = model_fit.forecast()
        yhat = float(output[0])
        forecasts.append(yhat)
        history.append(yhat)
    return forecasts


@app.callback(
    dash.dependencies.Output('volume_pred', 'figure'),
    [dash.dependencies.Input('period','value'),
     dash.dependencies.Input('mode','value')
    ]
)
def update_volume_pred(value_period, value_mode):
    """Plot observed PA volume together with the forecast for the chosen mode.

    Parameters
    ----------
    value_period : number of extra periods to forecast beyond the test span
        (from the 'period' input). ``None`` (empty input box) leaves the
        current figure unchanged; negative values are treated as 0 and
        fractional values are truncated.
    value_mode : "week" uses the weekly series with SARIMAX; any other value
        uses the monthly series with ARIMA.

    Returns
    -------
    A plotly Figure with a 'predicted' and an 'input' trace, or
    ``dash.no_update`` when no period is available.
    """
    # An empty number box arrives as None, which used to crash range().
    if value_period is None:
        return dash.no_update
    extra_steps = max(int(value_period), 0)

    if value_mode == "week":
        size, df_log, tdf_n = size_w, df_log_w, tdf_n_w
        train, test_dates = train_w, pred_future_w
        cached_predictions, cached_dates = pred_w_6, pred_future_w6

        def build_model(history):
            return SARIMAX(history, order=(0,1,2), seasonal_order=(0, 1, 2, 12),
                           enforce_invertibility=False, enforce_stationarity=False)
    else:
        size, df_log, tdf_n = size_m, df_log_m, tdf_n_m
        train, test_dates = train_m, pred_future_m
        cached_predictions, cached_dates = pred_m_6, pred_future_m6

        def build_model(history):
            return ARIMA(history, order=(1,0,1))

    if extra_steps == 6:
        # The default horizon was forecast once at start-up; reuse it.
        predictions = list(cached_predictions)
        pred_future = list(cached_dates)
    else:
        pred_future = list(test_dates)
        # NOTE(review): dates advance one month per step in week mode too — confirm.
        for _ in range(extra_steps):
            pred_future.append(pred_future[-1]+relativedelta(months=1))
        predictions = _walk_forward_forecast(train, len(pred_future), build_model)

    # Undo the differencing and the log: accumulate the forecast differences on
    # top of the last log volume inside the training span, then exponentiate.
    predictions_ARIMA_diff = pd.Series(predictions, copy=True)
    predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
    # Explicit positional access: df_log is indexed by timestamps, and integer
    # keys passed to Series[...] are no longer reliably treated as positions.
    # NOTE(review): the anchor is the log volume at position size-1 — confirm
    # this matches how the differenced series was split.
    last_train_log = df_log.iloc[:size].iloc[-1]
    predictions_ARIMA_log = pd.Series([last_train_log]*len(pred_future)).add(predictions_ARIMA_diff_cumsum, fill_value=0)
    predictions_ARIMA = np.exp(predictions_ARIMA_log)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pred_future, y=predictions_ARIMA,
                mode='lines+markers',
                name='predicted'))
    fig.add_trace(go.Scatter(x=tdf_n["year-month"], y=tdf_n["volume"],
                        mode='lines+markers',
                        name='input'))
    fig.update_layout(
               xaxis_title='Time',
               yaxis_title='PA volume')
    return fig

# Entry point: load the two fitted classifiers, then start the Dash server.
if __name__ == '__main__':
    # Claims_model and PA_model are read as module globals by
    # update_WillYouNeedAPA and update_WillPAGetApproved respectively.
    # NOTE(review): joblib.load unpickles the file's contents, so these model
    # files should only come from a trusted source.
    Claims_model = joblib.load("model/ClaimsModel_logistic.joblib")
    PA_model = joblib.load("model/PAModel_logistic.joblib")
    app.run_server()