In [None]:
import numpy as np
import pandas as pd
import sys
sys.path.insert(0, '/src')
import run_model
import shap
from data_models import BaseModel

In [None]:
# Run configuration shared by the cells below.
client='infinity-infinity'  # key into MODELS and the `client` filter of the final delete
facilityids = [75]  # passed to run_model; only the first entry is used for the model lookup
prediction_date = '2020-02-15'  # passed to run_model and used as the censusdate filter of the final delete
s3_bucket = 'saiva-dev-data-bucket'  # passed to run_model
test = True  # forwarded positionally to run_model; presumably a test/dry-run switch — TODO confirm

In [None]:
# Look up the model id registered for this client and its first facility.
# NOTE(review): only facilityids[0] is consulted, so every facility in the run is
# attributed to this one model id — confirm that is intended for multi-facility runs.
from shared.constants import MODELS
modelid = MODELS[client][facilityids[0]]
modelid

In [None]:
# Prediction runner; later cells read .clf, .final_csr, .idens, .client and .saiva_engine from it.
predict_obj = run_model.RunPredictions()

In [None]:
# Run the prediction pipeline for the configured client, bucket, date and facilities.
# NOTE(review): presumably this call populates the attributes read by later cells — confirm.
predict_obj.run_model(client, s3_bucket, prediction_date, facilityids, test)

In [None]:
# Build a SHAP tree explainer around the fitted model and score the prediction feature matrix.
# NOTE(review): a later cell indexes shap_values[1], i.e. it expects one array per class —
# confirm against the installed shap version and model type.
explainer = shap.TreeExplainer(predict_obj.clf.model)
shap_values = explainer.shap_values(predict_obj.final_csr)

In [None]:
# Wrap the sparse feature matrix in a sparse-backed DataFrame so single rows can be pulled with .iloc.
final_x = pd.DataFrame.sparse.from_spmatrix(predict_obj.final_csr)
final_x.shape

In [None]:
# Identifier rows (masterpatientid, facilityid, censusdate are read from it later).
# presumably aligned row-for-row with final_x — TODO confirm
final_idens = predict_obj.idens
final_idens

In [None]:
# Load the model's input feature list from its artifacts directory; the `feature`
# column is used later to label each SHAP value.
# NOTE(review): assumes the CSV row order matches the column order of final_csr — confirm.
all_colnames = pd.read_csv(
            f'/data/models/{modelid}/artifacts/input_features.csv')
all_colnames

In [None]:
# Build one long-format attribution frame (one row per feature) for every prediction row.
shap_results = []

# Iterate by position instead of DataFrame.iterrows(): iterrows() goes through the
# frame's dense .values and builds a Series per row, and that row was never used.
# final_x has a default RangeIndex, so positions and labels coincide.
for idx in range(len(final_x)):
    shaps = pd.DataFrame(
        {
            "feature": all_colnames.feature.values,
            # shap_values[1]: attributions for class index 1 — assumes a per-class list; confirm
            "attribution_score": shap_values[1][idx],
            # kept as a sparse row so downstream cells see the same dtype as before
            "feature_value": final_x.iloc[idx],
        }
    )

    # Fetch the identifier row once rather than once per column.
    patient_keys = final_idens.iloc[idx]
    shaps["masterpatientid"] = patient_keys.masterpatientid
    shaps["facilityid"] = patient_keys.facilityid
    shaps["censusdate"] = patient_keys.censusdate

    shap_results.append(shaps)
print(len(shap_results))

In [None]:
# Stack the per-row frames into one long table: one row per (prediction row, feature).
# The original per-frame index is kept, so index values repeat across prediction rows.
results = pd.concat(shap_results)
results

In [None]:
# Rank each feature's attribution within its (masterpatientid, facilityid) group;
# ascending=False means the largest score gets rank 1. Ties use pandas' default
# ranking method, so tied scores can receive fractional ranks.
# NOTE(review): censusdate is not part of the grouping key — fine while a run covers one date; confirm.
results["attribution_rank"] = results.groupby(['masterpatientid', 'facilityid']).attribution_score.rank(
        ascending=False)
results

In [None]:
# Spot check: every attribution for one hard-coded patient id, best rank first.
results[(results.masterpatientid == 97992)].sort_values("attribution_rank")

In [None]:
# Stamp run-level metadata on every row and normalise censusdate to a datetime dtype.
# Note: these assignments modify `results` in place.
results["client"] = predict_obj.client
results["modelid"] = modelid
results['censusdate'] = pd.to_datetime(results.censusdate)
results

In [None]:
# Inspect column dtypes (feature_value is expected to still be sparse at this point).
results.dtypes

In [None]:
def process_attributions(attributions):
    """Add display-oriented columns derived from each row's feature name.

    Args:
        attributions: DataFrame with at least a ``feature`` column (model
            feature names such as ``rx_...`` or ``dx_...``) and a
            ``feature_value`` column, which may be sparse or dense.

    Returns:
        The same DataFrame object, modified in place, with these columns added:
        ``feature_type`` (category label chosen from the name prefix),
        ``feature_suffix`` (name with the prefix removed; ``dx_`` becomes
        ``'Code '``), ``human_readable_name`` and ``mapping_status``
        (always ``'MAPPED'``).

    Note:
        Feature names that match none of the known prefixes are left unchanged
        by ``replace``, so for those rows ``feature_type`` and
        ``feature_suffix`` both equal the raw feature name.
    """
    type_mapping_dict = {
        r'^rx_.*' : 'Medication',
        r'^dx_.*' : 'Diagnosis',
        r'^vitals_.*': 'Vital',
        r'^demo_.*': 'Demographic',
        r'^notes_swem_.*': 'Progress Note',
        r'^stays_.*': 'Stays',
    }

    prefix_remover_dict = {
        r'^rx_' : '',
        r'^dx_' : 'Code ',
        r'^vitals_': '',
        r'^demo_': '',
        r'^notes_swem_': '',
        r'^stays_': '',
    }

    attributions['feature_type'] = attributions['feature'].replace(type_mapping_dict, regex=True)
    attributions['feature_suffix'] = attributions['feature'].replace(prefix_remover_dict, regex=True)

    # Densify before the string cast, and only when the column is sparse. The old
    # code called `.sparse.to_dense()` on the concatenated string, which raises
    # AttributeError when feature_value is dense, and also on pandas >= 2.0,
    # where astype(str) on sparse data already returns a dense result.
    feature_value = attributions['feature_value']
    if isinstance(feature_value.dtype, pd.SparseDtype):
        feature_value = feature_value.sparse.to_dense()

    attributions['human_readable_name'] = (
        attributions['feature_type']
        + ' '
        + attributions['feature_suffix']
        + '; feature_value: '
        + feature_value.astype(str)
    )
    attributions['mapping_status'] = 'MAPPED'

    return attributions

In [None]:
# Add the readable name/type columns, then preview each patient's strongest attributions.
# NOTE(review): `< 10` shows ranks below 10 (i.e. the top nine when untied) — confirm whether `<= 10` was meant.
results = process_attributions(results)
results[results.attribution_rank < 10].sort_values(['masterpatientid', 'attribution_rank'])

In [None]:
# Rows to persist: attributions whose rank is 100 or better within their group.
final = results.loc[results["attribution_rank"].le(100)]
final

In [None]:
# Sanity check: how the retained attributions split across feature types.
final['feature_type'].value_counts()

In [None]:
# Inspect the dtypes of the frame that is about to be written to the database.
final.dtypes

In [None]:
# Database handle taken from the prediction runner; used below for the delete and the insert.
# presumably a SQLAlchemy Engine — TODO confirm
db_engine = predict_obj.saiva_engine
db_engine

In [None]:
# Replace this run's rows in shap_values atomically: the delete and the insert share
# one transaction, so a failed insert rolls the delete back instead of leaving the
# table without this run's rows.
# NOTE(review): assumes db_engine is a SQLAlchemy 1.x Engine (the previous code already
# relied on Engine.execute with a raw SQL string) — confirm.
# NOTE: the SQL is string-built from notebook constants; parameterise it before this
# is ever fed values from outside the notebook.
with db_engine.begin() as connection:
    # `final` can contain every facility in facilityids, so clear each one, not just
    # the first; otherwise a re-run would duplicate rows for the other facilities.
    for facilityid in facilityids:
        connection.execute(
            f"""delete from shap_values where censusdate = '{prediction_date}' and facilityid = '{facilityid}' and client = '{client}' and modelid = '{modelid}'"""
        )

    final.to_sql(
        "shap_values", connection, if_exists="append", index=False, method="multi"
    )