# Objective: Build an Interactive Dashboard To Surface Trends & Patterns in Provider MIPS Scores and Measures

1. Read in the data

In [2]:
import pandas as pd
import numpy as np   

In [35]:
#using 3 files: CMS Physician Compare (provider characteristics) and MIPS measures datasets
#provider characteristics
docs_raw = pd.read_csv('../data/raw/DAC_NationalDownloadableFile.csv', dtype='str', encoding='latin1')
#doctors individual MIPS measures
mm_raw = pd.read_csv('../data/raw/ec_public_reporting.csv', dtype='str')
#doctors overall MIPS scores
mips_raw = pd.read_csv('../data/raw/ec_score_file.csv', dtype='str')

2. Clean the data (missing values, duplicates, outliers)

In [36]:
#drop duplicates
docs_raw.drop_duplicates(inplace=True)
mm_raw.drop_duplicates(inplace=True)
mips_raw.drop_duplicates(inplace=True)
#get rid of column names extra spaces
docs_raw.columns = docs_raw.columns.str.strip()
mm_raw.columns = mm_raw.columns.str.strip()
mips_raw.columns = mips_raw.columns.str.strip()

In [37]:
docs_raw.drop(columns=['Ind_PAC_ID', 'Ind_enrl_ID','mid_nm','suff', 'sec_spec_2', 'sec_spec_3', 'sec_spec_4', 'sec_spec_all',
       'org_nm', 'adr_ln_1', 'adr_ln_2','ln_2_sprs', 'cty', 'phn_numbr', 'ind_assgn', 'grp_assgn','adrs_id'],inplace=True)

In [43]:
mm_raw.drop(columns=['Ind_PAC_ID','lst_nm','frst_nm','APM_affl_1', 'APM_affl_2',
       'APM_affl_3','collection_type', 'CCXP_ind'],inplace=True)

In [44]:
mm_raw.columns

Index(['NPI', 'measure_cd', 'measure_title', 'invs_msr', 'attestation_value',
       'prf_rate', 'patient_count', 'star_value', 'five_star_benchmark'],
      dtype='object')

In [45]:
mips_raw.columns
mips_raw.drop(columns=['Provider Last Name', 'Provider First Name',
       'source', 'Facility-based scoring Certification number',
       'Facility Name'],inplace=True)

##### make the master provider level dataframe, joined by NPI

In [46]:
#We don't care about the providers who do not participate in MIPS, for this project. We know 61% don't
#drop nan for NPI keys, there's no point otherwise
docs_raw.dropna(subset=['NPI'], inplace=True)
mm_raw.dropna(subset=['NPI'], inplace=True)
mips_raw.dropna(subset=['NPI'], inplace=True)

In [None]:
#take the overall MIPS scores and left join with MIPS measures (1: many, so more rows)
mips_mm_raw = mips_raw.merge(mm_raw, how='left', on='NPI')

In [52]:
mips_mm_raw

Unnamed: 0,NPI,Org_PAC_ID,Quality_category_score,PI_category_score,IA_category_score,Cost_category_score,final_MIPS_score_without_CPB,final_MIPS_score,measure_cd,measure_title,invs_msr,attestation_value,prf_rate,patient_count,star_value,five_star_benchmark
0,1003028101,,0,,0,,0,0,,,,,,,,
1,1003028556,,100,,40,15.19,74.557,78.8092,,,,,,,,
2,1003220351,,,,,,75,75,,,,,,,,
3,1003808494,,0,,0,28.192,8.4576,8.4576,,,,,,,,
4,1003813783,,83.6417,,40,,91.8208,94.6966,IA_EC_AHE_1,Engagement of New Medicaid Patients and Follow-up,N,Y,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1196046,1982609657,9931537891,100,100,40,,100,100,PI_EC_PEA_1,Provide Patients Electronic Access to Their He...,N,,95,653,4,100
1196047,1982609657,9931537891,100,100,40,,100,100,PI_EC_PHCDRR_1_EX_1,Immunization Registry Reporting Exclusion,N,Y,,,,
1196048,1982609657,9931537891,100,100,40,,100,100,PI_EC_PHCDRR_4,Public Health Registry Reporting,N,Y,,,,
1196049,1982609657,9931537891,100,100,40,,100,100,PI_EC_PPHI_1,Security Risk Analysis,N,Y,,,,


In [55]:
#then take that and add in the docs information
docs_mips_all_raw = mips_mm_raw.merge(docs_raw, how='left', on='NPI')

In [None]:
docs_mips_all_raw
docs_mips_all_raw.drop_duplicates(inplace=True)
docs_mips_all_raw.drop_duplicates(subset=['NPI', 'Measure ID'], inplace=True)

Unnamed: 0,NPI,Org_PAC_ID,Quality_category_score,PI_category_score,IA_category_score,Cost_category_score,final_MIPS_score_without_CPB,final_MIPS_score,measure_cd,measure_title,...,Cred,Med_sch,Grd_yr,pri_spec,sec_spec_1,Telehlth,org_pac_id,num_org_mem,st,zip
0,1003028101,,0,,0,,0,0,,,...,,NEW YORK COLLEGE OF PODIATRIC MEDICINE,1983,PODIATRY,,,,,NY,115613402
1,1003028556,,100,,40,15.19,74.557,78.8092,,,...,,OTHER,2000,NEUROLOGY,,Y,,,CA,930308228
2,1003220351,,,,,,75,75,,,...,,STATE UNIVERSITY OF NEW YORK - STATE COLLEGE O...,2014,OPTOMETRY,,,,,NJ,070943654
3,1003808494,,0,,0,28.192,8.4576,8.4576,,,...,MD,"UNIVERSITY OF CALIFORNIA, GEFFEN SCHOOL OF MED...",1986,OPHTHALMOLOGY,,,,,CA,913254145
4,1003808494,,0,,0,28.192,8.4576,8.4576,,,...,MD,"UNIVERSITY OF CALIFORNIA, GEFFEN SCHOOL OF MED...",1986,OPHTHALMOLOGY,,,3375737844,2,CA,911052664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2606824,1982609657,9931537891,100,100,40,,100,100,PI_EC_PEA_1,Provide Patients Electronic Access to Their He...,...,MD,WEST VIRGINIA UNIVERSITY SCHOOL OF MEDICINE,1982,ALLERGY/IMMUNOLOGY,,Y,,,FL,329353145
2606825,1982609657,9931537891,100,100,40,,100,100,PI_EC_PHCDRR_1_EX_1,Immunization Registry Reporting Exclusion,...,MD,WEST VIRGINIA UNIVERSITY SCHOOL OF MEDICINE,1982,ALLERGY/IMMUNOLOGY,,Y,,,FL,329353145
2606826,1982609657,9931537891,100,100,40,,100,100,PI_EC_PHCDRR_4,Public Health Registry Reporting,...,MD,WEST VIRGINIA UNIVERSITY SCHOOL OF MEDICINE,1982,ALLERGY/IMMUNOLOGY,,Y,,,FL,329353145
2606827,1982609657,9931537891,100,100,40,,100,100,PI_EC_PPHI_1,Security Risk Analysis,...,MD,WEST VIRGINIA UNIVERSITY SCHOOL OF MEDICINE,1982,ALLERGY/IMMUNOLOGY,,Y,,,FL,329353145


3. Visualization of trends & patterns

In [None]:
### What are the most common measures submitted?
attested = mm[mm['attestation_value'] == 'Y']
attested['measure_title'].value_counts().reset_index().sort_values(by='count', ascending=False).head(50)
len(attested)

81990