In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# bring in combined pickle file all years 2015-2017


In [None]:
# Load the combined payments DataFrame from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
df_payments_combined = pd.read_pickle('../data/df_payments_combined.pkl')

- Chart showing payments (average_medicare_allowed_amount) over time
  - Two charts, based on Entity (I, O): one for Individual and one for Organization
- Chart showing counts over time (count of Beneficiaries and count of Services)

# Creating New Column: payment_type

- Entity I, POS F = Doctor Only
- Entity O, POS F = Facility Only
- Entity I, POS O = Doctor & Facility
- Entity O, POS O = Doctor & Facility

In [None]:
# Boolean masks that classify each row by provider entity type and place of
# service. The masks are listed in the same order as the labels in `choices`.
entity_type = df_payments_combined['entity_type_of_the_provider']
service_place = df_payments_combined['place_of_service']

conditions = [
    service_place == 'O',                           # any entity, POS O
    (entity_type == 'I') & (service_place == 'F'),  # individual, POS F
    (entity_type == 'O') & (service_place == 'F'),  # organization, POS F
]

choices = ['Doctor & Facility', 'Doctor Only', 'Facility Only']

In [None]:
# Label each row with the choice of the first condition it satisfies; rows that
# satisfy none of the conditions are labelled 'unknown'.
df_payments_combined['payment_type'] = np.select(conditions, choices, default='unknown')

In [None]:
# Preview the last rows of the combined frame.
df_payments_combined.tail()

Convert to csv to create visuals

In [None]:
# Write the full combined frame to CSV. With the default arguments the
# DataFrame index is written as the first column of the file.
df_payments_combined.to_csv('../data/allyearscombined.csv')

In [None]:
# Number of rows per payment_type label.
df_payments_combined['payment_type'].value_counts()

# Group by year / payment_type / HCPCS code (payment_type now stands in for POS/Entity)

In [None]:
%%time
df_avg_pmt = df_payments_combined.groupby(['year',
                                           'payment_type',
                                           'hcpcs_code']).average_medicare_allowed_amount.mean().to_frame().reset_index()

In [None]:
# Display the mean allowed amount per year / payment_type / HCPCS code.
df_avg_pmt

# Measure how payments and counts change over time
“Counts” = count of Beneficiaries and count of Services

Mean of number_of_distinct_medicare_beneficiary_per_day_services PER hcpcs PER year

In [None]:
# Mean of number_of_distinct_medicare_beneficiary_per_day_services for each
# (year, hcpcs_code) pair, as a flat DataFrame.
df_avg_services_day = (
    df_payments_combined
    .groupby(['year', 'hcpcs_code'])['number_of_distinct_medicare_beneficiary_per_day_services']
    .mean()
    .reset_index()
)

In [None]:
# Display the per-year, per-HCPCS mean computed in the previous cell.
df_avg_services_day

Median of number_of_distinct_medicare_beneficiary_per_day_services PER hcpcs PER year

In [None]:
# Median of number_of_distinct_medicare_beneficiary_per_day_services for each
# (year, hcpcs_code) pair, as a flat DataFrame.
df_med_services_day = (
    df_payments_combined
    .groupby(['year', 'hcpcs_code'])['number_of_distinct_medicare_beneficiary_per_day_services']
    .median()
    .reset_index()
)

In [None]:
# Display the per-year, per-HCPCS median computed in the previous cell.
df_med_services_day

# Pivot Dataframes - for years to be side by side (Code from Diego)

Payment Data

In [None]:
# List the column names of the combined frame.
df_payments_combined.columns

In [None]:
# Columns that identify one row of the pivoted (wide) tables built below:
# each distinct combination of these values becomes one output row.
pivot_index = ['national_provider_identifier',
               'entity_type_of_the_provider', 
               'place_of_service',
               'payment_type',
               'provider_type',
               'hcpcs_code',
               'hcpcs_description',
               'zip_code_of_the_provider', 
               'state_code_of_the_provider']

# Column whose distinct values are spread out into separate output columns.
pivot_cols = ['year']  # TODO: open question from the author: should 'year' be part of the index instead?

Avg Medicare Allowed Amount

In [None]:
%%time
df_pmt_pvt = df_payments_combined.pivot_table(index = pivot_index, 
                                              columns = pivot_cols, 
                                              values = 'average_medicare_allowed_amount', 
                                              aggfunc=np.mean)
df_pmt_pvt = df_pmt_pvt.reset_index()

In [None]:
# Print (rows, columns) of the pivoted payments table, then preview its first rows.
print(df_pmt_pvt.shape)
df_pmt_pvt.head()

In [None]:
# Show the dtype of each column in the combined frame.
df_payments_combined.dtypes

# Analysis by # of beneficiaries/day (based on Number of Distinct Medicare Beneficiary/Per Day Services)

In [None]:
%%time
df_bpd_pvt = df_payments_combined.pivot_table(index = pivot_index, 
                                              columns = pivot_cols, 
                                              values = 'number_of_distinct_medicare_beneficiary_per_day_services', 
                                              aggfunc=np.mean)
df_bpd_pvt = df_bpd_pvt.reset_index()

In [None]:
# Print (rows, columns) of the pivoted beneficiaries-per-day table, then preview its first rows.
print(df_bpd_pvt.shape)
df_bpd_pvt.head()

In [None]:
# Write the pivoted beneficiaries-per-day table to CSV. With the default
# arguments the DataFrame index is written as the first column of the file.
df_bpd_pvt.to_csv('../data/beneperday.csv')

In [None]:
# Summary statistics for every column; include='all' also covers non-numeric columns.
df_bpd_pvt.describe(include = 'all')

In [None]:
# Per-column dtype and non-null count. `show_counts` replaces the `null_counts`
# keyword, which was deprecated and then removed from DataFrame.info in pandas 2.0.
df_bpd_pvt.info(verbose=True, show_counts=True)

Keep only rows where both the 2015 and 2017 values are non-null (rows missing either year are dropped).

In [None]:
# Keep only rows that have a value for both 2015 and 2017; the change between
# those two years is computed from them in a later cell.
has_both_years = df_bpd_pvt[2015].notna() & df_bpd_pvt[2017].notna()
# .copy() makes the filtered result an independent frame, so adding columns to
# it later does not trigger pandas' SettingWithCopyWarning.
df_bpd_pvt = df_bpd_pvt[has_both_years].copy()
# NOTE: the 2016 column is left untouched and may still contain NaN; an earlier
# attempt to fill it with 0 (for a sum aggregation) was abandoned as not working.
df_bpd_pvt.tail()

In [None]:
# (rows, columns) remaining after the 2015/2017 filter.
df_bpd_pvt.shape

Create column `bpd_change_17_15`: the change in beneficiaries per day from 2015 to 2017 (2017 value minus 2015 value).

In [None]:
# Change between the two end years: 2017 value minus 2015 value
# (positive means an increase over the period).
df_bpd_pvt['bpd_change_17_15'] = df_bpd_pvt[2017].sub(df_bpd_pvt[2015])

In [None]:
# Open questions from the author: should NPI stay in the grouping keys, and is
# hcpcs_description needed here? (The 2015 and 2017 columns were added to the
# keys relative to the original code.)
# NOTE(review): groupby drops rows whose key is NaN by default, so rows with no
# 2016 value are excluded by this step. Confirm that is intended.
bpd_group_keys = ['national_provider_identifier',
                  'hcpcs_code', 'payment_type', 2015, 2016, 2017]
df_bpd_pvt = (
    df_bpd_pvt
    .groupby(bpd_group_keys)['bpd_change_17_15']
    .mean()
    .reset_index()
)
df_bpd_pvt

Code below from Ari to sort by max to min

In [None]:
# Sort from the largest increase to the largest decrease. The change column
# created earlier in this notebook is named 'bpd_change_17_15'; sorting by
# 'bpd_change' raised KeyError because no such column exists.
df_bpd_sorted = df_bpd_pvt.sort_values('bpd_change_17_15', ascending=False)
# Display only: the result of reset_index() is not assigned, so df_bpd_sorted
# keeps the original index labels.
df_bpd_sorted.reset_index()

In [None]:
#groupby hcpcs_code and payment_type 
#df_bpd_sorted = df_bpd_pvt.groupby(['payment_type','hcpcs_code']).bpd_change.mean().to_frame().reset_index()
#df_bpd_sorted.sort_values('bpd_change',ascending=False)

In [None]:
# Percent change from 2015 to 2017, relative to the 2015 baseline:
#   (2017 - 2015) / 2015 * 100
# Two fixes from the previous version: the change column is named
# 'bpd_change_17_15' (not 'bpd_change', which raised KeyError), and the
# denominator is the starting (2015) value rather than the 2017 value.
# Open question from the author: use the absolute value of the change instead?
# NOTE(review): a 2015 value of 0 would yield inf here; confirm whether that can occur.
df_bpd_sorted['%_Change'] = (
    df_bpd_sorted['bpd_change_17_15'] / df_bpd_sorted[2015] * 100
)

In [None]:
# Display the sorted table, including the percent-change column.
df_bpd_sorted

In [None]:
# ^ export this df to csv (add state possibly)

In [None]:
#create new df with group by hcpcs code (1 row per code), do %change after or through Tableau

In [None]:
# Inspect every row for a single procedure code (G0471).
is_g0471 = df_bpd_sorted['hcpcs_code'].eq('G0471')
df = df_bpd_sorted.loc[is_g0471]
df

In [None]:
# 20 smallest change values (the largest decreases). Uses the actual column name
# 'bpd_change_17_15'; 'bpd_change' does not exist and raised KeyError.
# Open question from the author: keep only the top 20 changes (+ or -)?
df_bpd_sorted['bpd_change_17_15'].sort_values().head(20)

In [None]:
# 20 largest change values (the largest increases). Uses the actual column name
# 'bpd_change_17_15'; 'bpd_change' does not exist and raised KeyError.
df_bpd_sorted['bpd_change_17_15'].sort_values().tail(20)

In [None]:
# How often each distinct change value occurs, most frequent first. Uses the
# actual column name 'bpd_change_17_15'; 'bpd_change' does not exist and raised KeyError.
df_bpd_sorted['bpd_change_17_15'].value_counts(ascending=False)

In [None]:
#sns.pairplot(df_bpd_pvt)