### Import packages

In [1]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

### Read in combined data

In [2]:
# Load the combined payments frame from its pickle.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source (presumably written by an earlier step of this project).
payments_pickle_path = '../data/1_medicare_data/pickled_files/payments_combined.pkl'
df_payments_combined = pd.read_pickle(payments_pickle_path)

In [3]:
# Preview the first rows (head() defaults to five) to sanity-check the load.
df_payments_combined.head()

Unnamed: 0,national_provider_identifier,last_name_organization_name_of_the_provider,entity_type_of_the_provider,city_of_the_provider,zip_code_of_the_provider,state_code_of_the_provider,provider_type,place_of_service,hcpcs_code,hcpcs_description,number_of_services,number_of_medicare_beneficiaries,number_of_distinct_medicare_beneficiary_per_day_services,average_medicare_allowed_amount,year
0,1003000126,ENKESHAFI,I,CUMBERLAND,215021854,MD,Internal Medicine,F,99217,Hospital observation care discharge,23.0,23.0,23.0,72.68,2015
1,1003000126,ENKESHAFI,I,CUMBERLAND,215021854,MD,Internal Medicine,F,99219,Hospital observation care typically 50 minutes,18.0,18.0,18.0,135.85,2015
2,1003000126,ENKESHAFI,I,CUMBERLAND,215021854,MD,Internal Medicine,F,99221,"Initial hospital inpatient care, typically 30 ...",59.0,58.0,59.0,101.365085,2015
3,1003000126,ENKESHAFI,I,CUMBERLAND,215021854,MD,Internal Medicine,F,99222,"Initial hospital inpatient care, typically 50 ...",132.0,130.0,132.0,139.010455,2015
4,1003000126,ENKESHAFI,I,CUMBERLAND,215021854,MD,Internal Medicine,F,99223,"Initial hospital inpatient care, typically 70 ...",220.0,215.0,220.0,205.185955,2015


### Create new column for type of payment

In [4]:
# Boolean row masks used to label each record with a payment type.
# `conditions[i]` is paired with `choices[i]`, so the two lists must stay in
# the same order.
provider_entity = df_payments_combined.entity_type_of_the_provider
service_place = df_payments_combined.place_of_service
is_place_f = service_place == 'F'

conditions = [
    service_place == 'O',               # place 'O', any entity type
    (provider_entity == 'I') & is_place_f,  # entity 'I' at place 'F'
    (provider_entity == 'O') & is_place_f,  # entity 'O' at place 'F'
]

# Labels matching the masks above, position by position.
choices = [
    'Doctor & Facility',
    'Doctor Only',
    'Facility Only',
]

In [5]:
%%time
# Map every row to the label of the first mask it satisfies; rows matching no
# mask fall back to 'unknown'. The result is stored as a new column on the
# existing frame.
payment_type_labels = np.select(conditions, choices, default='unknown')
df_payments_combined['payment_type'] = payment_type_labels

Wall time: 10.1 s


In [6]:
# Row count per payment_type label. An 'unknown' entry here would mean some
# rows matched none of the conditions used to build the column.
df_payments_combined.payment_type.value_counts()

Doctor & Facility    17733816
Doctor Only          11048040
Facility Only          278374
Name: payment_type, dtype: int64

### Pivot to get years side by side

### Payment Data

In [7]:
# List the available column names before choosing the pivot keys.
df_payments_combined.columns

Index(['national_provider_identifier',
       'last_name_organization_name_of_the_provider',
       'entity_type_of_the_provider', 'city_of_the_provider',
       'zip_code_of_the_provider', 'state_code_of_the_provider',
       'provider_type', 'place_of_service', 'hcpcs_code', 'hcpcs_description',
       'number_of_services', 'number_of_medicare_beneficiaries',
       'number_of_distinct_medicare_beneficiary_per_day_services',
       'average_medicare_allowed_amount', 'year', 'payment_type'],
      dtype='object')

In [8]:
# Columns that identify one row of the pivoted tables. The order matters: it
# becomes the order of the index levels (and of the key columns once the index
# is reset).
pivot_index = [
    'national_provider_identifier',
    'entity_type_of_the_provider',
    'place_of_service',
    'payment_type',
    'provider_type',
    'hcpcs_code',
    'hcpcs_description',
    'zip_code_of_the_provider',
    'state_code_of_the_provider',
]

# Column whose distinct values are spread out into side-by-side columns.
pivot_cols = ['year']

In [9]:
%%time
# Pivot to one row per `pivot_index` key with one column per year, holding the
# mean of `average_medicare_allowed_amount` for that key and year. Key/year
# combinations with no source rows come out as NaN.
#
# Fix: the follow-up line used to read `df_avg_pvt = df_avg_pmt.reset_index()`,
# but `df_avg_pmt` is not defined in any cell above, so a fresh-kernel run
# raised NameError and `df_pmt_pvt` was never flattened. The reset is now
# chained onto the pivot so the index levels become ordinary columns.
df_pmt_pvt = (
    df_payments_combined
    .pivot_table(
        index=pivot_index,
        columns=pivot_cols,
        values='average_medicare_allowed_amount',
        aggfunc='mean',  # string form of np.mean; avoids the pandas FutureWarning
    )
    .reset_index()
)

# NOTE(review): the original cell bound the reset frame to `df_avg_pvt`. The
# alias is kept (same object, no copy) in case a later cell uses that name;
# remove it if nothing does.
df_avg_pvt = df_pmt_pvt

Wall time: 3min 5s


In [10]:
# Report (rows, columns) of the payment pivot, then preview its first rows.
print(df_pmt_pvt.shape)
df_pmt_pvt.head()

(20793075, 12)


year,national_provider_identifier,entity_type_of_the_provider,place_of_service,payment_type,provider_type,hcpcs_code,hcpcs_description,zip_code_of_the_provider,state_code_of_the_provider,2015,2016,2017
0,1003000126,I,F,Doctor Only,Internal Medicine,99217,Hospital observation care discharge,215021854,MD,72.68,72.743158,73.3988
1,1003000126,I,F,Doctor Only,Internal Medicine,99218,Hospital observation care typically 30 minutes,215021854,MD,,,100.08
2,1003000126,I,F,Doctor Only,Internal Medicine,99219,Hospital observation care typically 50 minutes,215021854,MD,135.85,135.01,136.38
3,1003000126,I,F,Doctor Only,Internal Medicine,99220,Hospital observation care typically 70 minutes...,215021854,MD,,189.239565,190.363729
4,1003000126,I,F,Doctor Only,Internal Medicine,99221,"Initial hospital inpatient care, typically 30 ...",215021854,MD,101.365085,100.75,101.68


### Number of beneficiaries

In [11]:
%%time
# Same pivot as the payment table, but holding the mean of `number_of_services`
# per `pivot_index` key and year. Key/year combinations with no source rows
# come out as NaN.
#
# Fix: the follow-up line used to read
# `df_services_pvt = df_avg_services.reset_index()`, but `df_avg_services` is
# not defined in any cell above, so a fresh-kernel run raised NameError. The
# reset is now chained onto the pivot itself.
#
# NOTE(review): the markdown heading above this cell says "Number of
# beneficiaries", but the values pivoted here are `number_of_services`.
# Confirm which one was intended.
df_services_pvt = (
    df_payments_combined
    .pivot_table(
        index=pivot_index,
        columns=pivot_cols,
        values='number_of_services',
        aggfunc='mean',  # string form of np.mean; avoids the pandas FutureWarning
    )
    .reset_index()
)

Wall time: 2min 23s


In [12]:
# Report (rows, columns) of the services pivot, then preview its first rows.
print(df_services_pvt.shape)
df_services_pvt.head()

(20793075, 12)


year,national_provider_identifier,entity_type_of_the_provider,place_of_service,payment_type,provider_type,hcpcs_code,hcpcs_description,zip_code_of_the_provider,state_code_of_the_provider,2015,2016,2017
0,1003000126,I,F,Doctor Only,Internal Medicine,99217,Hospital observation care discharge,215021854,MD,23.0,57.0,100.0
1,1003000126,I,F,Doctor Only,Internal Medicine,99218,Hospital observation care typically 30 minutes,215021854,MD,,,26.0
2,1003000126,I,F,Doctor Only,Internal Medicine,99219,Hospital observation care typically 50 minutes,215021854,MD,18.0,38.0,52.0
3,1003000126,I,F,Doctor Only,Internal Medicine,99220,Hospital observation care typically 70 minutes...,215021854,MD,,23.0,59.0
4,1003000126,I,F,Doctor Only,Internal Medicine,99221,"Initial hospital inpatient care, typically 30 ...",215021854,MD,59.0,20.0,16.0


#### Number of services