In [1]:
import pandas as pd
import numpy as np
import sys
import os
import math
from sklearn.model_selection import train_test_split

# Extend the import path with the directory named by the CMS_ROOT environment
# variable (raises KeyError if it is unset) -- presumably so the project-local
# `cms_modules` package imported further down can be resolved.
sys.path.append(os.environ['CMS_ROOT'])

# Raise pandas' display limits so wide/long frames are not elided in cell output.
pd.set_option('display.max_columns', 150)
pd.set_option('display.max_rows', 150)

### Definitions

In [5]:
# shared data (raw)
# Directory and file name of the source CSV; they are joined and read only when
# the local HDF5 cache file does not exist yet.
# NOTE(review): absolute, machine-specific path -- the commented-out line is the
# alternative location for a different machine.
shared_medicare_dir = '/home/groups/fau-bigdata-datasets/medicare/part-d'
# shared_medicare_dir = '/Users/jujohnson/cms-data/aggregated'
csv_file = '20190814_NPI-level_2013_to_2017_Medicare_PartD_aggregated_with_LEIE_one-hot_encoding.csv'

In [6]:
# local data (hdf5)
# Location of the HDF5 file that holds the cached raw frame and, later, the
# normalized train/test frames.
# NOTE(review): absolute, machine-specific path -- the commented-out line is the
# alternative location for a different machine.
my_data_dir = '/home/jjohn273/cms-data/aggregated/'
# my_data_dir = '/Users/jujohnson/cms-data/aggregated/'
hdf5_file = 'partd-2017-minmax-scaled.hdf5'
hdf5_path = os.path.join(my_data_dir, hdf5_file)

# keys for accessing hdf5: one per frame stored in the file
raw_key = 'raw'                   # un-normalized data as read from the CSV
train_key = 'train_normalized'    # min-max scaled training split
test_key = 'test_normalized'      # min-max scaled test split

### Load Combined Data

In [9]:
# Load the raw data set. The HDF5 file acts as a cache: when it already exists the
# frame is read from it, otherwise the CSV is parsed once and written to HDF5 to
# speed up future i/o.
# `key` is passed by keyword: pandas 2.x deprecates positional arguments to
# `to_hdf` other than the path, and the other HDF5 calls in this notebook already
# use `key=`.
if os.path.isfile(hdf5_path):
    df = pd.read_hdf(hdf5_path, key=raw_key)
else:
    df = pd.read_csv(os.path.join(shared_medicare_dir, csv_file))
    df.to_hdf(hdf5_path, key=raw_key)

In [10]:
# Project-local helper (resolved through the CMS_ROOT path added at the top).
from cms_modules.utils import get_imbalance_description
# Rows are samples, columns are features (the count still includes the
# 'exclusion' label and the 'npi' / 'year' columns that are dropped later).
sample_count, feature_count = df.shape
print('Sample Count: ', sample_count)
print('Feature Count: ', feature_count)
# At this point the label is still the raw 'yes' / 'no' string, so both label
# values are passed explicitly -- presumably as (positive, negative); confirm
# against the helper's signature.
print(get_imbalance_description(df['exclusion'], 'yes', 'no'))

  from ._conv import register_converters as _register_converters


Sample Count:  3647452
Feature Count:  136
Negative Samples: 3646167
Positive Samples: 1285
Postive Class Ratio: 0.03523007293858836


In [13]:
# Column names of the 2017 Part D frame from 'Addiction Medicine' onward (the
# one-hot provider-type columns), kept in their original order, one per line.
# NOTE(review): 'exclusion' is the label column (converted to 0 / 1 and used for
# stratification further down), not a provider type. It is kept here because the
# counts and differences printed below include it.
new_provider_types = pd.Index(data=[
    'Addiction Medicine',
    'Advanced Heart Failure and Transplant Cardiology',
    'Allergy/Immunology',
    'Ambulatory Surgical Center',
    'Anesthesiology',
    'Anesthesiology Assistant',
    'Audiologist (billing independently)',
    'Cardiac Electrophysiology',
    'Cardiac Surgery',
    'Cardiology',
    'Certified Clinical Nurse Specialist',
    'Certified Nurse Midwife',
    'Chiropractic',
    'Clinical Psychologist',
    'Colorectal Surgery (formerly proctology)',
    'Critical Care (Intensivists)',
    'CRNA',
    'Dentist',
    'Dermatology',
    'Diagnostic Radiology',
    'Emergency Medicine',
    'Endocrinology',
    'Family Practice',
    'Gastroenterology',
    'General Practice',
    'General Surgery',
    'Geriatric Medicine',
    'Geriatric Psychiatry',
    'Gynecological/Oncology',
    'Hand Surgery',
    'Hematology',
    'Hematology/Oncology',
    'Hospice and Palliative Care',
    'Hospital (Dmercs Only)',
    'Hospitalist',
    'Infectious Disease',
    'Internal Medicine',
    'Interventional Cardiology',
    'Interventional Pain Management',
    'Interventional Radiology',
    'Licensed Clinical Social Worker',
    'Mass Immunizer Roster Biller',
    'Maxillofacial Surgery',
    'Medical Oncology',
    'Multispecialty Clinic/Group Practice',
    'Nephrology',
    'Neurology',
    'Neuropsychiatry',
    'Neurosurgery',
    'Nuclear Medicine',
    'Nurse Practitioner',
    'Nursing Facility, Other (Dmercs Only)',
    'Obstetrics/Gynecology',
    'Occupational therapist',
    'Ophthalmology',
    'Optician',
    'Optometry',
    'Oral Surgery (dentists only)',
    'Orthopedic Surgery',
    'Osteopathic Manipulative Medicine',
    'Otolaryngology',
    'Pain Management',
    'Pathology',
    'Pediatric Medicine',
    'Peripheral Vascular Disease',
    'Physical Medicine and Rehabilitation',
    'Physical Therapist in Private Practice',
    'Physician Assistant',
    'Plastic and Reconstructive Surgery',
    'Podiatry',
    'Preventive Medicine',
    'Psychiatry',
    'Public Health or Welfare Agency',
    'Pulmonary Disease',
    'Radiation Oncology',
    'Registered Dietician/Nutrition Professional',
    'Rehabilitation Agency',
    'Rheumatology',
    'Skilled Nursing Facility',
    'Sleep Medicine',
    'Slide Preparation Facility',
    'Speech Language Pathologist',
    'Sports Medicine',
    'Surgical Oncology',
    'Thoracic Surgery',
    'Unknown Physician Specialty Code',
    'Urology',
    'Vascular Surgery',
    'Voluntary Health or Charitable Agency',
    'exclusion',
    'Anesthesiologist Assistants',
    'Individual Certified Prosthetist-Orthotist',
    'Mass Immunization Roster Biller',
    'Physical Therapist',
    'Psychologist (billing independently)',
    'Public Health Welfare Agency',
    'SNF (Dmercs Only)',
    'Unknown Supplier/Provider',
    'All Other Suppliers',
    'Ambulance Service Supplier',
    'Centralized Flu',
    'Medical Supply Company, Other',
    'Pharmacy',
    'Voluntary Health or Charitable Agencies',
])

In [11]:
# Show every column name of the raw frame as a NumPy array of labels.
df.columns.values

array(['npi', 'year', 'bene_count_sum', 'total_claim_count_sum',
       'total_30_day_fill_count_sum', 'total_day_supply_sum',
       'total_drug_cost_sum', 'bene_count_mean', 'total_claim_count_mean',
       'total_30_day_fill_count_mean', 'total_day_supply_mean',
       'total_drug_cost_mean', 'bene_count_median',
       'total_claim_count_median', 'total_30_day_fill_count_median',
       'total_day_supply_median', 'total_drug_cost_median',
       'bene_count_sd', 'total_claim_count_sd',
       'total_30_day_fill_count_sd', 'total_day_supply_sd',
       'total_drug_cost_sd', 'bene_count_min', 'total_claim_count_min',
       'total_30_day_fill_count_min', 'total_day_supply_min',
       'total_drug_cost_min', 'bene_count_max', 'total_claim_count_max',
       'total_30_day_fill_count_max', 'total_day_supply_max',
       'total_drug_cost_max', 'Addiction Medicine',
       'Advanced Heart Failure and Transplant Cardiology',
       'Allergy/Immunology', 'Ambulatory Surgical Center',
      

In [14]:
# `new_provider_types` is already built by an earlier cell (In [13]). This cell
# previously held a verbatim copy of that 104-entry literal, which meant two
# definitions had to be kept in sync by hand. The copy is removed so the list
# has a single source of truth; the cell now only validates the existing value.
assert 'new_provider_types' in globals(), \
    'run the cell that defines new_provider_types first'
# The set operations below (union / join / difference) assume unique names.
assert new_provider_types.is_unique, \
    'new_provider_types contains duplicate names'

In [15]:
# Provider-type names from the 2016 Part B data, kept in their original order,
# one per line. Used below to compare against the 2017 Part D provider types.
partB_2016_providers = pd.Index([
    'Addiction Medicine',
    'All Other Suppliers',
    'Allergy/Immunology',
    'Ambulance Service Supplier',
    'Ambulatory Surgical Center',
    'Anesthesiologist Assistants',
    'Anesthesiology',
    'Audiologist (billing independently)',
    'Cardiac Electrophysiology',
    'Cardiac Surgery',
    'Cardiology',
    'Centralized Flu',
    'Certified Clinical Nurse Specialist',
    'Certified Nurse Midwife',
    'Chiropractic',
    'Clinical Laboratory',
    'Clinical Psychologist',
    'Colorectal Surgery (formerly proctology)',
    'Critical Care (Intensivists)',
    'CRNA',
    'Dentist',
    'Dermatology',
    'Diagnostic Radiology',
    'Emergency Medicine',
    'Endocrinology',
    'Family Practice',
    'Gastroenterology',
    'General Practice',
    'General Surgery',
    'Geriatric Medicine',
    'Geriatric Psychiatry',
    'Gynecological/Oncology',
    'Hand Surgery',
    'Hematology',
    'Hematology/Oncology',
    'Hospice and Palliative Care',
    'Hospitalist',
    'Independent Diagnostic Testing Facility',
    'Infectious Disease',
    'Internal Medicine',
    'Interventional Cardiology',
    'Interventional Pain Management',
    'Interventional Radiology',
    'Licensed Clinical Social Worker',
    'Mammographic Screening Center',
    'Mass Immunization Roster Biller',
    'Maxillofacial Surgery',
    'Medical Oncology',
    'Multispecialty Clinic/Group Practice',
    'Nephrology',
    'Neurology',
    'Neuropsychiatry',
    'Neurosurgery',
    'Nuclear Medicine',
    'Nurse Practitioner',
    'Obstetrics/Gynecology',
    'Occupational therapist',
    'Ophthalmology',
    'Optometry',
    'Oral Surgery (dentists only)',
    'Orthopedic Surgery',
    'Osteopathic Manipulative Medicine',
    'Otolaryngology',
    'Pain Management',
    'Pathology',
    'Pediatric Medicine',
    'Peripheral Vascular Disease',
    'Physical Medicine and Rehabilitation',
    'Physical Therapist',
    'Physician Assistant',
    'Plastic and Reconstructive Surgery',
    'Podiatry',
    'Portable X-ray',
    'Preventive Medicine',
    'Psychiatry',
    'Public Health Welfare Agency',
    'Pulmonary Disease',
    'Radiation Oncology',
    'Radiation Therapy',
    'Registered Dietician/Nutrition Professional',
    'Rheumatology',
    'Sleep Medicine',
    'Slide Preparation Facility',
    'Speech Language Pathologist',
    'Sports Medicine',
    'Surgical Oncology',
    'Thoracic Surgery',
    'Unknown Physician Specialty Code',
    'Unknown Supplier/Provider',
    'Urology',
    'Vascular Surgery',
    'Pharmacy',
    'Psychologist (billing independently)',
])

In [16]:
# Sorted, de-duplicated union of both name lists (np.union1d returns an ndarray,
# not a pandas Index).
all_types = np.union1d(partB_2016_providers, new_provider_types)

In [19]:
# Sizes of the two name lists and of their union.
# NOTE(review): the Part D list contains the 'exclusion' label column, so its
# count and the union count each include one entry that is not a provider type.
print('Part B 2016 provider type count', len(partB_2016_providers))
print('Part D 2017 provider type count', len(new_provider_types))
print('Union of Part B and Combined type count', len(all_types))

Part B 2016 provider type count 93
Part D 2017 provider type count 104
Union of Part B and Combined type count 109


In [20]:
# Inner join of the two indexes keeps only the names present in both lists.
common_types = partB_2016_providers.join(new_provider_types, how='inner')
print('Common provider type count', len(common_types))

Common provider type count 88


In [21]:
# Names that appear only in the 2017 list. The result also contains 'exclusion',
# because that label column is part of `new_provider_types`.
print('Provider types that are in new 2017 data and not in old Part B data')
new_provider_types.difference(partB_2016_providers)

Provider types that are in new 2017 data and not in old Part B data


Index(['Advanced Heart Failure and Transplant Cardiology',
       'Anesthesiology Assistant', 'Hospital (Dmercs Only)',
       'Individual Certified Prosthetist-Orthotist',
       'Mass Immunizer Roster Biller', 'Medical Supply Company, Other',
       'Nursing Facility, Other (Dmercs Only)', 'Optician',
       'Physical Therapist in Private Practice',
       'Public Health or Welfare Agency', 'Rehabilitation Agency',
       'SNF (Dmercs Only)', 'Skilled Nursing Facility',
       'Voluntary Health or Charitable Agencies',
       'Voluntary Health or Charitable Agency', 'exclusion'],
      dtype='object')

In [22]:
# Names that appear only in the 2016 Part B list (set difference in the opposite
# direction of the previous cell). The message previously read "and now in",
# a typo for "and not in" that inverted its meaning.
print('Provider types that are in old Part B data and not in new 2017 data')
partB_2016_providers.difference(new_provider_types)

Provider types that are in old Part B data and now in new 2017 data


Index(['Clinical Laboratory', 'Independent Diagnostic Testing Facility',
       'Mammographic Screening Center', 'Portable X-ray', 'Radiation Therapy'],
      dtype='object')

### Examining New Features

In [25]:
# Load the raw 2016 Part B frame (stored under key 'partB_raw') from its own
# HDF5 file in the local data directory, for a column-level comparison.
partB_df = pd.read_hdf(os.path.join(my_data_dir, 'partB-2016-minmax-scaled.hdf5'), 'partB_raw')

In [26]:
# All Part B column names as an Index so set operations can be used; preview
# the first ten.
old_features = pd.Index(partB_df.columns.values)
old_features[:10]

Index(['npi', 'year', 'line_srvc_cnt_sum', 'bene_unique_cnt_sum',
       'bene_day_srvc_cnt_sum', 'average_submitted_chrg_amt_sum',
       'average_medicare_payment_amt_sum', 'line_srvc_cnt_mean',
       'bene_unique_cnt_mean', 'bene_day_srvc_cnt_mean'],
      dtype='object')

In [27]:
# All Part D column names (taken from `df` before any columns are dropped).
new_features = pd.Index(df.columns.values)

In [29]:
# Columns present in the Part D frame but absent from the Part B frame.
# NOTE: `difference` is re-assigned with the opposite set difference in a later
# cell, so its meaning depends on which cell ran last.
difference = new_features.difference(old_features)
print('New features that were not in PartB data', len(difference))
difference

New features that were not in PartB data 45


Index(['Advanced Heart Failure and Transplant Cardiology',
       'Anesthesiology Assistant', 'Hospital (Dmercs Only)',
       'Individual Certified Prosthetist-Orthotist',
       'Mass Immunizer Roster Biller', 'Medical Supply Company, Other',
       'Nursing Facility, Other (Dmercs Only)', 'Optician',
       'Physical Therapist in Private Practice',
       'Public Health or Welfare Agency', 'Rehabilitation Agency',
       'SNF (Dmercs Only)', 'Skilled Nursing Facility',
       'Voluntary Health or Charitable Agencies',
       'Voluntary Health or Charitable Agency', 'bene_count_max',
       'bene_count_mean', 'bene_count_median', 'bene_count_min',
       'bene_count_sd', 'bene_count_sum', 'total_30_day_fill_count_max',
       'total_30_day_fill_count_mean', 'total_30_day_fill_count_median',
       'total_30_day_fill_count_min', 'total_30_day_fill_count_sd',
       'total_30_day_fill_count_sum', 'total_claim_count_max',
       'total_claim_count_mean', 'total_claim_count_median',
    

In [31]:
# Columns present in the Part B frame but absent from the Part D frame.
# NOTE: this overwrites the `difference` value computed by the previous
# comparison cell.
difference = old_features.difference(new_features)
print('Old features that are not in new part d data', len(difference))
difference

Old features that are not in new part d data 37


Index(['Clinical Laboratory', 'F', 'Independent Diagnostic Testing Facility',
       'M', 'Mammographic Screening Center', 'Portable X-ray',
       'Radiation Therapy', 'average_medicare_payment_amt_max',
       'average_medicare_payment_amt_mean',
       'average_medicare_payment_amt_median',
       'average_medicare_payment_amt_min', 'average_medicare_payment_amt_sd',
       'average_medicare_payment_amt_sum', 'average_submitted_chrg_amt_max',
       'average_submitted_chrg_amt_mean', 'average_submitted_chrg_amt_median',
       'average_submitted_chrg_amt_min', 'average_submitted_chrg_amt_sd',
       'average_submitted_chrg_amt_sum', 'bene_day_srvc_cnt_max',
       'bene_day_srvc_cnt_mean', 'bene_day_srvc_cnt_median',
       'bene_day_srvc_cnt_min', 'bene_day_srvc_cnt_sd',
       'bene_day_srvc_cnt_sum', 'bene_unique_cnt_max', 'bene_unique_cnt_mean',
       'bene_unique_cnt_median', 'bene_unique_cnt_min', 'bene_unique_cnt_sd',
       'bene_unique_cnt_sum', 'line_srvc_cnt_max', 'line_

### Drop NPI & Year Columns, Convert Label to 0 / 1

In [32]:
# Identifier columns that must not be fed to the model.
drop_columns = ['npi', 'year']
# Raw label values -> numeric class labels.
label_map = {'no': 0, 'yes': 1}

# Encode the label by assigning the converted column back to the frame.
# The previous `df['exclusion'].replace(..., inplace=True)` was a chained
# in-place call on a column selection; with pandas Copy-on-Write it updates a
# temporary object and leaves `df` unchanged. The dtype guard makes the cell
# safe to re-run after the label has already been converted.
if not pd.api.types.is_numeric_dtype(df['exclusion']):
    labels = df['exclusion'].map(label_map)
    if labels.isna().any():
        # Fail here rather than later inside the scaler with a less clear error.
        raise ValueError("'exclusion' contains values other than 'yes' / 'no'")
    df['exclusion'] = labels.astype('int64')

# errors='ignore' keeps the cell re-runnable once the columns are already gone.
df = df.drop(columns=drop_columns, errors='ignore')

### Split Into Train and Test with 80 / 20 Split

In [33]:
# 80 / 20 split of the full frame (features and label together). Stratifying on
# the 'exclusion' label keeps the class ratio the same in both parts, and the
# fixed random_state makes the shuffle reproducible.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    stratify=df['exclusion'],
    random_state=42)

In [34]:
# Report the class balance of each split to confirm the stratification.
# Unlike the earlier call on the raw frame, no label values are passed here --
# presumably the helper's defaults match the 0 / 1 encoding; confirm against
# its signature.
print('Training imbalance levels:')
print(get_imbalance_description(train_data['exclusion']))
print('\nTest imbalance levels:')
print(get_imbalance_description(test_data['exclusion']))

Training imbalance levels:
Negative Samples: 2916933
Positive Samples: 1028
Postive Class Ratio: 0.03523008018270292

Test imbalance levels:
Negative Samples: 729234
Positive Samples: 257
Postive Class Ratio: 0.03523004396215992


In [35]:
test_data.shape

(729491, 134)

### Normalize Data

In [36]:
from sklearn.preprocessing import MinMaxScaler

# Learn each column's min / max from the training split only, then apply the
# same scaling to both splits. Every column of the frame is scaled, including
# the 'exclusion' label. Test values may land outside [0, 1] because the
# scaler never sees the test split while fitting.
scaler = MinMaxScaler().fit(train_data)

# The scaler returns plain arrays; wrap them back into frames with the
# original column names.
train_data_norm = pd.DataFrame(scaler.transform(train_data), columns=train_data.columns)
test_data_norm = pd.DataFrame(scaler.transform(test_data), columns=test_data.columns)

### Save Normalized Data to HDF5

In [37]:
# Store each normalized split in the shared HDF5 file under its own key.
for normalized_frame, hdf_key in (
    (train_data_norm, train_key),
    (test_data_norm, test_key),
):
    normalized_frame.to_hdf(hdf5_path, key=hdf_key)

### Read Normalized Data from HDF5

In [38]:
# Load the normalized splits back from HDF5.
# NOTE: this rebinds `train_data` / `test_data`; from here on they refer to the
# normalized frames, not the raw splits produced by train_test_split.
train_data, test_data = (
    pd.read_hdf(hdf5_path, key=stored_key) for stored_key in (train_key, test_key)
)

In [39]:
# Summary statistics of the normalized training split as read back from HDF5.
# The scaler was fit on this split, so each column is expected to span 0 to 1.
train_data.describe()

Unnamed: 0,bene_count_sum,total_claim_count_sum,total_30_day_fill_count_sum,total_day_supply_sum,total_drug_cost_sum,bene_count_mean,total_claim_count_mean,total_30_day_fill_count_mean,total_day_supply_mean,total_drug_cost_mean,bene_count_median,total_claim_count_median,total_30_day_fill_count_median,total_day_supply_median,total_drug_cost_median,bene_count_sd,total_claim_count_sd,total_30_day_fill_count_sd,total_day_supply_sd,total_drug_cost_sd,bene_count_min,total_claim_count_min,total_30_day_fill_count_min,total_day_supply_min,total_drug_cost_min,bene_count_max,total_claim_count_max,total_30_day_fill_count_max,total_day_supply_max,total_drug_cost_max,Addiction Medicine,Advanced Heart Failure and Transplant Cardiology,Allergy/Immunology,Ambulatory Surgical Center,Anesthesiology,Anesthesiology Assistant,Audiologist (billing independently),Cardiac Electrophysiology,Cardiac Surgery,Cardiology,Certified Clinical Nurse Specialist,Certified Nurse Midwife,Chiropractic,Clinical Psychologist,Colorectal Surgery (formerly proctology),Critical Care (Intensivists),CRNA,Dentist,Dermatology,Diagnostic Radiology,Emergency Medicine,Endocrinology,Family Practice,Gastroenterology,General Practice,General Surgery,Geriatric Medicine,Geriatric Psychiatry,Gynecological/Oncology,Hand Surgery,Hematology,Hematology/Oncology,Hospice and Palliative Care,Hospital (Dmercs Only),Hospitalist,Infectious Disease,Internal Medicine,Interventional Cardiology,Interventional Pain Management,Interventional Radiology,Licensed Clinical Social Worker,Mass Immunizer Roster Biller,Maxillofacial Surgery,Medical Oncology,Multispecialty Clinic/Group Practice,Nephrology,Neurology,Neuropsychiatry,Neurosurgery,Nuclear Medicine,Nurse Practitioner,"Nursing Facility, Other (Dmercs Only)",Obstetrics/Gynecology,Occupational therapist,Ophthalmology,Optician,Optometry,Oral Surgery (dentists only),Orthopedic Surgery,Osteopathic Manipulative Medicine,Otolaryngology,Pain Management,Pathology,Pediatric Medicine,Peripheral 
Vascular Disease,Physical Medicine and Rehabilitation,Physical Therapist in Private Practice,Physician Assistant,Plastic and Reconstructive Surgery,Podiatry,Preventive Medicine,Psychiatry,Public Health or Welfare Agency,Pulmonary Disease,Radiation Oncology,Registered Dietician/Nutrition Professional,Rehabilitation Agency,Rheumatology,Skilled Nursing Facility,Sleep Medicine,Slide Preparation Facility,Speech Language Pathologist,Sports Medicine,Surgical Oncology,Thoracic Surgery,Unknown Physician Specialty Code,Urology,Vascular Surgery,Voluntary Health or Charitable Agency,exclusion,Anesthesiologist Assistants,Individual Certified Prosthetist-Orthotist,Mass Immunization Roster Biller,Physical Therapist,Psychologist (billing independently),Public Health Welfare Agency,SNF (Dmercs Only),Unknown Supplier/Provider,All Other Suppliers,Ambulance Service Supplier,Centralized Flu,"Medical Supply Company, Other",Pharmacy,Voluntary Health or Charitable Agencies
count,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0,2917961.0
mean,0.009728321,0.003767876,0.005565317,0.01247546,0.005234758,0.003710482,0.01012547,0.007926873,0.008784749,0.0006306926,0.001881276,0.00514511,0.005542636,0.007001246,0.0001967857,0.00156935,0.004652238,0.00682211,0.006757901,0.001856956,0.0006233594,0.0006880275,0.0007420354,0.002018693,5.831882e-05,0.001432322,0.004941091,0.007441956,0.0129987,0.002303452,0.0002488039,5.825986e-06,0.004895884,1.268009e-05,0.005149144,1.028115e-06,6.511396e-06,0.002257741,0.001214889,0.02837118,0.002972624,0.001129556,0.0001459924,0.0001620995,0.00166212,0.001371848,0.0002162469,0.009653659,0.0161301,0.002395166,0.04899963,0.007443554,0.1339977,0.01741593,0.008735209,0.0233622,0.002513742,0.0002392081,0.00108706,0.001612427,0.0008625886,0.01055018,0.0006096723,6.545667e-05,0.001063757,0.00623826,0.1405245,0.002130597,0.002373918,0.0004650508,9.45866e-05,3.427051e-07,0.001524352,0.00372349,0.0001062386,0.01095217,0.01733162,0.00216898,0.004454138,0.0001483913,0.1328198,1.37082e-06,0.03343088,3.221428e-05,0.02510143,3.769756e-06,0.02769434,0.007017914,0.02665697,0.0007066578,0.01236446,0.002494207,0.0005901381,0.006764998,7.950757e-05,0.009459345,1.233738e-05,0.08807931,0.004445227,0.01759071,0.0008009017,0.03187842,3.427051e-07,0.01174759,0.00410218,1.610714e-05,2.810181e-05,0.006121398,3.427051e-07,0.0002950691,5.483281e-06,3.35851e-05,0.0008951456,0.0007327034,0.001641557,0.0002296124,0.01353411,0.003496962,3.427051e-07,0.0003523008,6.511396e-06,3.427051e-07,7.539511e-06,7.505241e-05,0.0001758077,4.455166e-06,2.74164e-06,1.919148e-05,3.427051e-07,3.427051e-07,3.427051e-07,3.427051e-07,2.74164e-06,6.854101e-07
std,0.01738461,0.007976032,0.01121863,0.02500614,0.01307009,0.005003601,0.01118213,0.008727667,0.00873851,0.002802111,0.003595282,0.005293788,0.005421151,0.005542462,0.002034482,0.002863056,0.00674129,0.009694978,0.00974608,0.007512853,0.00221088,0.002708513,0.002538535,0.002549562,0.001746214,0.002758067,0.008476459,0.01211408,0.02078964,0.00872192,0.01577156,0.002413701,0.06979912,0.003560889,0.07157256,0.00101396,0.002551736,0.04746203,0.03483409,0.1660309,0.05444069,0.03358988,0.01208185,0.0127308,0.04073521,0.03701306,0.01470375,0.09777765,0.1259759,0.04888179,0.2158673,0.08595435,0.3406499,0.1308152,0.09305326,0.151051,0.05007418,0.01546451,0.03295268,0.04012266,0.02935719,0.1021708,0.02468402,0.008090266,0.03259793,0.07873593,0.3475304,0.0461092,0.04866501,0.02156002,0.009725106,0.0005854102,0.03901319,0.06090671,0.01030666,0.104078,0.1305038,0.04652179,0.06659054,0.01218069,0.3393799,0.00117082,0.1797589,0.005675672,0.1564332,0.001941583,0.1640956,0.08347853,0.1610788,0.02657365,0.110506,0.04987973,0.02428559,0.08197094,0.008916349,0.09679808,0.00351244,0.2834103,0.0665242,0.1314583,0.02828888,0.1756764,0.0005854102,0.1077478,0.06391677,0.004013338,0.005301041,0.07799954,0.0005854102,0.01717504,0.002341635,0.005795169,0.0299056,0.02705858,0.04048287,0.01515123,0.1155463,0.05903164,0.0005854102,0.01876637,0.002551736,0.0005854102,0.002745807,0.008662956,0.01325809,0.002110722,0.001655788,0.004380767,0.0005854102,0.0005854102,0.0005854102,0.0005854102,0.001655788,0.0008278949
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0007017992,0.0001434235,0.0001655903,0.0002426929,5.255814e-05,0.0008131145,0.003014643,0.002183978,0.00280459,5.165514e-05,0.0,0.002153316,0.002053328,0.003403882,3.341574e-05,0.000333651,0.0008079389,0.001015378,0.0007418646,4.385381e-05,0.0,0.0,0.0,0.0007021582,8.177219e-06,0.0003003304,0.0005337247,0.0006734201,0.00114125,4.173978e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.00304113,0.0007215994,0.0008991103,0.00177595,0.0006227756,0.002651189,0.006952135,0.005238818,0.006266897,0.0002104707,0.0,0.004091301,0.004399988,0.006183556,7.571725e-05,0.001070203,0.002538282,0.003342332,0.003066686,0.0004430301,0.0,0.0,0.0,0.001228777,1.189799e-05,0.000800881,0.001868037,0.002469207,0.004237577,0.0004186701,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.01088852,0.003771589,0.005479024,0.01233478,0.005136149,0.005090284,0.01386197,0.01103455,0.0122623,0.0005277083,0.00322442,0.006890612,0.007626646,0.009694723,0.0001639741,0.002092948,0.006281683,0.009258116,0.009284773,0.00130547,0.0,0.0004306632,0.0005866651,0.003101199,2.01837e-05,0.001868722,0.006104477,0.009203408,0.01621786,0.001708333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Checking for NaN and Null

In [40]:
# Summary statistics of the normalized test split. Values can fall outside
# [0, 1] here because the scaler's min / max came from the training split only.
test_data.describe()

Unnamed: 0,bene_count_sum,total_claim_count_sum,total_30_day_fill_count_sum,total_day_supply_sum,total_drug_cost_sum,bene_count_mean,total_claim_count_mean,total_30_day_fill_count_mean,total_day_supply_mean,total_drug_cost_mean,bene_count_median,total_claim_count_median,total_30_day_fill_count_median,total_day_supply_median,total_drug_cost_median,bene_count_sd,total_claim_count_sd,total_30_day_fill_count_sd,total_day_supply_sd,total_drug_cost_sd,bene_count_min,total_claim_count_min,total_30_day_fill_count_min,total_day_supply_min,total_drug_cost_min,bene_count_max,total_claim_count_max,total_30_day_fill_count_max,total_day_supply_max,total_drug_cost_max,Addiction Medicine,Advanced Heart Failure and Transplant Cardiology,Allergy/Immunology,Ambulatory Surgical Center,Anesthesiology,Anesthesiology Assistant,Audiologist (billing independently),Cardiac Electrophysiology,Cardiac Surgery,Cardiology,Certified Clinical Nurse Specialist,Certified Nurse Midwife,Chiropractic,Clinical Psychologist,Colorectal Surgery (formerly proctology),Critical Care (Intensivists),CRNA,Dentist,Dermatology,Diagnostic Radiology,Emergency Medicine,Endocrinology,Family Practice,Gastroenterology,General Practice,General Surgery,Geriatric Medicine,Geriatric Psychiatry,Gynecological/Oncology,Hand Surgery,Hematology,Hematology/Oncology,Hospice and Palliative Care,Hospital (Dmercs Only),Hospitalist,Infectious Disease,Internal Medicine,Interventional Cardiology,Interventional Pain Management,Interventional Radiology,Licensed Clinical Social Worker,Mass Immunizer Roster Biller,Maxillofacial Surgery,Medical Oncology,Multispecialty Clinic/Group Practice,Nephrology,Neurology,Neuropsychiatry,Neurosurgery,Nuclear Medicine,Nurse Practitioner,"Nursing Facility, Other (Dmercs Only)",Obstetrics/Gynecology,Occupational therapist,Ophthalmology,Optician,Optometry,Oral Surgery (dentists only),Orthopedic Surgery,Osteopathic Manipulative Medicine,Otolaryngology,Pain Management,Pathology,Pediatric Medicine,Peripheral 
Vascular Disease,Physical Medicine and Rehabilitation,Physical Therapist in Private Practice,Physician Assistant,Plastic and Reconstructive Surgery,Podiatry,Preventive Medicine,Psychiatry,Public Health or Welfare Agency,Pulmonary Disease,Radiation Oncology,Registered Dietician/Nutrition Professional,Rehabilitation Agency,Rheumatology,Skilled Nursing Facility,Sleep Medicine,Slide Preparation Facility,Speech Language Pathologist,Sports Medicine,Surgical Oncology,Thoracic Surgery,Unknown Physician Specialty Code,Urology,Vascular Surgery,Voluntary Health or Charitable Agency,exclusion,Anesthesiologist Assistants,Individual Certified Prosthetist-Orthotist,Mass Immunization Roster Biller,Physical Therapist,Psychologist (billing independently),Public Health Welfare Agency,SNF (Dmercs Only),Unknown Supplier/Provider,All Other Suppliers,Ambulance Service Supplier,Centralized Flu,"Medical Supply Company, Other",Pharmacy,Voluntary Health or Charitable Agencies
count,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0,729491.0
mean,0.009706,0.003753,0.005547,0.012438,0.005223,0.003714,0.010119,0.007919,0.008775,0.000632,0.001879,0.005137,0.005534,0.006994,0.000197,0.001575,0.004654,0.00682,0.006751,0.001865,0.00062,0.000686,0.00074,0.002021,5.8e-05,0.001435,0.004938,0.007438,0.012988,0.002309,0.000245,3e-06,0.004939,1e-05,0.005238,1e-06,7e-06,0.002271,0.001246,0.028449,0.002906,0.001169,0.000156,0.000173,0.001627,0.001383,0.000223,0.009855,0.016128,0.002557,0.049352,0.007356,0.133992,0.017387,0.008744,0.023412,0.00255,0.000249,0.001088,0.001686,0.000854,0.010415,0.000653,8e-05,0.001027,0.006335,0.140102,0.002174,0.002329,0.000489,9.9e-05,0.0,0.001478,0.003778,8.9e-05,0.010843,0.017115,0.002121,0.004429,0.00017,0.132693,5e-06,0.033416,3.4e-05,0.024804,4e-06,0.027822,0.007031,0.02644,0.000653,0.012517,0.002461,0.000565,0.00677,7.1e-05,0.009476,5e-06,0.088056,0.004455,0.017582,0.000785,0.031826,0.0,0.011804,0.004214,1.8e-05,2.6e-05,0.006154,0.0,0.000304,5e-06,3.7e-05,0.000887,0.000668,0.001679,0.00024,0.013744,0.003479,0.0,0.000352,5e-06,0.0,1e-05,9.6e-05,0.00014,4e-06,3e-06,2.3e-05,1e-06,0.0,0.0,0.0,3e-06,0.0
std,0.017352,0.007862,0.011128,0.024894,0.013029,0.005829,0.01153,0.008794,0.008669,0.002776,0.003616,0.005301,0.005485,0.005602,0.001935,0.003701,0.007103,0.009862,0.009637,0.007813,0.002194,0.002696,0.002653,0.00268,0.001592,0.003031,0.008493,0.012117,0.020709,0.008812,0.015663,0.001656,0.070105,0.003098,0.072184,0.001171,0.002618,0.047606,0.035278,0.166251,0.05383,0.034175,0.0125,0.013141,0.040305,0.037165,0.014946,0.098781,0.125967,0.050498,0.216602,0.08545,0.340644,0.13071,0.093102,0.151209,0.05043,0.015793,0.032973,0.041028,0.029211,0.101523,0.025536,0.008916,0.032026,0.079337,0.347093,0.046577,0.048204,0.022117,0.009934,0.0,0.038413,0.061349,0.009439,0.103565,0.129699,0.046002,0.066404,0.013037,0.339242,0.002342,0.179722,0.005854,0.155526,0.002028,0.164463,0.083555,0.160441,0.025536,0.111177,0.049544,0.023758,0.082004,0.008443,0.096885,0.002342,0.283376,0.066598,0.131427,0.028015,0.175538,0.0,0.108004,0.064778,0.004221,0.005103,0.078203,0.0,0.017442,0.002342,0.006084,0.029768,0.025829,0.040944,0.015487,0.116426,0.058882,0.0,0.018766,0.002342,0.0,0.003098,0.009795,0.011824,0.002028,0.001656,0.004827,0.001171,0.0,0.0,0.0,0.001656,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.9e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.000702,0.000141,0.000166,0.000241,5.2e-05,0.000809,0.003015,0.002184,0.002801,5.1e-05,0.0,0.002153,0.002053,0.003404,3.3e-05,0.000331,0.000803,0.001009,0.000737,4.4e-05,0.0,0.0,0.0,0.000702,8e-06,0.0003,0.0005,0.000673,0.001134,4.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.00302,0.000719,0.000897,0.001771,0.000618,0.002651,0.006957,0.005242,0.006267,0.00021,0.0,0.004091,0.0044,0.006184,7.5e-05,0.00107,0.002534,0.003344,0.003066,0.000441,0.0,0.0,0.0,0.001229,1.2e-05,0.000801,0.001868,0.002469,0.004232,0.000415,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.010846,0.00376,0.005463,0.012302,0.00512,0.005098,0.01382,0.011014,0.012246,0.000527,0.003224,0.006891,0.007627,0.009695,0.000164,0.002092,0.006262,0.009246,0.00927,0.001303,0.0,0.000431,0.000587,0.003101,2e-05,0.001869,0.006104,0.009194,0.016195,0.001702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.938284,0.337171,0.340633,0.578099,0.847268,2.000931,2.008254,1.045328,0.74023,0.592607,0.594153,0.595607,0.930451,0.929494,0.592607,1.654311,1.657172,1.633267,0.751292,1.605842,0.594153,0.595607,0.930451,0.929501,0.592607,0.929055,0.930649,0.917843,0.809865,1.210351,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0


In [41]:
# Show every training row that has a missing value in at least one column;
# an empty result means the split contains no NaN / null entries.
train_data[train_data.isnull().any(axis='columns')]

Unnamed: 0,bene_count_sum,total_claim_count_sum,total_30_day_fill_count_sum,total_day_supply_sum,total_drug_cost_sum,bene_count_mean,total_claim_count_mean,total_30_day_fill_count_mean,total_day_supply_mean,total_drug_cost_mean,bene_count_median,total_claim_count_median,total_30_day_fill_count_median,total_day_supply_median,total_drug_cost_median,bene_count_sd,total_claim_count_sd,total_30_day_fill_count_sd,total_day_supply_sd,total_drug_cost_sd,bene_count_min,total_claim_count_min,total_30_day_fill_count_min,total_day_supply_min,total_drug_cost_min,bene_count_max,total_claim_count_max,total_30_day_fill_count_max,total_day_supply_max,total_drug_cost_max,Addiction Medicine,Advanced Heart Failure and Transplant Cardiology,Allergy/Immunology,Ambulatory Surgical Center,Anesthesiology,Anesthesiology Assistant,Audiologist (billing independently),Cardiac Electrophysiology,Cardiac Surgery,Cardiology,Certified Clinical Nurse Specialist,Certified Nurse Midwife,Chiropractic,Clinical Psychologist,Colorectal Surgery (formerly proctology),Critical Care (Intensivists),CRNA,Dentist,Dermatology,Diagnostic Radiology,Emergency Medicine,Endocrinology,Family Practice,Gastroenterology,General Practice,General Surgery,Geriatric Medicine,Geriatric Psychiatry,Gynecological/Oncology,Hand Surgery,Hematology,Hematology/Oncology,Hospice and Palliative Care,Hospital (Dmercs Only),Hospitalist,Infectious Disease,Internal Medicine,Interventional Cardiology,Interventional Pain Management,Interventional Radiology,Licensed Clinical Social Worker,Mass Immunizer Roster Biller,Maxillofacial Surgery,Medical Oncology,Multispecialty Clinic/Group Practice,Nephrology,Neurology,Neuropsychiatry,Neurosurgery,Nuclear Medicine,Nurse Practitioner,"Nursing Facility, Other (Dmercs Only)",Obstetrics/Gynecology,Occupational therapist,Ophthalmology,Optician,Optometry,Oral Surgery (dentists only),Orthopedic Surgery,Osteopathic Manipulative Medicine,Otolaryngology,Pain Management,Pathology,Pediatric Medicine,Peripheral 
Vascular Disease,Physical Medicine and Rehabilitation,Physical Therapist in Private Practice,Physician Assistant,Plastic and Reconstructive Surgery,Podiatry,Preventive Medicine,Psychiatry,Public Health or Welfare Agency,Pulmonary Disease,Radiation Oncology,Registered Dietician/Nutrition Professional,Rehabilitation Agency,Rheumatology,Skilled Nursing Facility,Sleep Medicine,Slide Preparation Facility,Speech Language Pathologist,Sports Medicine,Surgical Oncology,Thoracic Surgery,Unknown Physician Specialty Code,Urology,Vascular Surgery,Voluntary Health or Charitable Agency,exclusion,Anesthesiologist Assistants,Individual Certified Prosthetist-Orthotist,Mass Immunization Roster Biller,Physical Therapist,Psychologist (billing independently),Public Health Welfare Agency,SNF (Dmercs Only),Unknown Supplier/Provider,All Other Suppliers,Ambulance Service Supplier,Centralized Flu,"Medical Supply Company, Other",Pharmacy,Voluntary Health or Charitable Agencies


In [42]:
# Show every test row that has a missing value in at least one column;
# an empty result means the split contains no NaN / null entries.
test_data[test_data.isnull().any(axis='columns')]

Unnamed: 0,bene_count_sum,total_claim_count_sum,total_30_day_fill_count_sum,total_day_supply_sum,total_drug_cost_sum,bene_count_mean,total_claim_count_mean,total_30_day_fill_count_mean,total_day_supply_mean,total_drug_cost_mean,bene_count_median,total_claim_count_median,total_30_day_fill_count_median,total_day_supply_median,total_drug_cost_median,bene_count_sd,total_claim_count_sd,total_30_day_fill_count_sd,total_day_supply_sd,total_drug_cost_sd,bene_count_min,total_claim_count_min,total_30_day_fill_count_min,total_day_supply_min,total_drug_cost_min,bene_count_max,total_claim_count_max,total_30_day_fill_count_max,total_day_supply_max,total_drug_cost_max,Addiction Medicine,Advanced Heart Failure and Transplant Cardiology,Allergy/Immunology,Ambulatory Surgical Center,Anesthesiology,Anesthesiology Assistant,Audiologist (billing independently),Cardiac Electrophysiology,Cardiac Surgery,Cardiology,Certified Clinical Nurse Specialist,Certified Nurse Midwife,Chiropractic,Clinical Psychologist,Colorectal Surgery (formerly proctology),Critical Care (Intensivists),CRNA,Dentist,Dermatology,Diagnostic Radiology,Emergency Medicine,Endocrinology,Family Practice,Gastroenterology,General Practice,General Surgery,Geriatric Medicine,Geriatric Psychiatry,Gynecological/Oncology,Hand Surgery,Hematology,Hematology/Oncology,Hospice and Palliative Care,Hospital (Dmercs Only),Hospitalist,Infectious Disease,Internal Medicine,Interventional Cardiology,Interventional Pain Management,Interventional Radiology,Licensed Clinical Social Worker,Mass Immunizer Roster Biller,Maxillofacial Surgery,Medical Oncology,Multispecialty Clinic/Group Practice,Nephrology,Neurology,Neuropsychiatry,Neurosurgery,Nuclear Medicine,Nurse Practitioner,"Nursing Facility, Other (Dmercs Only)",Obstetrics/Gynecology,Occupational therapist,Ophthalmology,Optician,Optometry,Oral Surgery (dentists only),Orthopedic Surgery,Osteopathic Manipulative Medicine,Otolaryngology,Pain Management,Pathology,Pediatric Medicine,Peripheral 
Vascular Disease,Physical Medicine and Rehabilitation,Physical Therapist in Private Practice,Physician Assistant,Plastic and Reconstructive Surgery,Podiatry,Preventive Medicine,Psychiatry,Public Health or Welfare Agency,Pulmonary Disease,Radiation Oncology,Registered Dietician/Nutrition Professional,Rehabilitation Agency,Rheumatology,Skilled Nursing Facility,Sleep Medicine,Slide Preparation Facility,Speech Language Pathologist,Sports Medicine,Surgical Oncology,Thoracic Surgery,Unknown Physician Specialty Code,Urology,Vascular Surgery,Voluntary Health or Charitable Agency,exclusion,Anesthesiologist Assistants,Individual Certified Prosthetist-Orthotist,Mass Immunization Roster Biller,Physical Therapist,Psychologist (billing independently),Public Health Welfare Agency,SNF (Dmercs Only),Unknown Supplier/Provider,All Other Suppliers,Ambulance Service Supplier,Centralized Flu,"Medical Supply Company, Other",Pharmacy,Voluntary Health or Charitable Agencies
