In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the provider records that were not matched by NPI (tab-delimited text file).
provider = pd.read_csv(
    'Data/Providers_nomatch_byNPI.txt',
    sep='\t',
    low_memory=False,
)

In [3]:
# Sanity check: preview the first five rows to confirm the file was parsed as expected.
provider.head()

Unnamed: 0,LASTNAME,FIRSTNAME,MIDNAME,BUSNAME,SPECIALTY,npi,ADDRESS,CITY,STATE,ZIP,...,antipsych_bene_count_ge65,average_age_of_beneficiaries,beneficiary_female_count,beneficiary_male_count,beneficiary_nondual_count,beneficiary_dual_count,beneficiary_average_risk_score,Full_Name,City_State,FullName_CityState
0,,,,,,1003000126,,,,,...,,72.0,142.0,92.0,143.0,91.0,2.1685,ARDALANENKESHAFI,CUMBERLANDMD,ARDALANENKESHAFICUMBERLANDMD
1,,,,,,1003000142,,,,,...,0.0,65.0,184.0,92.0,143.0,133.0,1.8029,RASHIDKHALIL,TOLEDOOH,RASHIDKHALILTOLEDOOH
2,,,,,,1003000167,,,,,...,0.0,72.0,16.0,17.0,,,1.0598,JULIOESCOBAR,DAYTONNV,JULIOESCOBARDAYTONNV
3,,,,,,1003000175,,,,,...,0.0,,,,,,,BELINDAREYES-VASQUEZ,LA PUENTECA,BELINDAREYES-VASQUEZLA PUENTECA
4,,,,,,1003000282,,,,,...,0.0,62.0,,,,,4.5148,ROSIEBLAKEMORE,NASHVILLETN,ROSIEBLAKEMORENASHVILLETN


In [4]:
# List every column label in the provider frame.
provider.columns

Index(['LASTNAME', 'FIRSTNAME', 'MIDNAME', 'BUSNAME', 'SPECIALTY', 'npi',
       'ADDRESS', 'CITY', 'STATE', 'ZIP', 'EXCLYear', 'excl_type',
       'nppes_provider_last_org_name', 'nppes_provider_first_name',
       'nppes_provider_mi', 'nppes_credentials', 'nppes_provider_gender',
       'nppes_entity_code', 'nppes_provider_street1', 'nppes_provider_city',
       'nppes_provider_zip5', 'nppes_provider_state', 'nppes_provider_country',
       'specialty_description', 'description_flag',
       'medicare_prvdr_enroll_status', 'total_claim_count',
       'total_30_day_fill_count', 'total_drug_cost', 'total_day_supply',
       'bene_count', 'ge65_suppress_flag', 'total_claim_count_ge65',
       'total_30_day_fill_count_ge65', 'total_drug_cost_ge65',
       'total_day_supply_ge65', 'bene_count_ge65_suppress_flag',
       'bene_count_ge65', 'brand_suppress_flag', 'brand_claim_count',
       'brand_drug_cost', 'generic_suppress_flag', 'generic_claim_count',
       'generic_drug_cost', 'other

In [5]:
# Remove the columns that were populated by the previous merge and are no longer needed.
# The result is bound to a new name, so the raw `provider` frame stays untouched.
provider_clean = provider.drop(
    columns=[
        'LASTNAME', 'FIRSTNAME', 'MIDNAME', 'BUSNAME', 'SPECIALTY',
        'ADDRESS', 'CITY', 'STATE', 'ZIP',
        'EXCLYear', 'excl_type',
        'Full_Name', 'City_State', 'FullName_CityState',
    ]
)

In [6]:
# Load the additional exclusion records that have been reinstated since 2019.
exclusions = pd.read_csv(filepath_or_buffer='Data/reinstatements_NPI.csv')

In [7]:
# Sanity check: preview the first five rows to confirm the CSV was parsed as expected.
exclusions.head()

Unnamed: 0,LASTNAME,FIRSTNAME,MIDNAME,BUSNAME,SPECIALTY,NPI,DOB,ADDRESS,CITY,STATE,ZIP,EXCLTYPE,EXCLYear,REINYear,excl_type
0,,,,"DAY'S HEALTHY LIVING PHARMACY,",PHARMACY,1861645061,,P O BOX 61,TIPTON,IN,46072,1128b8,2013,2019,1128b8
1,,,,"DAY'S RX, INC",PHARMACY,1932167103,,P O BOX 61,TIPTON,IN,46072,1128b8,2013,2019,1128b8
2,BERMAN,LARRY,R,,DENTIST,1326191347,19531214.0,P O BOX 4392,HIGHLAND PARK,NJ,8904,1128a1,2012,2019,1128a1
3,BRENNAN,KEVIN,FRANCIS,,NURSE/NURSES AIDE,1295828879,19560925.0,"1019 SUPERIOR STREET, APT 2",WATERTOWN,NY,13601,1128b4,2013,2019,1128b4
4,BROOKS,SHEILA,JEAN,,PODIATRY,1467442871,19481003.0,P O BOX 690,BLUEFIELD,WV,24701,1128b4,2017,2019,1128b4


In [8]:
# (rows, columns) of the cleaned provider data; the row count is the baseline
# to compare against after the merge.
provider_clean.shape

(1162451, 72)

In [9]:
# (rows, columns) of the exclusion data.
exclusions.shape

(131, 15)

In [10]:
# Give the key column the same label in both frames ('NPI' -> 'npi') so they can be merged on it.
# Rebinding to the renamed copy (rather than inplace=True) avoids mutating the frame that an
# earlier cell already displayed; re-running the cell is harmless because renaming a label
# that is no longer present is a no-op.
exclusions = exclusions.rename(columns={'NPI': 'npi'})

In [11]:
# Left-join the exclusion records onto the provider data by 'npi'.
# how='left' keeps every provider row; indicator=True adds a '_merge' column that
# records whether each row was found in both frames or only in the left one.
Combined_2017_NPI = pd.merge(
    provider_clean,
    exclusions,
    how='left',
    on='npi',
    sort=False,
    indicator=True,
)

# Guard against row fan-out: if an 'npi' value occurred more than once in `exclusions`,
# the matching provider rows would be duplicated and later counts would be inflated.
assert len(Combined_2017_NPI) == len(provider_clean), (
    "Left merge changed the number of provider rows; check `exclusions` for duplicate npi values"
)

In [12]:
# Preview the first five rows of the merged frame, including the '_merge' indicator column.
Combined_2017_NPI.head()

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_city,nppes_provider_zip5,...,DOB,ADDRESS,CITY,STATE,ZIP,EXCLTYPE,EXCLYear,REINYear,excl_type,_merge
0,1003000126,ENKESHAFI,ARDALAN,,M.D.,M,I,900 SETON DR,CUMBERLAND,21502.0,...,,,,,,,,,,left_only
1,1003000142,KHALIL,RASHID,,M.D.,M,I,4126 N HOLLAND SYLVANIA RD,TOLEDO,43623.0,...,,,,,,,,,,left_only
2,1003000167,ESCOBAR,JULIO,E,DDS,M,I,5 PINE CONE RD,DAYTON,89403.0,...,,,,,,,,,,left_only
3,1003000175,REYES-VASQUEZ,BELINDA,,D.D.S.,F,I,322 N AZUSA AVE STE 202,LA PUENTE,91744.0,...,,,,,,,,,,left_only
4,1003000282,BLAKEMORE,ROSIE,K,FNP,F,I,TENNESSEE PRISON FOR WOMEN,NASHVILLE,37243.0,...,,,,,,,,,,left_only


In [13]:
# Count rows per merge outcome; 'both' is the number of provider rows that matched an exclusion record.
Combined_2017_NPI['_merge'].value_counts()

left_only     1162433
both               18
right_only          0
Name: _merge, dtype: int64

In [14]:
# Flag the providers that matched an exclusion record: 1 where the merge indicator is 'both', else 0.
# A vectorised comparison replaces the per-row lambda; casting to int64 keeps the same
# integer dtype the flag column had before.
Combined_2017_NPI['exclusion_flag'] = (Combined_2017_NPI['_merge'] == 'both').astype('int64')

In [15]:
# Confirm the flag was added correctly: the count of 1s should equal the 'both' count from the merge indicator.
Combined_2017_NPI['exclusion_flag'].value_counts()

0    1162433
1         18
Name: exclusion_flag, dtype: int64

In [16]:
# List the merged frame's columns to decide which ones can now be dropped.
Combined_2017_NPI.columns

Index(['npi', 'nppes_provider_last_org_name', 'nppes_provider_first_name',
       'nppes_provider_mi', 'nppes_credentials', 'nppes_provider_gender',
       'nppes_entity_code', 'nppes_provider_street1', 'nppes_provider_city',
       'nppes_provider_zip5', 'nppes_provider_state', 'nppes_provider_country',
       'specialty_description', 'description_flag',
       'medicare_prvdr_enroll_status', 'total_claim_count',
       'total_30_day_fill_count', 'total_drug_cost', 'total_day_supply',
       'bene_count', 'ge65_suppress_flag', 'total_claim_count_ge65',
       'total_30_day_fill_count_ge65', 'total_drug_cost_ge65',
       'total_day_supply_ge65', 'bene_count_ge65_suppress_flag',
       'bene_count_ge65', 'brand_suppress_flag', 'brand_claim_count',
       'brand_drug_cost', 'generic_suppress_flag', 'generic_claim_count',
       'generic_drug_cost', 'other_suppress_flag', 'other_claim_count',
       'other_drug_cost', 'mapd_suppress_flag', 'mapd_claim_count',
       'mapd_drug_cost', 'pd

In [17]:
# Drop the '_merge' indicator plus the LEIE columns that are not needed;
# the result is bound to a new name so the merged frame stays intact.
Combined_2017_NPI_2 = Combined_2017_NPI.drop(
    columns=[
        'LASTNAME', 'FIRSTNAME', 'MIDNAME', 'BUSNAME', 'SPECIALTY',
        'DOB', 'ADDRESS', 'CITY', 'STATE', 'ZIP',
        'EXCLTYPE', '_merge',
    ]
)

In [18]:
# Re-list the columns to confirm the drop removed the intended ones.
Combined_2017_NPI_2.columns

Index(['npi', 'nppes_provider_last_org_name', 'nppes_provider_first_name',
       'nppes_provider_mi', 'nppes_credentials', 'nppes_provider_gender',
       'nppes_entity_code', 'nppes_provider_street1', 'nppes_provider_city',
       'nppes_provider_zip5', 'nppes_provider_state', 'nppes_provider_country',
       'specialty_description', 'description_flag',
       'medicare_prvdr_enroll_status', 'total_claim_count',
       'total_30_day_fill_count', 'total_drug_cost', 'total_day_supply',
       'bene_count', 'ge65_suppress_flag', 'total_claim_count_ge65',
       'total_30_day_fill_count_ge65', 'total_drug_cost_ge65',
       'total_day_supply_ge65', 'bene_count_ge65_suppress_flag',
       'bene_count_ge65', 'brand_suppress_flag', 'brand_claim_count',
       'brand_drug_cost', 'generic_suppress_flag', 'generic_claim_count',
       'generic_drug_cost', 'other_suppress_flag', 'other_claim_count',
       'other_drug_cost', 'mapd_suppress_flag', 'mapd_claim_count',
       'mapd_drug_cost', 'pd

In [19]:
# (rows, columns) after the drop; the row count should still equal that of provider_clean.
Combined_2017_NPI_2.shape

(1162451, 76)

In [20]:
# Write the merged result to a tab-delimited file (without the index) so it can be
# appended to the other matches.
Combined_2017_NPI_2.to_csv(
    'Data/Providers_reinstatement_merge_NPI.txt',
    sep='\t',
    index=False,
)

In [21]:
# Boolean mask selecting the rows flagged as excluded, then the subset of just those rows.
match = Combined_2017_NPI_2['exclusion_flag'].eq(1)
matches_only = Combined_2017_NPI_2.loc[match]

In [22]:
# Display up to the first 20 rows of the flagged (matched) providers.
matches_only.head(20)

Unnamed: 0,npi,nppes_provider_last_org_name,nppes_provider_first_name,nppes_provider_mi,nppes_credentials,nppes_provider_gender,nppes_entity_code,nppes_provider_street1,nppes_provider_city,nppes_provider_zip5,...,average_age_of_beneficiaries,beneficiary_female_count,beneficiary_male_count,beneficiary_nondual_count,beneficiary_dual_count,beneficiary_average_risk_score,EXCLYear,REINYear,excl_type,exclusion_flag
6443,1003822834,DARVISH,SAADI,M,O.D,M,I,1208 VILLAGE CREEK DR,PLANO,75093.0,...,,,,,,,2019.0,2019.0,1128b14,1
13605,1013087741,KALLINI,ADEL,A,MD,M,I,440 E SAMPLE RD,POMPANO BEACH,33064.0,...,66.0,71.0,34.0,55.0,50.0,1.8295,2018.0,2018.0,1128Aa,1
36528,1033145487,QAMAR,ASAD,U,M.D.,M,I,4730 SW 49TH ROAD,OCALA,34474.0,...,71.0,169.0,164.0,212.0,121.0,1.8685,2017.0,2020.0,1128b7,1
58059,1053303560,EHRMAN,WALTER,J,MD,M,I,1180 N INDIAN CANYON DR,PALM SPRINGS,92262.0,...,72.0,,,,,2.0166,2019.0,2019.0,1128b14,1
141292,1124057245,ALHATOU,MOHAMMED,I,M.D.,M,I,2850 PELHAM CT,ORANGEBURG,29118.0,...,65.0,253.0,172.0,182.0,243.0,1.76,2019.0,2019.0,1128a4,1
167816,1144367947,LORBER,BURTON,,DMD,M,I,875 MAMARONECK AVE,MAMARONECK,10543.0,...,59.0,,,,,1.2619,2018.0,2019.0,1128b14,1
200704,1174571285,ALEXANDER,RUSSELL,D,D.O.,M,I,15155 HIGHWAY 43,RUSSELLVILLE,35653.0,...,62.0,122.0,78.0,83.0,117.0,1.3894,2019.0,2019.0,1128b14,1
286128,1245445931,MEEHAN-KIERMEIER,KATHLEEN,A,"D.M.D., M.S.",F,I,1000 WHITE HORSE RD,VOORHEES,8043.0,...,66.0,46.0,20.0,42.0,24.0,1.1889,2018.0,2019.0,1128b14,1
293605,1255370367,VAZQUEZ,CARLOS,R,M.D.,M,I,303 NO. CLYDE MORRIS BLVD.,DAYTONA BEACH,32114.0,...,73.0,111.0,83.0,135.0,59.0,2.0273,2019.0,2019.0,1128b14,1
541788,1467624536,JUNG,RICHARD,K,DDS,M,I,225 BROAD AVE STE 102,PALISADES PARK,7650.0,...,73.0,,,,,0.9744,2019.0,2019.0,1128b14,1


In [23]:
# Count the matched providers per EXCLYear value (presumably the exclusion year; confirm against the LEIE data dictionary).
matches_only['EXCLYear'].value_counts()

2018.0    7
2019.0    7
2017.0    3
2020.0    1
Name: EXCLYear, dtype: int64

In [24]:
# Count the matched providers per excl_type value.
matches_only['excl_type'].value_counts()

1128b14    13
1128a4      1
1128b7      1
1128b4      1
1128Aa      1
1128b1      1
Name: excl_type, dtype: int64