In [1]:
import pandas as pd
from npi.npi import NPI
from npi.utils.utils import longprint
from npi.pecos import PECOS, medicare_program_engagement, medical_school

### Instantiate an NPI object

The NPI class provides convenient access to the NPI data housed in our shared folders.

The canonical way to obtain an instance of this class is shown below (specify entities=1 if you only want to work with individuals):

In [2]:
# entities=1 restricts the object to individuals, as described in the text above.
npi = NPI(entities=1)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
* Function: get_entity
* Metadata: {'func': 'get_entity', 'args': ['/work/akilby/npi/data/', None], 'kwargs': {}, 'code': {'get_entity': '-code snipped-'}} 
* (identified) Called functions: ['get_entity']
* Cache found; loaded from ID 1594255253350343
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------



### Display the list of data that can currently be managed by adding it as an attribute

In [3]:
# Print the raw and processed data names; these are the names passed to
# npi.retrieve() in the retrieval cell below.
npi.display_gettable_attributes()

Gettable raw data:
 ['PLICNUM', 'PLICSTATE', 'entity', 'npideactdate', 'npireactdate', 'pcredential', 'pcredentialoth', 'pfname', 'pfnameoth', 'pgender', 'plname', 'plnameoth', 'ploccityname', 'plocline1', 'plocline2', 'plocstatename', 'ploctel', 'ploczip', 'pmname', 'pmnameoth', 'pnamesuffix', 'pnamesuffixoth', 'ptaxcode'] 


Gettable processed data:
 ['credentials', 'expanded_fullnames', 'fullnames', 'licenses', 'practitioner_type', 'removaldate', 'secondary_practice_locations']


### Retrieve all the data you need 

Hint: if you want to get rid of all the bulky output, run the below cell twice. The second time will be instantaneous and will clear the output.

In [35]:
# Load every dataset used later in the notebook. After retrieval each one is
# read back as an attribute of `npi` (e.g. npi.removaldate, npi.fullnames).
# The order of the calls is unchanged.
for attribute_name in (
    'removaldate',
    'fullnames',
    'expanded_fullnames',
    'practitioner_type',
    'plocstatename',
    'ploctel',
    'credentials',
    'pgender',
    'licenses',
    'ptaxcode',
):
    npi.retrieve(attribute_name)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
* Function: handle_suffixes_in_lastnames
* Metadata: {'func': 'handle_suffixes_in_lastnames', 'args': [-3080851301225376489, 'plname', 'pnamesuffix', ['JR', 'III', 'II', 'SR', 'I', 'IV', 'X', 'V', 'IX', 'VI', 'VII', 'VIII', 'J R', 'I I I', 'I I', 'S R', 'I', 'I V', 'X', 'V', 'I X', 'V I', 'V I I', 'V I I I'], ['JR', 'III', 'VIII']], 'kwargs': {}, 'code': {'handle_suffixes_in_lastnames': '-code snipped-'}} 
* (identified) Called functions: ['handle_suffixes_in_lastnames']
* Cache not found; running
* Cache created with ID 1594268978295326
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

-----------------------------------------------------

### Construct a dataset of NPI removals that can be used to search for people

1. Keep only NPIs flagged as MD/DO, an advanced practice nurse type (NP, CNM, CNS, CRNA, Other APRN), PA, RN, LPN/LVN, Other Nurse, or Student for now

In [5]:
# Preview the practitioner-type table indexed by NPI; the displayed columns are
# 0/1 indicators, one per practitioner category.
npi.practitioner_type.set_index('npi')

Unnamed: 0_level_0,MD/DO,Dentist,Psychologist,Podiatrist,Optometrist,Chiropractor,Pharmacist,NP,CNM,CNS,CRNA,Other APRN,PA,RN,LPN/LVN,Other Nurse,Student,No Category
npi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1003000100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1003000126,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1003000134,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1003000142,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1003000159,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1992999833,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1992999858,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1992999866,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1992999874,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [6]:
# Practitioner categories to keep. An NPI qualifies when it is flagged in at
# least one of these indicator columns.
kept_categories = [
    'MD/DO', 'CNM', 'CNS', 'CRNA', 'NP', 'Other APRN',
    'PA', 'RN', 'LPN/LVN', 'Other Nurse', 'Student',
]
category_flags = npi.practitioner_type.set_index('npi')[kept_categories]
# Boolean Series indexed by npi: True when any kept category is flagged.
s = category_flags.sum(axis=1).gt(0)
# Keep the True rows only. reset_index() turns the unnamed Series into a column
# labelled 0, which is dropped so that only the `npi` column remains.
mds_aprns_others = s.loc[s].reset_index().drop(columns=[0])

2. Merge together the MD/APRN list with the deactivation list and their credentials

In [7]:
# Deactivation dates restricted to the selected practitioners, with credentials
# attached. Both merges are default inner joins on the shared columns, so an
# NPI must appear in every table to be kept.
removed = (
    npi.removaldate
    .merge(mds_aprns_others)
    .merge(npi.credentials)
    .drop(columns='pcredential_stripped')
)
# Display only: the sorted, re-indexed view is not stored back into `removed`.
removed.sort_values('npideactdate').reset_index(drop=True)

Unnamed: 0,npi,npideactdate,pcredential,cat
0,1063568517,2007-12-07,RNC,RN
1,1417160383,2007-12-07,MD,MD/DO
2,1558443713,2007-12-07,RN,RN
3,1164488441,2007-12-19,MD,MD/DO
4,1669549481,2008-01-03,MD,MD/DO
...,...,...,...,...
68390,1639580731,2020-07-02,MPH,
68391,1538229281,2020-07-02,MD,MD/DO
68392,1558462713,2020-07-02,DMD,Dentist
68393,1841602281,2020-07-03,DO,MD/DO


3. Merge in the state locations in the month they are removed

Here we are selecting the last month observed for each NPI, for convenience

In [8]:
# For each NPI, keep the practice-location state from its most recent month.
# groupby(...).last() is positional within each group, so the rows are sorted by
# month first (as is done for the Physician Compare data further down) instead
# of relying on the stored row order.
# Note that last() skips missing values: an NPI whose latest month has no state
# recorded falls back to its most recent non-missing state.
states = (
    npi.plocstatename
    .sort_values(['npi', 'month'])
    .groupby('npi', as_index=False)
    .last()
    .drop(columns='month')
)

In [9]:
# merge() defaults to an inner join on the columns the two frames share
# (presumably just `npi` here — confirm), so NPIs without a row in `states`
# drop out of `removed`.
removed = removed.merge(states)

4. Merge in full names

In [10]:
# Default inner merge on shared columns. An NPI with more than one recorded
# name ends up with one row per name (see the duplicated NPIs in the table below).
removed = removed.merge(npi.fullnames)

### 60 most recent removals

People can be duplicated if they have more than one value for fullname or credential. You can use this additional context to find people

In [11]:
# Ascending sort by deactivation date (ties broken by NPI), so tail(60) is the
# 60 most recent rows.
removed.sort_values(['npideactdate', 'npi']).tail(60)

Unnamed: 0,npi,npideactdate,pcredential,cat,plocstatename,pfname,pmname,plname,pnamesuffix,othflag
57068,1780601369,2020-06-30,DO,MD/DO,NJ,EUGENE,MARIO,DIMARCO,,0
57216,1780649582,2020-06-30,MD,MD/DO,SC,S,D,PENDERGRASS,III,0
57217,1780649582,2020-06-30,MD,MD/DO,SC,SAMUEL,D,PENDERGRASS,III,0
57691,1780869271,2020-06-30,MD,MD/DO,TX,WILLIAM,GEORGE,ROBERTSON,III,0
57692,1780869271,2020-06-30,PA,PA,TX,WILLIAM,GEORGE,ROBERTSON,III,0
59033,1801926159,2020-06-30,DMD,Dentist,NJ,DANIEL,THOMAS,FRANTA,,0
59118,1801963228,2020-06-30,DDS,Dentist,NY,RIGOBERTO,,CORNEJO,,0
59507,1811175995,2020-06-30,LVN,LPN/LVN,CA,MIA,VERNESE,WRIGHT-SHAZEL,,0
59567,1811225691,2020-06-30,MD,MD/DO,NY,CESAR,,JARDON,,0
59568,1811225691,2020-06-30,DR OF PSYCHOLOGY,,NY,CESAR,,JARDON,,0


### Retrieving activity information from Medicare, and med school graduations.

This finds people with NPIs who are active in the Medicare data, and also pulls their medical school or training graduation date. This helps gauge whether or not they are likely to be retired.

In [12]:
# Per-NPI first/last years observed in the PC, Part D and Part B columns, plus
# the maybe_active / active_2020 flags (see the table displayed below).
active = medicare_program_engagement() 
# Columns: npi, Medical school name, Graduation year (see the table displayed below).
med_school = medical_school()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
* Function: physician_compare_select_vars
* Metadata: {'func': 'physician_compare_select_vars', 'args': [[]], 'kwargs': {'drop_duplicates': False, 'date_var': True}, 'code': {'physician_compare_select_vars': '-code snipped-', 'detect_date': '-code snipped-', 'convert_dtypes': '-code snipped-'}} 
* (identified) Called functions: ['physician_compare_select_vars', 'detect_date', 'convert_dtypes']
* Cache found; loaded from ID 1591669125237018
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------------------------------------------------------------------------

In [13]:
# Display the Medicare engagement table built above.
active

Unnamed: 0,npi,PC_Min_Year,PC_Max_Year,PartD_Min_Year,PartD_Max_Year,PartB_Min_Year,PartB_Max_Year,maybe_active,active_2020
0,1003000126,2013,2020,2013,2017,2012,2017,True,True
1,1003000134,2013,2020,,,2012,2017,True,True
2,1003000142,2013,2020,2013,2017,2012,2017,True,True
3,1003000159,2014,2015,2014,2014,,,False,False
4,1003000381,2013,2014,,,2012,2012,False,False
...,...,...,...,...,...,...,...,...,...
2065784,1992996672,,,,,2012,2017,True,False
2065785,1992996995,,,,,2012,2013,False,False
2065786,1992998157,,,,,2012,2015,False,False
2065787,1992998702,,,,,2012,2017,True,False


In [14]:
# Display the medical school / graduation year table built above.
med_school

Unnamed: 0,npi,Medical school name,Graduation year
0,1003001363,UNIVERSITY OF ARKANSAS COLLEGE OF MEDICINE,2003
1,1003001843,OTHER,1998
2,1003001017,"UNIVERSITY OF CALIFORNIA, IRVINE, CALIFORNIA C...",1968
3,1235197823,OTHER,1980
4,1780720284,OTHER,1982
...,...,...,...
1567657,1548515877,OTHER,1997
1567658,1598747891,OTHER,1993
1567659,1629204938,OTHER,2005
1567660,1700104353,"UNIVERSITY OF CALIFORNIA, SAN DIEGO SCHOOL OF ...",2010


In [15]:
# Physician Compare variables to load: identifiers, training, group practice,
# and up to five hospital affiliations (a CCN / LBN column pair per slot).
provider_columns = [
    'NPI',
    'Medical school name',
    'Graduation year',
    'Organization legal name',
    'Group Practice PAC ID',
    'Number of Group Practice members',
]
# Yields 'Hospital affiliation CCN 1', 'Hospital affiliation LBN 1', ...,
# 'Hospital affiliation CCN 5', 'Hospital affiliation LBN 5', in that order.
hospital_affiliation_columns = [
    f'Hospital affiliation {field} {slot}'
    for slot in range(1, 6)
    for field in ('CCN', 'LBN')
]
pecos = PECOS(
    provider_columns + hospital_affiliation_columns,
    drop_duplicates=False,
    date_var=True,
)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
* Function: physician_compare_select_vars
* Metadata: {'func': 'physician_compare_select_vars', 'args': [['NPI', 'Medical school name', 'Graduation year', 'Organization legal name', 'Group Practice PAC ID', 'Number of Group Practice members', 'Hospital affiliation CCN 1', 'Hospital affiliation LBN 1', 'Hospital affiliation CCN 2', 'Hospital affiliation LBN 2', 'Hospital affiliation CCN 3', 'Hospital affiliation LBN 3', 'Hospital affiliation CCN 4', 'Hospital affiliation LBN 4', 'Hospital affiliation CCN 5', 'Hospital affiliation LBN 5'], False, True], 'kwargs': {}, 'code': {'physician_compare_select_vars': '-code snipped-', 'detect_date': '-code snipped-', 'convert_dtypes': '-code snipped-'}} 
* (identified) Called functions: ['physician_compare_select_vars', 'detect_date', 'convert_dtype

In [16]:
# Order each NPI's Physician Compare records chronologically, then take the
# last entry per NPI.
# NOTE(review): groupby().last() takes the last non-missing value of each column
# separately, so a row here can combine values from different dates — confirm
# this is intended.
physician_compare_by_date = pecos.physician_compare.sort_values(['NPI', 'date'])
last_place_practiced = physician_compare_by_date.groupby(['NPI']).last()
last_place_practiced

Unnamed: 0_level_0,Medical school name,Graduation year,Organization legal name,Group Practice PAC ID,Number of Group Practice members,Hospital affiliation CCN 1,Hospital affiliation LBN 1,Hospital affiliation CCN 2,Hospital affiliation LBN 2,Hospital affiliation CCN 3,Hospital affiliation LBN 3,Hospital affiliation CCN 4,Hospital affiliation LBN 4,Hospital affiliation CCN 5,Hospital affiliation LBN 5,date
NPI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1003000126,OTHER,1994,SOUTHEASTERN INTENSIVIST SERVICES PC,9335152107,134,490112,CJW MEDICAL CENTER,490107,RESTON HOSPITAL CENTER,490048,LEWISGALE MEDICAL CENTER,490024,CARILION ROANOKE MEMORIAL HOSPITAL,,Y,2020-04-16
1003000134,UNIVERSITY OF KENTUCKY COLLEGE OF MEDICINE,2003,NORTH SHORE UNIVERSITY HEALTHSYSTEM FACULTY PR...,2163334699,1276,140010,NORTHSHORE UNIVERSITY HEALTHSYSTEM - EVANSTON ...,140130,NORTHWESTERN LAKE FOREST HOSPITAL,140242,CENTRAL DUPAGE HOSPITAL,140242,CENTRAL DUPAGE HOSPITAL,140258,ALEXIAN BROTHERS MEDICAL CENTER 1,2020-04-16
1003000142,OTHER,1999,"MERCY HEALTH PHYSICIANS - NORTH, LLC.",2668522400,402,360262,MERCY ST ANNE HOSPITAL,360112,MERCY ST VINCENT MEDICAL CENTER,360081,MERCY ST CHARLES HOSPITAL,360074,FLOWER HOSPITAL,360090,ST LUKE'S HOSPITAL,2020-04-16
1003000159,OTHER,1983,AMERICAN CURRENT CARE PA,9739260019,466,,,,,,,,,,,2015-10-01
1003000381,OTHER,2000,"TRI-COUNTY ORTHOPAEDIC CENTER,PA",4981698362,12,,,,,,,,,,,2014-04-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1992999759,OTHER,2014,HEALING SPRINGS HEALTH CENTER,9739439100,2,420007,SPARTANBURG MEDICAL CENTER,420007,SPARTANBURG MEDICAL CENTER,,,,,,Y,2020-04-16
1992999791,OTHER,2006,MID-SOUTH MATERNAL FETAL MEDICINE PC,244289015,2,,,,,,,,,,,2020-04-16
1992999817,MEDICAL COLLEGE OF WISCONSIN,2004,PERMANENTE MEDICAL GROUP INC,8921910225,6596,50748,KAISER FOUNDATION HOSPITAL MANTECA,,,,,,,,Y,2020-04-16
1992999825,JEFFERSON MEDICAL COLLEGE OF THOMAS JEFFERSON ...,2005,VIRGINIA MASON MEDICAL CENTER,9830002617,790,500005,VIRGINIA MASON MEDICAL CENTER,500141,ST FRANCIS COMMUNITY HOSPITAL,,,,,,Y,2020-04-16


In [17]:
# Attach Medicare activity to every removal row. The left merge keeps removals
# with no Medicare record; their activity columns are missing.
removed_with_activity = removed.merge(active, how='left')
# Chronological order with a fresh 0..n-1 index.
df = removed_with_activity.sort_values(['npideactdate', 'npi']).reset_index(drop=True)

In [18]:
# Last 60 rows, i.e. the most recent deactivations with their Medicare activity columns.
df.tail(60)

Unnamed: 0,npi,npideactdate,pcredential,cat,plocstatename,pfname,pmname,plname,pnamesuffix,othflag,PC_Min_Year,PC_Max_Year,PartD_Min_Year,PartD_Max_Year,PartB_Min_Year,PartB_Max_Year,maybe_active,active_2020
73087,1780601369,2020-06-30,DO,MD/DO,NJ,EUGENE,MARIO,DIMARCO,,0,2013.0,2020.0,2013.0,2017.0,2012.0,2017.0,True,True
73088,1780649582,2020-06-30,MD,MD/DO,SC,S,D,PENDERGRASS,III,0,2013.0,2019.0,2013.0,2017.0,2012.0,2017.0,False,False
73089,1780649582,2020-06-30,MD,MD/DO,SC,SAMUEL,D,PENDERGRASS,III,0,2013.0,2019.0,2013.0,2017.0,2012.0,2017.0,False,False
73090,1780869271,2020-06-30,MD,MD/DO,TX,WILLIAM,GEORGE,ROBERTSON,III,0,2013.0,2016.0,2013.0,2015.0,2012.0,2015.0,False,False
73091,1780869271,2020-06-30,PA,PA,TX,WILLIAM,GEORGE,ROBERTSON,III,0,2013.0,2016.0,2013.0,2015.0,2012.0,2015.0,False,False
73092,1801926159,2020-06-30,DMD,Dentist,NJ,DANIEL,THOMAS,FRANTA,,0,,,2013.0,2017.0,,,True,False
73093,1801963228,2020-06-30,DDS,Dentist,NY,RIGOBERTO,,CORNEJO,,0,,,,,,,,
73094,1811175995,2020-06-30,LVN,LPN/LVN,CA,MIA,VERNESE,WRIGHT-SHAZEL,,0,,,,,,,,
73095,1811225691,2020-06-30,MD,MD/DO,NY,CESAR,,JARDON,,0,,,,,,,,
73096,1811225691,2020-06-30,DR OF PSYCHOLOGY,,NY,CESAR,,JARDON,,0,,,,,,,,


In [19]:
# Last-known practice details keyed by a lower-case `npi` column so they can be
# merged onto the removal list.
practice_details = last_place_practiced.reset_index().rename(columns={'NPI': 'npi'})
# One row per (npi, deactivation date) with organization and hospital
# affiliation columns. School, graduation year and record date are dropped.
df2 = (
    df[['npi', 'npideactdate']]
    .drop_duplicates()
    .merge(practice_details, how='left')
    .drop(columns=['Medical school name', 'Graduation year', 'date'])
)
df2.tail(60)

Unnamed: 0,npi,npideactdate,Organization legal name,Group Practice PAC ID,Number of Group Practice members,Hospital affiliation CCN 1,Hospital affiliation LBN 1,Hospital affiliation CCN 2,Hospital affiliation LBN 2,Hospital affiliation CCN 3,Hospital affiliation LBN 3,Hospital affiliation CCN 4,Hospital affiliation LBN 4,Hospital affiliation CCN 5,Hospital affiliation LBN 5
64674,1730581166,2020-06-30,SPRINGFIELD CLINIC LLP,547166076.0,539.0,140135.0,DECATUR MEMORIAL HOSPITAL,140166.0,ST MARYS HOSPITAL,,,,,,Y
64675,1740340264,2020-06-30,INTERNAL MEDICAL CLINIC PLLC,749436970.0,1.0,230156.0,ST JOSEPH MERCY HOSPITAL,230020.0,OAKWOOD HEALTHCARE INC,230142.0,OAKWOOD HEALTHCARE INC,,,,
64676,1740348333,2020-06-30,,,,,,,,,,,,,
64677,1750486973,2020-06-30,,,,,,,,,,,,,
64678,1760621189,2020-06-30,,,,,,,,,,,,,
64679,1780601369,2020-06-30,BRIGANTINE EYE CARE CENTER,8325098445.0,1.0,310064.0,ATLANTICARE REGIONAL MEDICAL CENTER - CITY CAMPUS,,,,,,,,Y
64680,1780649582,2020-06-30,CAROLINA HEALTH CENTERS INC,5496707804.0,20.0,420071.0,SELF REGIONAL HEALTHCARE,,,,,,,,Y
64681,1780869271,2020-06-30,AUSTIN COLON AND RECTAL CLINIC ASSOCIATION,7810051703.0,8.0,450713.0,ST DAVIDS HEALTHCARE PARTNERSHIP LP LLP,,,,,,,,Y
64682,1801926159,2020-06-30,,,,,,,,,,,,,
64683,1801963228,2020-06-30,,,,,,,,,,,,,


### Getting details on specific people for KHN

In [20]:
# NPIs of the specific individuals to look up.
list_of_people = [1073579033, 1184674699, 1568471878, 1174690101, 1770580441, 1417315912, 1649589847, 1346456324, 1457358467, 1790786606, 1801878566, 1497731905, 1497725980, 1962577148, 1093864449, 1003801002, 1316972490, 1619910247, 1972622884, 1164459897, 1710084504, 1932285228, 1033261409, 1043587363, 1124369665, 1376540252, 1306827886, 1770820110, 1811996036, 1144556960, 1073992087, 1508831694, 1790049401, 1417973652, 1710006846, 1972959625, 1447348925, 1982627824]

In [21]:
# One-column frame of the NPIs, used as the merge key for the lookups below.
people = pd.DataFrame({'npi': list_of_people})

In [22]:
# Right merge keeps every NPI in `people`, including any without a row in
# npi.ploctel (their other columns come back missing).
tels = npi.ploctel.merge(people, how='right')

In [23]:
# Keep, for each NPI, the row from its most recent month.
# The right merge that built `tels` leaves a row with a missing `month` for any
# listed NPI that has no phone records. idxmax cannot return a row label for
# such an all-missing group, so those rows are dropped first. The later left
# merge onto `people` brings those NPIs back with a missing phone number.
tels = tels.dropna(subset=['month'])
tels = tels.loc[tels.groupby('npi')['month'].idxmax()]

In [24]:
# Columns of `df` that describe Medicare activity: the *_Year columns and the
# maybe_active / active_2020 flags.
activity_columns = [x for x in df.columns if x.endswith('_Year') or 'active' in x]
deactivation_and_activity = df[['npi', 'npideactdate'] + activity_columns].drop_duplicates()
# Start from the list of people and left-merge each source so that nobody is
# dropped when a source has no row for them.
extra_data = (
    people
    .merge(tels.drop(columns='month'), how='left')
    .merge(df2.drop(columns='npideactdate'), how='left')
    .merge(deactivation_and_activity, how='left')
)

In [25]:
# Reformat the phone string as '(AAA) BBB-CCCC...' by slicing characters
# 0-2, 3-5 and 6 onward.
# NOTE(review): this overwrites `ploctel` in place, so running the cell a second
# time would re-slice the already formatted value.
extra_data = extra_data.assign(
    ploctel=lambda frame: (
        '(' + frame.ploctel.str[:3] + ') '
        + frame.ploctel.str[3:6] + '-' + frame.ploctel.str[6:]
    )
)

In [26]:
# NOTE(review): hard-coded absolute path; the DataFrame index is written as the
# first CSV column because index=False is not passed.
extra_data.to_csv('/home/akilby/extra_data.csv')

In [27]:
# Default inner merge: keeps only the ptaxcode rows belonging to the listed NPIs.
taxes = people.merge(npi.ptaxcode)

In [28]:
# Rows whose (npi, Type, Classification) combination occurs more than once.
# keep=False marks every member of a repeated group.
repeated_classification = taxes[['npi', 'Type', 'Classification']].duplicated(keep=False)
missing_specialization = taxes.Specialization.isnull()
# Within a repeated group, drop the entries that carry no specialization.
taxes = taxes[~(repeated_classification & missing_specialization)]

In [29]:
# Number each NPI's rows 1, 2, 3, ... and pivot that counter into the columns,
# giving one row per NPI with a (field, num) two-level column index.
# NOTE(review): `taxes` is reassigned here, so this cell is presumably meant to
# run once, directly after the filtering cell — confirm before re-running.
taxes = (
    taxes
    .assign(num=lambda frame: frame.groupby('npi').cumcount() + 1)
    .drop(columns='EntityType')
    .set_index(['npi', 'num'])
    .unstack()
)

In [30]:
# Flatten each (field, num) column key into a single label such as 'ptaxcode1'.
column_keys = taxes.columns.tolist()
new_column_names = [str(field) + str(num) for field, num in column_keys]
# Same labels, reordered so all fields for code 1 come first, then code 2, and
# so on. sorted() is stable, so the field order within each number is kept.
new_column_order = [
    str(field) + str(num)
    for field, num in sorted(column_keys, key=lambda pair: pair[1])
]

In [31]:
# Replace the two-level column index with the flattened names (e.g. 'ptaxcode1').
# new_column_names follows the current column order, so this only relabels.
taxes.columns=new_column_names

In [32]:
# Show the columns grouped by code number: all '...1' fields, then '...2', and so on.
taxes[new_column_order]

Unnamed: 0_level_0,ptaxcode1,Type1,Classification1,Specialization1,ptaxcode2,Type2,Classification2,Specialization2,ptaxcode3,Type3,Classification3,Specialization3
npi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1003801002,2085R0202X,Allopathic & Osteopathic Physicians,Radiology -,Diagnostic Radiology,2085R0204X,Allopathic & Osteopathic Physicians,Radiology -,Vascular & Interventional Radiology,,,,
1033261409,207R00000X,Allopathic & Osteopathic Physicians,Internal Medicine,,207KA0200X,Allopathic & Osteopathic Physicians,Allergy & Immunology,Allergy,,,,
1043587363,363L00000X,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,,,,,,,,,
1073579033,207RC0000X,Allopathic & Osteopathic Physicians,Internal Medicine,Cardiovascular Disease,207RC0001X,Allopathic & Osteopathic Physicians,Internal Medicine,Clinical Cardiac Electrophysiology,,,,
1073992087,390200000X,"Student, Health Care",Student in an Organized Health Care Education/...,,,,,,,,,
1093864449,2084P0800X,Allopathic & Osteopathic Physicians,Psychiatry & Neurology -,Psychiatry,2084A0401X,Allopathic & Osteopathic Physicians,Psychiatry & Neurology -,Addiction Medicine,,,,
1124369665,363LF0000X,Physician Assistants & Advanced Practice Nursi...,Nurse Practitioner,Family,,,,,,,,
1144556960,2084P0800X,Allopathic & Osteopathic Physicians,Psychiatry & Neurology -,Psychiatry,,,,,,,,
1164459897,207R00000X,Allopathic & Osteopathic Physicians,Internal Medicine,,,,,,,,,
1174690101,208D00000X,Allopathic & Osteopathic Physicians,General Practice,,,,,,,,,


In [33]:
# Taxonomy columns (one row per NPI) joined to the phone / practice / activity
# details, then to gender. Both merges are default inner joins on shared columns.
taxonomy_and_extra = taxes[new_column_order].reset_index().merge(extra_data)
final_data = npi.pgender.merge(taxonomy_and_extra)

In [34]:
# Inner merge back onto `people`, then write to a hard-coded absolute path
# (the index is included as the first CSV column).
people.merge(final_data).to_csv('/home/akilby/extra_data_all.csv')

### Medical Examiner data - Match to Miami Dade county

In [48]:
# NPIs deactivated on or after 2020-01-01 ...
removed_since_2020 = npi.removaldate.query('npideactdate>="2020-01-01"')
# ... paired with every name variant recorded for them (default inner merge).
me_match = npi.expanded_fullnames.merge(removed_since_2020)
me_match

Unnamed: 0,npi,pfname,pmname,plname,pnamesuffix,name,npideactdate
0,1003010430,LYLE,,PRATHER,,LYLE PRATHER,2020-02-17
1,1003019183,BARBARA,,BROGIE,,BARBARA BROGIE,2020-02-10
2,1003027426,JOHN,,POLACHECK,,JOHN POLACHECK,2020-01-06
3,1003027871,JOAN,,FLYNN,,JOAN FLYNN,2020-01-06
4,1003027871,JOAN,LESLIE,FLYNN,,JOAN LESLIE FLYNN,2020-01-06
...,...,...,...,...,...,...,...
21917,1982963815,ARNOLD,,AREM,I,ARNOLD AREM I,2020-05-11
21918,1992761902,EUGENE,P,HAGAN,,EUGENE P HAGAN,2020-03-16
21919,1992761902,EUGENE,,HAGAN,,EUGENE HAGAN,2020-03-16
21920,1992761902,EUGENE,P,HAGAN,JR,EUGENE P HAGAN JR,2020-03-16


In [65]:
# Preview the name-variant table: an NPI can appear once per name variant.
npi.expanded_fullnames

Unnamed: 0,npi,pfname,pmname,plname,pnamesuffix,name
0,1003000100,GERARDO,,GOMEZ,,GERARDO GOMEZ
1,1003000126,ARDALAN,,ENKESHAFI,,ARDALAN ENKESHAFI
2,1003000134,THOMAS,,CIBULL,,THOMAS CIBULL
3,1003000134,THOMAS,L,CIBULL,,THOMAS L CIBULL
4,1003000134,THOMAS,LEE,CIBULL,,THOMAS LEE CIBULL
...,...,...,...,...,...,...
11408834,1992998835,THOMAS,J,NOTO,JR,THOMAS J NOTO JR
11408835,1992998835,THOMAS,,NOTO,JR,THOMAS NOTO JR
11408836,1992999759,DINO,VINCENT,SORIANO,SR,DINO VINCENT SORIANO SR
11408837,1992999759,DINO,,SORIANO,SR,DINO SORIANO SR


In [49]:
# Miami-Dade medical examiner COVID-19 deaths spreadsheet.
# NOTE(review): hard-coded absolute path to a local file.
miamidade=pd.read_excel('/home/akilby/MiamiDadeME_COVID19_deaths_06082020_5PM.xls')
miamidade

Unnamed: 0,CaseNum,NameLast,NAMEFIRST,Age,Race,Ethnicity,Gender,DeathDate,Mode,CauseA,...,ResZip,DeathAddrNumber,DeathAddrPreDirection,DeathAddrStreetName,DeathAddrStreetType,DeathCity,DeathZip,DeathPlace,COVID19CaseStatus,COVID19TestSendFDH
0,2020-00950,ETIENNE,DIEUMENE,94 Years,Black,,Female,2020-03-23,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,,1611.0,NW,12th,AVE,Miami,33136.0,Jackson Health System,Positive,True
1,2020-00956,ALPISTE,LUIS,79 Years,White,Hispanic,Male,2020-03-24,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,33162.0,160.0,NW,170th,ST,Miami,33169.0,Jackson North Medical Center,Positive,True
2,2020-00962,VALDES,AROLDO,77 Years,White,,Male,2020-03-24,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,33175.0,12500.0,SW,45TH,ST,Miami,33175.0,,Positive,True
3,2020-00963,MILLER,NATHALIE,44 Years,Black,Non-Hispanic,Female,2020-03-24,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,33161.0,20900.0,,Biscayne,BLVD,Aventura,33180.0,Aventura Hospital & Medical Center,Positive,True
4,2020-01002,MONDUY-CARRERA,ISRAEL,40 Years,White,,Male,2020-03-26,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,,4300.0,,Alton,RD,Miami Beach,33140.0,Mount Sinai Hospital & Medical Center,Positive,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
699,2020-02648,SANTOVENA CAYON,JUAN,63 Years,White,Hispanic,Male,2020-06-02,Natural,Congestive Heart Failure,...,,444.0,E,42nd,ST,Miami,33013.0,,Positive,True
700,2020-02658,SANDERS,AASE,87 Years,White,,Female,2020-06-05,Natural,COVID-19 (SARS-CoV-2) Infection with Probable ...,...,,,,,,,,,Positive,True
701,2020-02664,OTTO,GLADYS,65 Years,Black,,Female,2020-06-02,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,,3663.0,S,Miami,AVE,Miami,33133.0,Mercy Hospital A Campus of Plantation General ...,Positive,True
702,2020-02665,CABEIRO,MANUEL,81 Years,,,Male,2020-05-24,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,,2001.0,W,68th,ST,Hialeah,33016.0,Palmetto General Hospital,Positive,True


In [61]:
# Candidate matches: exact equality on first and last name only, so common
# names can pair a decedent with an unrelated provider.
name_matches = miamidade.merge(
    me_match,
    right_on=['pfname', 'plname'],
    left_on=['NAMEFIRST', 'NameLast'],
)
# Collapse the name variants to one row per (death record, NPI) and keep only
# pairs where the death date is on or before the NPI deactivation date.
deaths_before_deactivation = (
    name_matches
    .drop(columns=['pfname', 'pmname', 'plname', 'pnamesuffix', 'name'])
    .drop_duplicates()
    .query('DeathDate<=npideactdate')
)
# Add credentials, last observed state and licenses for manual review. These are
# default inner merges, and a candidate with several credentials or licenses
# appears once per combination.
(deaths_before_deactivation
    .merge(npi.credentials)
    .merge(states)
    .merge(npi.licenses))


Unnamed: 0,CaseNum,NameLast,NAMEFIRST,Age,Race,Ethnicity,Gender,DeathDate,Mode,CauseA,...,COVID19CaseStatus,COVID19TestSendFDH,npi,npideactdate,pcredential,pcredential_stripped,cat,plocstatename,PLICNUM,PLICSTATE
0,2020-01190,BASS,SIDDHARTH,56 Years,Asian or Pacific Islander,Non-Hispanic,Male,2020-04-04,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1194979906,2020-05-18,MD,MD,MD/DO,FL,ME79410,FL
1,2020-01278,LASER,MARK,63 Years,White,Non-Hispanic,Male,2020-04-07,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1588668412,2020-05-26,MD,MD,MD/DO,CT,032712,CT
2,2020-01322,CALDERA-NIEVES,LUIS,63 Years,White,Hispanic,Male,2020-04-08,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1619981834,2020-05-27,MD,MD,MD/DO,FL,ME64963,FL
3,2020-01810,HERNANDEZ,JOSE,83 Years,Black,Hispanic,Male,2020-04-28,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1063503431,2020-05-07,MD,MD,MD/DO,FL,ME69978,FL
4,2020-01907,GARCIA,JOSE,78 Years,White,,Male,2020-05-01,Natural,COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1053558098,2020-06-03,MD,MD,MD/DO,PA,MD030092L,PA
5,2020-01946,BARQUET,GLENN,50 Years,White,Hispanic,Male,2020-05-02,Natural,Complications of COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1184674699,2020-06-15,MD,MD,MD/DO,FL,ME1112,FL
6,2020-01946,BARQUET,GLENN,50 Years,White,Hispanic,Male,2020-05-02,Natural,Complications of COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1184674699,2020-06-15,MD,MD,MD/DO,FL,ME81112,FL
7,2020-01946,BARQUET,GLENN,50 Years,White,Hispanic,Male,2020-05-02,Natural,Complications of COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1184674699,2020-06-15,MDPA,MDPA,,FL,ME1112,FL
8,2020-01946,BARQUET,GLENN,50 Years,White,Hispanic,Male,2020-05-02,Natural,Complications of COVID-19 (SARS-CoV-2) Pneumonia,...,Positive,True,1184674699,2020-06-15,MDPA,MDPA,,FL,ME81112,FL


In [62]:
# NOTE(review): `_` is IPython's "most recent output" variable, so this only
# saves the match table if it runs immediately after the cell that displayed it.
# The destination is a hard-coded absolute path.
_.to_csv('/home/akilby/miamidade.csv')