In [2]:
import pandas as pd
import numpy as np

# Load Data

## Establish paths

In [3]:
# Prediction workbooks for the specialty-level model, one per episode
# feature variant (all episodes / TX episodes / no episodes).
all_specialty_preds_path = './cardiology_specialty_all_epis/propagate/prediction/prediction.xlsx'
tx_specialty_preds_path = './cardiology_specialty_tx_epis/propagate/prediction/prediction.xlsx'
no_specialty_preds_path = './cardiology_specialty_no_epis/propagate/prediction/prediction.xlsx'

# Same three variants for the subspecialty-level model.
all_subspec_preds_path = './cardiology_subspecialty_all_epis/propagate/prediction/prediction.xlsx'
tx_subspec_preds_path = './cardiology_subspecialty_tx_epis/propagate/prediction/prediction.xlsx'
no_subspec_preds_path = './cardiology_subspecialty_no_epis/propagate/prediction/prediction.xlsx'

## Load predictions

In [4]:
# Read each prediction workbook into its own DataFrame (specialty first,
# then subspecialty), in the same order the paths were declared.
all_specialty_preds, tx_specialty_preds, no_specialty_preds = (
    pd.read_excel(workbook)
    for workbook in (all_specialty_preds_path, tx_specialty_preds_path, no_specialty_preds_path)
)
all_subspec_preds, tx_subspec_preds, no_subspec_preds = (
    pd.read_excel(workbook)
    for workbook in (all_subspec_preds_path, tx_subspec_preds_path, no_subspec_preds_path)
)

In [5]:
# Preview the first rows to see the prediction columns
# (npi, label_1/probability_1, label_2/probability_2).
all_specialty_preds.head()

Unnamed: 0,npi,label_1,probability_1,label_2,probability_2
0,1477549087,Cardiology,1.0,Other,0.0
1,1679899074,Cardiology,1.0,Other,0.0
2,1285945295,Cardiology,1.0,Other,0.0
3,1679735716,Cardiology,1.0,Other,0.0
4,1881895159,Cardiology,1.0,Other,0.0


# Compare Predictions

## Compare Specialty Predictions

### No Episodes to TX Episodes

#### NPIs that switched to Cardiology

In [6]:
# NPIs whose top label (label_1) is Cardiology, for the no-episode and
# TX-episode models respectively.
no_specialty_cardio_npis = no_specialty_preds.loc[no_specialty_preds['label_1'].eq('Cardiology'), 'npi']
tx_specialty_cardio_npis = tx_specialty_preds.loc[tx_specialty_preds['label_1'].eq('Cardiology'), 'npi']

In [7]:
# Cardiology NPIs under TX episodes that are not in the no-episode Cardiology
# set. Note this would also pick up any NPI missing from the no-episode file.
no_to_tx_cardio_switch = np.array(
    tx_specialty_cardio_npis.loc[~tx_specialty_cardio_npis.isin(no_specialty_cardio_npis)]
)

In [8]:
# Report how many NPIs moved to Cardiology, then list them.
print(no_to_tx_cardio_switch.shape[0])
no_to_tx_cardio_switch

66


array([1609913045, 1992739163, 1669630968, 1689945776, 1598021560,
       1275771529, 1942336078, 1497755342, 1538505326, 1952380578,
       1982710539, 1801904008, 1518925155, 1134171713, 1467700716,
       1679506554, 1427009356, 1184684946, 1508093311, 1013995760,
       1245236728, 1184774127, 1922005768, 1700106812, 1629017587,
       1568464774, 1649375361, 1629107115, 1710186036, 1225128895,
       1871565077, 1225039720, 1114017142, 1760685861, 1164429171,
       1063403384, 1912993296, 1972573756, 1295960524, 1306063656,
       1316261183, 1790970986, 1811044878, 1811088529, 1770582223,
       1023239720, 1811081391, 1558471912, 1306856927, 1750338745,
       1083766729, 1952392888, 1902897341, 1821068511, 1669565149,
       1780764027, 1417222779, 1295760460, 1780642280, 1508880204,
       1871692715, 1689764573, 1477811222, 1467491464, 1629056320,
       1679570683])

#### NPIs that switched to Other

In [9]:
# NPIs whose top label (label_1) is anything other than Cardiology, for the
# no-episode and TX-episode models respectively.
no_specialty_other_npis = no_specialty_preds.loc[no_specialty_preds['label_1'].ne('Cardiology'), 'npi']
tx_specialty_other_npis = tx_specialty_preds.loc[tx_specialty_preds['label_1'].ne('Cardiology'), 'npi']

In [10]:
# Non-Cardiology NPIs under TX episodes that are not in the no-episode
# non-Cardiology set.
no_to_tx_other_switch = np.array(
    tx_specialty_other_npis.loc[~tx_specialty_other_npis.isin(no_specialty_other_npis)]
)

In [11]:
# List the NPIs that moved to a non-Cardiology top label once TX episodes were added.
no_to_tx_other_switch

array([1619224417, 1902059447, 1316178502, 1073553954, 1558472043,
       1609808385, 1073506648, 1770650236, 1972695344, 1740486364,
       1063435899, 1922323146, 1235180167, 1427002427, 1073539516,
       1003894601, 1194725234, 1699062349, 1831151133, 1508837121,
       1427065796, 1275775512, 1104879709, 1790773240, 1639336050,
       1194887422, 1033190426, 1528030731, 1689753394, 1528294329,
       1952383564, 1144524349, 1649423872, 1841341369, 1619958717,
       1396730826, 1699739219, 1255375648, 1386610194, 1497164727,
       1801889845, 1578572186, 1316979255, 1841215597, 1457351074,
       1285620179, 1487175261, 1457448532, 1669457503, 1255331831,
       1598077307, 1942261094, 1982642450, 1508825092, 1790063972,
       1770540767, 1154329092, 1194778159, 1235100397, 1528148657,
       1972646859, 1013914795, 1932168580, 1982804761, 1053419192,
       1760434443, 1679524706, 1881742831, 1548222011, 1932161619,
       1518922772, 1730273061, 1447207758, 1245264324, 1043237

##### Examples of Switches

In [12]:
# Example NPI: its prediction row from the no-episode model.
no_specialty_preds.loc[no_specialty_preds['npi'].eq(1770625832)]

Unnamed: 0,npi,label_1,probability_1,label_2,probability_2
30099,1770625832,Cardiology,0.586,Other,0.414


In [13]:
# Same example NPI: its prediction row from the TX-episode model.
tx_specialty_preds.loc[tx_specialty_preds['npi'].eq(1770625832)]

Unnamed: 0,npi,label_1,probability_1,label_2,probability_2
30779,1770625832,Other,0.526,Cardiology,0.474


#### Check statistics around no episode label 1 prediction probability for those that switched

In [14]:
# No-episode top-label confidence (probability_1) for NPIs that became
# Cardiology once TX episodes were added.
switch_to_cardio_label_1_probabilities = no_specialty_preds.loc[
    no_specialty_preds['npi'].isin(no_to_tx_cardio_switch), 'probability_1'
]

# Same measure for NPIs that became non-Cardiology.
switch_to_other_label_1_probabilities = no_specialty_preds.loc[
    no_specialty_preds['npi'].isin(no_to_tx_other_switch), 'probability_1'
]

In [15]:
# Became cardiology: distribution of the no-episode model's top-label
# probability (i.e. its confidence in the label held before the switch).
switch_to_cardio_label_1_probabilities.describe()

count    66.000000
mean      0.593487
std       0.102244
min       0.500218
25%       0.512500
50%       0.556000
75%       0.644000
max       0.990000
Name: probability_1, dtype: float64

In [16]:
# Became other: distribution of the no-episode model's top-label
# probability (i.e. its confidence in the label held before the switch).
switch_to_other_label_1_probabilities.describe()

count    576.000000
mean       0.549739
std        0.039179
min        0.500000
25%        0.518000
50%        0.542000
75%        0.570000
max        0.702000
Name: probability_1, dtype: float64

#### Investigate max switch to cardiology

In [17]:
# Which NPI switched to Cardiology even though the no-episode model was most
# confident in its previous top label (0.99 in the describe() summary)?
# The maximum is derived from the data rather than compared against a
# hard-coded float, so the filter keeps working if the predictions are
# regenerated or the value is not exactly representable as .99.
switched_to_cardio_no_epis = no_specialty_preds[no_specialty_preds['npi'].isin(no_to_tx_cardio_switch)]
max_to_cardio_switch = switched_to_cardio_no_epis[
    switched_to_cardio_no_epis['probability_1'] == switched_to_cardio_no_epis['probability_1'].max()
]

max_to_cardio_switch

Unnamed: 0,npi,label_1,probability_1,label_2,probability_2
12934,1669630968,Other,0.99,Cardiology,0.01


In [18]:
# TX-episode prediction for the NPI identified as the most confident switcher.
tx_specialty_preds.loc[tx_specialty_preds['npi'].eq(1669630968)]

Unnamed: 0,npi,label_1,probability_1,label_2,probability_2
27351,1669630968,Cardiology,0.766,Other,0.234


In [19]:
# Load the exported feature set for NPI 1669630968 to inspect what drove the switch.
max_to_cardio_switch_features = pd.read_csv('./1669630968_feature_set.csv')

max_to_cardio_switch_features

Unnamed: 0.1,Unnamed: 0,1669630968
0,label,0.0
1,tax_103T00000X,0.0
2,tax_171000000X,0.0
3,tax_174400000X,0.0
4,tax_193200000X,0.0
...,...,...
5210,epi_Px - cardiac - coronary art proc - cabg,0.0
5211,epi_Px - cardiac - coronary art proc - pci,3.0
5212,epi_Px - cardiac - heart rhythm - pacemaker/AICD,0.0
5213,epi_Px - cardiac - valve proc - open,0.0


In [20]:
# Directories for per-provider label-switch artefacts, one per model level.
path_to_specialty_switch = './provider_label_switch/specialty/'
path_to_subspecialty_switch = './provider_label_switch/subspecialty/'

In [21]:
# Every NPI whose top label changed between the no-episode and TX-episode
# models: those that became Cardiology followed by those that became Other.
all_tx_switches = np.concatenate((no_to_tx_cardio_switch, no_to_tx_other_switch))

all_tx_switches

array([1609913045, 1992739163, 1669630968, 1689945776, 1598021560,
       1275771529, 1942336078, 1497755342, 1538505326, 1952380578,
       1982710539, 1801904008, 1518925155, 1134171713, 1467700716,
       1679506554, 1427009356, 1184684946, 1508093311, 1013995760,
       1245236728, 1184774127, 1922005768, 1700106812, 1629017587,
       1568464774, 1649375361, 1629107115, 1710186036, 1225128895,
       1871565077, 1225039720, 1114017142, 1760685861, 1164429171,
       1063403384, 1912993296, 1972573756, 1295960524, 1306063656,
       1316261183, 1790970986, 1811044878, 1811088529, 1770582223,
       1023239720, 1811081391, 1558471912, 1306856927, 1750338745,
       1083766729, 1952392888, 1902897341, 1821068511, 1669565149,
       1780764027, 1417222779, 1295760460, 1780642280, 1508880204,
       1871692715, 1689764573, 1477811222, 1467491464, 1629056320,
       1679570683, 1619224417, 1902059447, 1316178502, 1073553954,
       1558472043, 1609808385, 1073506648, 1770650236, 1972695

In [22]:
# Total number of NPIs that switched label in either direction.
all_tx_switches.shape[0]

642

In [23]:
# Draw 32 NPIs from the switch list for manual review.
# replace=False: np.random.choice samples WITH replacement by default, which
# can hand the reviewer the same array entry twice. A fixed seed makes the
# draw repeatable under Restart & Run All.
tx_switches_selection = np.random.default_rng(42).choice(all_tx_switches, size=32, replace=False)

In [24]:
# NPIs used for model training; the file has no header row, so columns are
# numbered from 0.
training_set_npis = pd.read_csv('./training_npis.csv', header=None)

In [25]:
# Count how many of the sampled NPIs also appear in the training set.
# The previous form passed the uncalled `to_xarray` method as the lookup set
# and took len() of the boolean mask, which is always the sample size (32)
# no matter how many NPIs actually match.
np.isin(tx_switches_selection, training_set_npis.to_numpy().ravel()).sum()

32

In [26]:
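# One-off export of the sampled NPIs for manual labelling. Left disabled,
# presumably so a re-run does not overwrite the hand-labelled file read below.
# np.savetxt('manual_tx_switch_eval.csv', tx_switches_selection, delimiter=',')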

In [27]:
# Load the manually reviewed sample and keep only rows that actually received
# a manual label.
manual_tx_results = (
    pd.read_csv('./manual_tx_switch_eval.csv')
    .loc[lambda reviewed: reviewed['manual_label'].notna()]
)
manual_tx_results

Unnamed: 0,npi,manual_label,Unnamed: 2
0,1720187000.0,Cardiology,207RC0000X
1,1518971000.0,Cardiology,208G00000X
2,1598993000.0,Cardiology,208G00000X
3,1649375000.0,Cardiology,208G00000X
4,1104880000.0,Cardiology,208G00000X
5,1164465000.0,Cardiology,208G00000X
6,1750357000.0,Cardiology,208G00000X
8,1558369000.0,Cardiology,207RC0000X
9,1063403000.0,Cardiology,2080P0202X
10,1194803000.0,Cardiology,207RC0000X


In [28]:
# Attach each model variant's prediction to the manually labelled NPIs.
# Merge order fixes the column names pandas produces:
#   label_1_x -> TX-episode model, label_1_y -> no-episode model,
#   label_1   -> all-EGM-episode model (no clash left, so no suffix).
# Each *_correct flag is 1 when that model's top label matches the manual label.
manual_tx_switch_eval = (
    manual_tx_results
    .merge(tx_specialty_preds, on='npi', how='inner')
    .merge(no_specialty_preds, on='npi', how='inner')
    .merge(all_specialty_preds, on='npi', how='inner')
    .assign(
        noEGM_correct=lambda merged: merged['manual_label'].eq(merged['label_1_y']).astype(int),
        tx_correct=lambda merged: merged['manual_label'].eq(merged['label_1_x']).astype(int),
        all_correct=lambda merged: merged['manual_label'].eq(merged['label_1']).astype(int),
    )
)

manual_tx_switch_eval



Unnamed: 0,npi,manual_label,Unnamed: 2,label_1_x,probability_1_x,label_2_x,probability_2_x,label_1_y,probability_1_y,label_2_y,probability_2_y,label_1,probability_1,label_2,probability_2,noEGM_correct,tx_correct,all_correct
0,1720187000.0,Cardiology,207RC0000X,Other,0.552,Cardiology,0.448,Cardiology,0.546,Other,0.454,Cardiology,0.576,Other,0.424,1,0,1
1,1518971000.0,Cardiology,208G00000X,Other,0.558,Cardiology,0.442,Cardiology,0.502,Other,0.498,Other,0.532,Cardiology,0.468,1,0,0
2,1598993000.0,Cardiology,208G00000X,Other,0.546,Cardiology,0.454,Cardiology,0.562,Other,0.438,Other,0.606,Cardiology,0.394,1,0,0
3,1649375000.0,Cardiology,208G00000X,Cardiology,0.564,Other,0.436,Other,0.524,Cardiology,0.476,Cardiology,0.55,Other,0.45,0,1,1
4,1104880000.0,Cardiology,208G00000X,Other,0.62,Cardiology,0.38,Cardiology,0.506,Other,0.494,Other,0.528,Cardiology,0.472,1,0,0
5,1164465000.0,Cardiology,208G00000X,Other,0.502,Cardiology,0.498,Cardiology,0.574,Other,0.426,Cardiology,0.582,Other,0.418,1,0,1
6,1750357000.0,Cardiology,208G00000X,Other,0.59,Cardiology,0.41,Cardiology,0.508,Other,0.492,Cardiology,0.52,Other,0.48,1,0,1
7,1558369000.0,Cardiology,207RC0000X,Other,0.53,Cardiology,0.47,Cardiology,0.514,Other,0.486,Other,0.58,Cardiology,0.42,1,0,0
8,1063403000.0,Cardiology,2080P0202X,Cardiology,0.528,Other,0.472,Other,0.50294,Cardiology,0.49706,Other,0.522,Cardiology,0.478,0,1,0
9,1194803000.0,Cardiology,207RC0000X,Other,0.526,Cardiology,0.474,Cardiology,0.536,Other,0.464,Other,0.556,Cardiology,0.444,1,0,0


In [31]:
# Persist the evaluated NPIs. The column is float after the CSV round-trip
# (see the displayed values), so cast to int before writing. The Series index
# is written as well, since to_csv is called with its defaults.
manual_tx_switch_eval['npi'].astype(int).to_csv('./tx_switch_npis.csv')

In [91]:
# Tally how many manually labelled NPIs each model variant got right.
no_egm_hits = sum(manual_tx_switch_eval['noEGM_correct'])
tx_hits = sum(manual_tx_switch_eval['tx_correct'])
all_egm_hits = sum(manual_tx_switch_eval['all_correct'])

# Single string that emits exactly the same three lines as before.
print(f'No EGM Correct: {no_egm_hits} \nTX Correct: {tx_hits} \nAll EGM Correct: {all_egm_hits}')

No EGM Correct: 26 
TX Correct: 5 
All EGM Correct: 16


### No Episodes to All EGM Episodes

#### NPIs that switched to Cardiology

In [92]:
# NPIs whose top label (label_1) is Cardiology, for the no-episode and
# all-EGM-episode models respectively.
no_specialty_cardio_npis = no_specialty_preds.loc[no_specialty_preds['label_1'].eq('Cardiology'), 'npi']
all_specialty_cardio_npis = all_specialty_preds.loc[all_specialty_preds['label_1'].eq('Cardiology'), 'npi']

In [94]:
# Cardiology NPIs under all EGM episodes that are not in the no-episode
# Cardiology set.
no_to_all_cardio_switch = np.array(
    all_specialty_cardio_npis.loc[~all_specialty_cardio_npis.isin(no_specialty_cardio_npis)]
)

In [96]:
# Report how many NPIs moved to Cardiology with all EGM episodes, then list them.
print(no_to_all_cardio_switch.shape[0])
no_to_all_cardio_switch

156


array([1609913045, 1538697719, 1679506554, 1629017587, 1427009356,
       1992739163, 1467491464, 1417907619, 1275771529, 1922005768,
       1992777551, 1801904008, 1336162262, 1811992647, 1770582223,
       1669630968, 1053592162, 1568464774, 1518112028, 1043212269,
       1508816596, 1184684946, 1205870805, 1417002908, 1134171713,
       1386626687, 1851460414, 1497769269, 1477556371, 1487608378,
       1528083888, 1821019902, 1891770137, 1699865873, 1245236728,
       1568473874, 1821055732, 1497755342, 1487846002, 1780797589,
       1578580023, 1740372770, 1184657645, 1750338745, 1568465516,
       1083766729, 1083779888, 1235286170, 1689945776, 1407858749,
       1043393010, 1730167404, 1093863532, 1225128895, 1972573756,
       1447206826, 1417051558, 1386684298, 1215174024, 1942336078,
       1639150949, 1598886277, 1518961853, 1598021560, 1861485831,
       1306063656, 1275549784, 1114017142, 1073581898, 1851394621,
       1871541482, 1932398161, 1306923123, 1326054669, 1184774

#### NPIs that switched to Other

In [98]:
# NPIs whose top label (label_1) is anything other than Cardiology, for the
# no-episode and all-EGM-episode models respectively.
no_specialty_other_npis = no_specialty_preds.loc[no_specialty_preds['label_1'].ne('Cardiology'), 'npi']
all_specialty_other_npis = all_specialty_preds.loc[all_specialty_preds['label_1'].ne('Cardiology'), 'npi']

In [99]:
no_to_tx_other_switch = np.array(tx_specialty_other_npis[~tx_specialty_other_npis.isin(no_specialty_other_npis)])

In [100]:
print(len(no_to_tx_other_switch))

no_to_tx_other_switch

576


array([1619224417, 1902059447, 1316178502, 1073553954, 1558472043,
       1609808385, 1073506648, 1770650236, 1972695344, 1740486364,
       1063435899, 1922323146, 1235180167, 1427002427, 1073539516,
       1003894601, 1194725234, 1699062349, 1831151133, 1508837121,
       1427065796, 1275775512, 1104879709, 1790773240, 1639336050,
       1194887422, 1033190426, 1528030731, 1689753394, 1528294329,
       1952383564, 1144524349, 1649423872, 1841341369, 1619958717,
       1396730826, 1699739219, 1255375648, 1386610194, 1497164727,
       1801889845, 1578572186, 1316979255, 1841215597, 1457351074,
       1285620179, 1487175261, 1457448532, 1669457503, 1255331831,
       1598077307, 1942261094, 1982642450, 1508825092, 1790063972,
       1770540767, 1154329092, 1194778159, 1235100397, 1528148657,
       1972646859, 1013914795, 1932168580, 1982804761, 1053419192,
       1760434443, 1679524706, 1881742831, 1548222011, 1932161619,
       1518922772, 1730273061, 1447207758, 1245264324, 1043237