# Data

## Loading Data

### Read Model Output Data

In [58]:
import pandas as pd
# Load the example model outputs. index_col=False tells pandas not to use the
# first CSV column as the row index, so a default integer index is created.
df = pd.read_csv('../Examples/RTA/example_outputs.csv', index_col=False)

# Binarise the confidence score with an explicit threshold.
# `round().astype(bool)` would turn a missing score (NaN) into True, and
# likewise any score below -0.5; a direct comparison maps both to False while
# giving identical results for scores in [0, 1] (0.5 itself stays False).
df['rta'] = df['RTA_conf'] > 0.5

print(df.columns)
print(df['rta'])
print(df)

Index(['F1', 'AdmitDate', 'Day_In_Stay', 'RTA_conf', 'Date_Of_Process',
       'Feature_1', 'Feature_2', 'Feature_3', 'Feature_4', 'Feature_5',
       'Feature_6', 'Feature_7', 'Feature_8', 'Feature_9', 'Feature_10',
       'BWLoadDateTime', 'BWUpdateDateTime', 'Unnamed: 17', 'rta'],
      dtype='object')
0     True
1     True
2    False
3    False
Name: rta, dtype: bool
    F1   AdmitDate  Day_In_Stay  RTA_conf Date_Of_Process          Feature_1  \
0  604    9/3/2020           33      0.89       8/10/2020   CBC W/ AUTO DIFF   
1  605    9/3/2020           33      0.86       8/10/2020   CBC W/ AUTO DIFF   
2  586    9/5/2020           33      0.04       9/10/2020   CBC W/ AUTO DIFF   
3  607   9/29/2020            9      0.04       8/10/2020   CBC W/ AUTO DIFF   

                       Feature_2               Feature_3  \
0   PT EVALUATION MOD COMPLEXITY     CULTURE SCREEN MRSA   
1             ARTERIAL BLOOD GAS   THERAPEUTIC PROCEDURE   
2            CULTURE SCREEN MRSA      ARTERIA

### Read Feature Value Data

In [59]:
# Feature-value table; the CSV's 'Index' column becomes the row index.
feats = pd.read_csv(
    filepath_or_buffer='../Examples/RTA/example_feature_values.csv',
    index_col='Index',
)

# Only the summary statistics are printed here. For a closer look, print
# feats.columns or feats.sample(10) instead.
print(feats.describe())

       CBC W/ AUTO DIFF  VENIPUNCTURE  COMPREHEN METABOLIC PANEL  \
count        200.000000    200.000000                 200.000000   
mean           0.475000      0.335000                   0.185000   
std            0.633547      0.473175                   0.437874   
min            0.000000      0.000000                   0.000000   
25%            0.000000      0.000000                   0.000000   
50%            0.000000      0.000000                   0.000000   
75%            1.000000      1.000000                   0.000000   
max            5.000000      1.000000                   3.000000   

       CHEST 1 VIEW FRONTAL  METABOLIC PANEL TOTAL CA  \
count            200.000000                200.000000   
mean               0.140000                  0.335000   
std                0.401505                  0.586939   
min                0.000000                  0.000000   
25%                0.000000                  0.000000   
50%                0.000000                  

## Create possible feature values to add to outputs data

### Get all features used in outputs data

In [62]:
import numpy as np

# Feature-name columns of the outputs table, selected by header
# (Feature_1, Feature_2, ...) rather than by position so the selection does
# not silently shift if columns are added or reordered in the CSV.
feat_cols = df.filter(regex=r'^Feature_\d+$')

# Sorted list of the distinct feature names used anywhere in the outputs.
# Whitespace is stripped BEFORE de-duplicating so that names differing only in
# padding collapse to one entry; non-string cells (e.g. NaN for a missing
# feature) are skipped instead of raising.
features_in_outputs = sorted(
    {name.strip() for name in feat_cols.to_numpy().ravel() if isinstance(name, str)}
)

# NOTE(review): features_in_outputs is presumably meant to select columns of
# `feats` (feats[features_in_outputs]) - confirm every name exists there first.
feat_cols

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10
0,CBC W/ AUTO DIFF,PT EVALUATION MOD COMPLEXITY,CULTURE SCREEN MRSA,ARTERIAL BLOOD GAS,THERAPEUTIC PROCEDURE,EVAL SWALLOW/ORAL FUNCTION,VANCOMYCIN,GAIT TRAINING 15 MIN,METOPROLOL TART 25MG TAB,INSULIN HUM REGULAR 100U/ML IN
1,CBC W/ AUTO DIFF,ARTERIAL BLOOD GAS,THERAPEUTIC PROCEDURE,VANCOMYCIN,METOPROLOL TART 25MG TAB,MULTIVITAMIN TAB,POTASSIUM CL 20MEQ TER,URINALYSIS AUTO W/O MICRO,MASK TRACH,ASPIRIN 81MG ECT
2,CBC W/ AUTO DIFF,CULTURE SCREEN MRSA,ARTERIAL BLOOD GAS,GAIT TRAINING 15 MIN,MULTIVITAMIN TAB,ASPIRIN 81MG ECT,ATORVASTATIN CALCIUM 40MG TAB,CREAM SWEEN 24 5OZ,PT EVALUATION HIGH COMPLEXITY,IPRATR-ALBUTEROL 0.5-3MG/3ML S
3,CBC W/ AUTO DIFF,CULTURE SCREEN MRSA,ARTERIAL BLOOD GAS,OT EVALUATION HIGH COMPLEXITY,THERAPEUTIC PROCEDURE,INSULIN HUM REGULAR 100U/ML IN,ACETAMINOPHEN 325MG TAB,MULTIVITAMIN TAB,GABAPENTIN 300MG CAP,URINALYSIS AUTO W/O MICRO


### Fill in missing feature values with common values from each class (RTA > 0.5 and RTA ≤ 0.5)

In [63]:
# Rows of the feature table whose 'RTA' column equals 1. A cell displays only
# its last expression, so this is the cell's single output.
feats.loc[feats['RTA'] == 1]

# TODO: For all features_in_outputs, create filler values to insert into example output.
#       Do this for both RTA and no RTA

# TODO: Create new columns in example outputs df to store calculated filler values

# TODO: Form and test a new prompt using these new feature values 

Unnamed: 0_level_0,CBC W/ AUTO DIFF,VENIPUNCTURE,COMPREHEN METABOLIC PANEL,CHEST 1 VIEW FRONTAL,METABOLIC PANEL TOTAL CA,GLUC BLOOD NOVA STAT STRIP,SODIUM CL 0.9% IVS,MAGNESIUM,SPUTUM INDUCTION,AEROSOL/VAPOR INHALE INITIAL,...,TRACH SUCT 14FR-12 W/SWVL A,MIDODRINE HCL 5MG TAB,NEBULIZER MISTY MAX DISP 10,ALBUMIN HUMAN 25% IVS,CANN INN 8.0 SHILEY,NEB PREF'LD 1000ML WATER ONLY,HEPATIC FUNCTION PANEL,OXYCODONE HCL 5MG TAB,RBC LEUKOREDUCED-PROC ONLY,RTA
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
564927,0,0,0,1,0,0,3,0,0,0,...,0,0,0,0,0,0,0,0,0,1
867870,0,1,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1766991,1,0,1,0,0,0,5,0,0,0,...,0,3,0,0,0,0,0,1,0,1
1296695,1,1,0,0,0,0,4,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1138238,0,0,0,0,1,0,3,0,0,0,...,0,0,0,0,0,0,0,4,1,1
64315,0,0,0,0,0,0,2,0,0,0,...,0,0,0,0,2,0,0,0,0,1
442457,0,0,0,0,0,4,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
29655,0,0,0,0,0,0,3,0,0,0,...,0,9,0,0,0,0,0,0,0,1
1861020,1,1,0,0,2,4,4,1,0,1,...,0,0,0,0,0,0,0,0,0,1
1055780,2,0,2,1,0,0,2,0,0,0,...,0,0,0,0,0,0,0,6,0,1
