Measure human perception error

In [1]:
import pandas as pd
df = pd.read_csv('data5.csv') # read the file
df # view the dataset

Unnamed: 0.1,Unnamed: 0,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,priority
0,1,Vital_Sign,60,Pre-procedural,Moderately Unstable,Confused,Calm,Severe,2-assist,7
1,2,Vital_Sign,60,Pre-procedural,Moderately Unstable,Confused,Calm,Severe,2-assist,8
2,3,Procedural,50,Pre-procedural,Unstable,Oriented,Anxious,,1-assist,8
3,4,Procedural,50,Pre-procedural,Unstable,Oriented,Anxious,,1-assist,8
4,5,Alarm,50,Pre-procedural,Unstable,Confused,Anxious,Severe,2-assist,7
...,...,...,...,...,...,...,...,...,...,...
2895,2896,Alarm,90,Post-procedural,Moderately Unstable,Oriented,Calm,Severe,2-assist,9
2896,2897,Vital_Sign,50,Pre-procedural,Moderately Unstable,Oriented,Calm,,1-assist,6
2897,2898,Vital_Sign,50,Pre-procedural,Moderately Unstable,Oriented,Calm,,1-assist,6
2898,2899,Medication,90,Post-procedural,Unstable,Oriented,Calm,Severe,2-assist,8


In [2]:
# convert the continuous variable into categorial in order to apply this column in groupby()
df['Age_range'] = df.Age_range.astype('category')

In [3]:
df.columns

Index(['Unnamed: 0', 'Needs_list', 'Age_range', 'Procedure_list',
       'Physical_Cond_list', 'Cognitive_Impair_list', 'Emo_State_list',
       'Pain_list', 'Assist_Level_list', 'priority'],
      dtype='object')

In [4]:
# remove unnamed column(s) and check whether all unnamed columns have been removed from the dataset
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
df.columns

Index(['Needs_list', 'Age_range', 'Procedure_list', 'Physical_Cond_list',
       'Cognitive_Impair_list', 'Emo_State_list', 'Pain_list',
       'Assist_Level_list', 'priority'],
      dtype='object')

In [5]:
# find the absolute difference in the same questions
# for example, a participant ranked 7 in a combination for the first time, 
# then ranked 8 when he/she saw the same combination for the second time.
# (each combination of physical conditions is repeated twice)
error = df.groupby(['Needs_list', 'Age_range', 'Procedure_list', 'Physical_Cond_list',
                    'Cognitive_Impair_list', 'Emo_State_list', 'Pain_list',
                    'Assist_Level_list']).diff().abs().mean()
error

priority    0.876552
dtype: float64

Decision Tree (using Cross Validation)

In [6]:
import pandas as pd
df = pd.read_csv('data5.csv') # read the file
df # view the dataset

Unnamed: 0.1,Unnamed: 0,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,priority
0,1,Vital_Sign,60,Pre-procedural,Moderately Unstable,Confused,Calm,Severe,2-assist,7
1,2,Vital_Sign,60,Pre-procedural,Moderately Unstable,Confused,Calm,Severe,2-assist,8
2,3,Procedural,50,Pre-procedural,Unstable,Oriented,Anxious,,1-assist,8
3,4,Procedural,50,Pre-procedural,Unstable,Oriented,Anxious,,1-assist,8
4,5,Alarm,50,Pre-procedural,Unstable,Confused,Anxious,Severe,2-assist,7
...,...,...,...,...,...,...,...,...,...,...
2895,2896,Alarm,90,Post-procedural,Moderately Unstable,Oriented,Calm,Severe,2-assist,9
2896,2897,Vital_Sign,50,Pre-procedural,Moderately Unstable,Oriented,Calm,,1-assist,6
2897,2898,Vital_Sign,50,Pre-procedural,Moderately Unstable,Oriented,Calm,,1-assist,6
2898,2899,Medication,90,Post-procedural,Unstable,Oriented,Calm,Severe,2-assist,8


In [7]:
df.columns # get columns' names for dummy

Index(['Unnamed: 0', 'Needs_list', 'Age_range', 'Procedure_list',
       'Physical_Cond_list', 'Cognitive_Impair_list', 'Emo_State_list',
       'Pain_list', 'Assist_Level_list', 'priority'],
      dtype='object')

In [8]:
# create dummy variables for modeling, since most of my inputs are categorical 
dclass = pd.get_dummies(df[['Needs_list', 'Age_range', 'Procedure_list',
       'Physical_Cond_list', 'Cognitive_Impair_list', 'Emo_State_list',
       'Pain_list', 'Assist_Level_list']], drop_first = True)
dclass.columns

Index(['Age_range', 'Needs_list_Alarm', 'Needs_list_Education',
       'Needs_list_Medication', 'Needs_list_Mobility', 'Needs_list_Procedural',
       'Needs_list_Rounding', 'Needs_list_Vital_Sign',
       'Procedure_list_Pre-procedural', 'Physical_Cond_list_Stable',
       'Physical_Cond_list_Unstable', 'Cognitive_Impair_list_Oriented',
       'Emo_State_list_Calm', 'Pain_list_None', 'Pain_list_Severe',
       'Assist_Level_list_2-assist', 'Assist_Level_list_Independent'],
      dtype='object')

In [9]:
# separate independent variables and dependent variable
Xclass = dclass[['Age_range', 'Needs_list_Alarm', 'Needs_list_Education',
       'Needs_list_Medication', 'Needs_list_Mobility', 'Needs_list_Procedural',
       'Needs_list_Rounding', 'Needs_list_Vital_Sign',
       'Procedure_list_Pre-procedural', 'Physical_Cond_list_Stable',
       'Physical_Cond_list_Unstable', 'Cognitive_Impair_list_Oriented',
       'Emo_State_list_Calm', 'Pain_list_None', 'Pain_list_Severe',
       'Assist_Level_list_2-assist', 'Assist_Level_list_Independent']]
yclass = df[['priority']]

In [10]:
# split the dataset into training set and test set
from sklearn.model_selection import train_test_split
Xclass_train, Xclass_test, yclass_train, yclass_test = train_test_split(Xclass, yclass, test_size=0.4, 
                                                    random_state = 150)

In [11]:
# decision tree model in training set without cross validation
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()

In [12]:
# using the training set for cross validation
from sklearn.model_selection import GridSearchCV
parameters = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, None]}
grid = GridSearchCV(clf, parameters, cv = 10, n_jobs = -1)
grid.fit(X = Xclass_train, y = yclass_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=DecisionTreeClassifier(class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort=False, random_state=None,
                                              splitter='best'),
             iid='warn', n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, None]},
    

In [13]:
grid.best_params_

{'criterion': 'entropy', 'max_depth': 9}

In [14]:
opt_clf = DecisionTreeClassifier(criterion = 'entropy', max_depth = 9, random_state = 767)
opt_clf.fit(X = Xclass_train, y = yclass_train)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=9,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=767, splitter='best')

In [15]:
# get pridicted priority using test set
ygrid_hat = opt_clf.predict(Xclass_test)

In [16]:
# compare real priority and pridicted priority
residuals = ygrid_hat - yclass_test['priority']

In [17]:
# calculate customized accuracy
accurate = abs(residuals) <= round(error.priority, 0)
accuracy_class = sum(accurate)/len(ygrid_hat)
print('The customized accuracy of the model is', round(accuracy_class*100, 2), '%.')

The customized accuracy of the model is 84.74 %.


Random Forest (using Cross Validation)

In [18]:
import pandas as pd
df = pd.read_csv('data5.csv') # read the file
df # view the dataset

Unnamed: 0.1,Unnamed: 0,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,priority
0,1,Vital_Sign,60,Pre-procedural,Moderately Unstable,Confused,Calm,Severe,2-assist,7
1,2,Vital_Sign,60,Pre-procedural,Moderately Unstable,Confused,Calm,Severe,2-assist,8
2,3,Procedural,50,Pre-procedural,Unstable,Oriented,Anxious,,1-assist,8
3,4,Procedural,50,Pre-procedural,Unstable,Oriented,Anxious,,1-assist,8
4,5,Alarm,50,Pre-procedural,Unstable,Confused,Anxious,Severe,2-assist,7
...,...,...,...,...,...,...,...,...,...,...
2895,2896,Alarm,90,Post-procedural,Moderately Unstable,Oriented,Calm,Severe,2-assist,9
2896,2897,Vital_Sign,50,Pre-procedural,Moderately Unstable,Oriented,Calm,,1-assist,6
2897,2898,Vital_Sign,50,Pre-procedural,Moderately Unstable,Oriented,Calm,,1-assist,6
2898,2899,Medication,90,Post-procedural,Unstable,Oriented,Calm,Severe,2-assist,8


In [19]:
# convert categorical variables to numerical representation without an arbitrary ordering
df_dummies = pd.get_dummies(df[['Needs_list', 'Age_range', 'Procedure_list',
       'Physical_Cond_list', 'Cognitive_Impair_list', 'Emo_State_list',
       'Pain_list', 'Assist_Level_list']], drop_first = True)
df_dummies.columns

Index(['Age_range', 'Needs_list_Alarm', 'Needs_list_Education',
       'Needs_list_Medication', 'Needs_list_Mobility', 'Needs_list_Procedural',
       'Needs_list_Rounding', 'Needs_list_Vital_Sign',
       'Procedure_list_Pre-procedural', 'Physical_Cond_list_Stable',
       'Physical_Cond_list_Unstable', 'Cognitive_Impair_list_Oriented',
       'Emo_State_list_Calm', 'Pain_list_None', 'Pain_list_Severe',
       'Assist_Level_list_2-assist', 'Assist_Level_list_Independent'],
      dtype='object')

In [20]:
Xrf = df_dummies[['Age_range', 'Needs_list_Alarm', 'Needs_list_Education',
       'Needs_list_Medication', 'Needs_list_Mobility', 'Needs_list_Procedural',
       'Needs_list_Rounding', 'Needs_list_Vital_Sign',
       'Procedure_list_Pre-procedural', 'Physical_Cond_list_Stable',
       'Physical_Cond_list_Unstable', 'Cognitive_Impair_list_Oriented',
       'Emo_State_list_Calm', 'Pain_list_None', 'Pain_list_Severe',
       'Assist_Level_list_2-assist', 'Assist_Level_list_Independent']]
yrf = df[['priority']]

In [21]:
# separate tarining and test set
from sklearn.model_selection import train_test_split 
Xrf_train, Xrf_test, yrf_train, yrf_test = train_test_split(Xrf, yrf, test_size=0.4, 
                                                    random_state = 150)

In [22]:
from sklearn.model_selection import GridSearchCV
# find the optimal hyperparameters 
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000],
    'oob_score': [True, False]
}

In [23]:
# create a based model
from sklearn.ensemble import RandomForestClassifier
rf0 = RandomForestClassifier(random_state = 150)
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf0, param_grid = param_grid, 
                          cv = 3, n_jobs = -1)

In [24]:
import time
start = time.process_time() # how long the process will run
# fit the grid search to the data
grid_search.fit(Xrf_train, yrf_train)
print(time.process_time() - start)

  self.best_estimator_.fit(X, y, **fit_params)


19.131062


In [25]:
# get the best combination of parameters
grid_search.best_params_

{'bootstrap': True,
 'max_depth': 80,
 'max_features': 2,
 'min_samples_leaf': 3,
 'min_samples_split': 8,
 'n_estimators': 100,
 'oob_score': True}

In [26]:
rf = RandomForestClassifier(bootstrap = True,
                            max_depth = 80,
                            max_features = 2,
                            min_samples_leaf = 3,
                            min_samples_split = 8,
                            n_estimators = 100,
                            random_state = 150,
                            oob_score = True)
rf.fit(Xrf_train, yrf_train)

  if __name__ == '__main__':


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=80, max_features=2, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=8,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=True, random_state=150, verbose=0,
                       warm_start=False)

In [27]:
# get predicted priority from test set
yrf_hat = rf.predict(Xrf_test)

In [28]:
# compare real priority and pridicted priority
rf_residuals = yrf_hat - yrf_test['priority']

In [29]:
# calculate customized accuracy
rf_accurate = abs(rf_residuals) <= round(error.priority, 0)
rf_accuracy = sum(rf_accurate)/len(yrf_hat)
print('The customized accuracy of the model is', round(rf_accuracy*100, 2), '%.')

The customized accuracy of the model is 89.4 %.


Consider waiting time while deciding which patient should be served first 
(using some simulated patients as an example)

In [53]:
import pandas as pd

In [54]:
# create a dataset with patients' bed numbers, physical conditions, and their coming time
simulation = {'Bed': [34, 55, 28, 47],
              'Needs_list': ['Education', 'Mobility', 'Medication', 'Rounding'],
              'Age_range':['70', '60', '90', '50'],
              'Procedure_list': ['Post-procedural', 'Post-procedural', 'Post-procedural', 'Pre-procedural'], 
              'Physical_Cond_list': ['Stable', 'Stable', 'Unstable', 'Moderately Unstable'],
              'Cognitive_Impair_list': ['Oriented', 'Confused', 'Confused','Oriented'],
              'Emo_State_list': ['Calm', 'Anxious', 'Anxious', 'Anxious'],
              'Pain_list': ['Moderate', 'Moderate', 'None', 'Severe'],
              'Assist_Level_list': ['Independent', 'Independent', '2-assist', '2-assist'],
              'Coming_Time': ['00:00:00', '00:03:49', '00:07:28', '00:07:56']}

In [55]:
# convert simulation to dataframe
current_patients = pd.DataFrame(simulation, columns = ['Bed', 'Needs_list', 'Age_range', 'Procedure_list',
       'Physical_Cond_list', 'Cognitive_Impair_list', 'Emo_State_list',
       'Pain_list', 'Assist_Level_list', 'Coming_Time']) 
current_patients

Unnamed: 0,Bed,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,Coming_Time
0,34,Education,70,Post-procedural,Stable,Oriented,Calm,Moderate,Independent,00:00:00
1,55,Mobility,60,Post-procedural,Stable,Confused,Anxious,Moderate,Independent,00:03:49
2,28,Medication,90,Post-procedural,Unstable,Confused,Anxious,,2-assist,00:07:28
3,47,Rounding,50,Pre-procedural,Moderately Unstable,Oriented,Anxious,Severe,2-assist,00:07:56


In [56]:
# convert Coming_Time to a time variable
current_patients['Coming_Time'] = pd.to_timedelta(current_patients['Coming_Time'], unit='T')

In [57]:
current_patients.dtypes

Bed                                int64
Needs_list                        object
Age_range                         object
Procedure_list                    object
Physical_Cond_list                object
Cognitive_Impair_list             object
Emo_State_list                    object
Pain_list                         object
Assist_Level_list                 object
Coming_Time              timedelta64[ns]
dtype: object

In [70]:
# if current time is 00:06:00
T =  pd.to_timedelta('00:06:00', unit = 'T')

In [71]:
# add waiting time column into the dataset
wt = T - current_patients.Coming_Time
current_patients['Waiting_Time'] = wt.dt.total_seconds()/60
current_patients

Unnamed: 0,Bed,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,Coming_Time,Waiting_Time
0,34,Education,70,Post-procedural,Stable,Oriented,Calm,Moderate,Independent,00:00:00,6.0
1,55,Mobility,60,Post-procedural,Stable,Confused,Anxious,Moderate,Independent,00:03:49,2.183333
2,28,Medication,90,Post-procedural,Unstable,Confused,Anxious,,2-assist,00:07:28,-1.466667
3,47,Rounding,50,Pre-procedural,Moderately Unstable,Oriented,Anxious,Severe,2-assist,00:07:56,-1.933333


In [79]:
# create the sorting for current patients (coming before T)
now = current_patients[current_patients['Waiting_Time'] > 0]
now

Unnamed: 0,Needs_list_Mobility,Age_range_70,Cognitive_Impair_list_Oriented,Emo_State_list_Calm
0,0,1,1,1
1,1,0,0,0


In [73]:
# predict priority for these patients
d = now[['Needs_list', 'Age_range', 'Procedure_list', 'Physical_Cond_list',
       'Cognitive_Impair_list', 'Emo_State_list', 'Pain_list', 'Assist_Level_list']]
df = pd.read_csv('data5.csv') # read the original file in order to help dummy converting
da = df.append(d)
da.dtypes

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Age_range                 object
Assist_Level_list         object
Cognitive_Impair_list     object
Emo_State_list            object
Needs_list                object
Pain_list                 object
Physical_Cond_list        object
Procedure_list            object
Unnamed: 0               float64
priority                 float64
dtype: object

In [74]:
# convert age to numeric
da['Age_range'] = pd.to_numeric(da['Age_range'])
da.dtypes

Age_range                  int64
Assist_Level_list         object
Cognitive_Impair_list     object
Emo_State_list            object
Needs_list                object
Pain_list                 object
Physical_Cond_list        object
Procedure_list            object
Unnamed: 0               float64
priority                 float64
dtype: object

In [75]:
# get only current patients' dummy dataset for prediction
da_dummies = pd.get_dummies(da[['Needs_list', 'Age_range', 'Procedure_list',
       'Physical_Cond_list', 'Cognitive_Impair_list', 'Emo_State_list',
       'Pain_list', 'Assist_Level_list']], drop_first = True)
current = da_dummies.tail(len(now))
current

Unnamed: 0,Age_range,Needs_list_Alarm,Needs_list_Education,Needs_list_Medication,Needs_list_Mobility,Needs_list_Procedural,Needs_list_Rounding,Needs_list_Vital_Sign,Procedure_list_Pre-procedural,Physical_Cond_list_Stable,Physical_Cond_list_Unstable,Cognitive_Impair_list_Oriented,Emo_State_list_Calm,Pain_list_None,Pain_list_Severe,Assist_Level_list_2-assist,Assist_Level_list_Independent
0,70,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1
1,60,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1


In [64]:
# get predicted priorities and add them back to current patients' dataset
now['predicted_priority'] = rf.predict(current)
now

Unnamed: 0,Bed,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,Coming_Time,Waiting_Time,predicted_priority
0,34,Education,70,Post-procedural,Stable,Oriented,Calm,Moderate,Independent,00:00:00,10.0,7
1,55,Mobility,60,Post-procedural,Stable,Confused,Anxious,Moderate,Independent,00:03:49,6.183333,7
2,28,Medication,90,Post-procedural,Unstable,Confused,Anxious,,2-assist,00:07:28,2.533333,8
3,47,Rounding,50,Pre-procedural,Moderately Unstable,Oriented,Anxious,Severe,2-assist,00:07:56,2.066667,7


In [65]:
# calculate sorting priority
now.loc[now['Waiting_Time'] < 5, 'current_priority'] = now['predicted_priority']
now.loc[now['Waiting_Time'] >= 5, 'current_priority'] = now['predicted_priority'] + (10 - now['predicted_priority'])/2
now.loc[(now['Waiting_Time'] >= 10) & (now['current_priority'] <= 8), 'current_priority'] = 8
now

Unnamed: 0,Bed,Needs_list,Age_range,Procedure_list,Physical_Cond_list,Cognitive_Impair_list,Emo_State_list,Pain_list,Assist_Level_list,Coming_Time,Waiting_Time,predicted_priority,current_priority
0,34,Education,70,Post-procedural,Stable,Oriented,Calm,Moderate,Independent,00:00:00,10.0,7,8.5
1,55,Mobility,60,Post-procedural,Stable,Confused,Anxious,Moderate,Independent,00:03:49,6.183333,7,8.5
2,28,Medication,90,Post-procedural,Unstable,Confused,Anxious,,2-assist,00:07:28,2.533333,8,8.0
3,47,Rounding,50,Pre-procedural,Moderately Unstable,Oriented,Anxious,Severe,2-assist,00:07:56,2.066667,7,7.0


In [66]:
# rank patients based on their sorting priorities
now['index'] = now['current_priority'].rank(ascending = 1) 
rank = now.set_index('index')
pd.DataFrame(rank[['Bed', 'Needs_list', 'Pain_list', 'Assist_Level_list', 'Waiting_Time', 'current_priority']])

Unnamed: 0_level_0,Bed,Needs_list,Pain_list,Assist_Level_list,Waiting_Time,current_priority
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3.5,34,Education,Moderate,Independent,10.0,8.5
3.5,55,Mobility,Moderate,Independent,6.183333,8.5
2.0,28,Medication,,2-assist,2.533333,8.0
1.0,47,Rounding,Severe,2-assist,2.066667,7.0
