In [80]:
###############################################################################
# Load all global data and parameters
###############################################################################
import pandas as pd
import numpy as np
import sys
import csv
import math
from scipy.stats import norm
import random

cohort_summary = pd.read_csv("LOS_cohort_summary_sunhwan.csv")

# replace NA to 0 for age and bmi columns
required_features = ['gender', 'age_60', 'age_60_70', 'age_70_80', 'age_80', 
                     'bmi_30', 'bmi_30_35', 'bmi_35_40', 'bmi_40']

cohort_summary = cohort_summary.fillna(value=0)

cohort_summary.head()

features = ["antibiotics", "ever_smoker", "pre_comor_Group3_Endocrine","pre_comor_Anemia", 
		"pre_comor_Group5_MentalDisorder", "pre_comor_Group6_Nervous", "pre_comor_Hypertension",
		"pre_comor_Group8_Respiratory", "pre_comor_Rheumatism"]

features_inorder = ["antibiotics", "pre_comor_Anemia", "pre_comor_Group6_Nervous", 
		 "pre_comor_Group5_MentalDisorder", "pre_comor_Rheumatism", "pre_comor_Group3_Endocrine", 
		 "pre_comor_Group8_Respiratory", "ever_smoker", "pre_comor_Hypertension"]

feature_set = required_features + features

num_features = len(features)

# centroid of 8 clusters
cluster_centroid = pd.read_csv("new_preop_kmeans_13_centers_8_binary_48K.csv")
cluster_features = list(cluster_centroid.columns)
  
# cluster size
cluster_size = pd.read_csv("new_preop_kmeans_13_size_8_binary_48K.csv")

# Average of features from the entirepopulation
pop_avg = ((cohort_summary[features].mul(cohort_summary['num_patient'], axis=0)).sum(axis=0))/sum(cohort_summary['num_patient'])

# alpha level for 99% confidence interval
alpha = 0.01
z = norm.ppf(1-0.5*alpha)

# z value for confidence interval
cr = norm.ppf(0.975)

#import the coefficients and covariance matrix
coe_los = pd.read_csv("formula_glm_LOS.csv")
coe_los = coe_los.coefficients.as_matrix()

coe_rev = pd.read_csv("formula_glm_revision.csv")
coe_rev = coe_rev.coefficients.as_matrix()

coe_com = pd.read_csv("formula_glm_complication.csv")
coe_com = coe_com.coefficients.as_matrix()

coe_porec = pd.read_csv("formula_glm_post_recovery.csv")
coe_porec = coe_porec.coefficients.as_matrix()

cov_los = pd.read_csv("covariance_glm_LOS.csv")
del cov_los['predictor']
cov_los = np.mat(cov_los)

cov_rev = pd.read_csv("covariance_glm_revision.csv")
del cov_rev['predictor']
cov_rev = np.mat(cov_rev)

cov_com = pd.read_csv("covariance_glm_complication.csv")
del cov_com['predictor']
cov_com = np.mat(cov_com)

cov_porec = pd.read_csv("covariance_glm_post_recovery.csv")
del cov_porec['predictor']
cov_porec = np.mat(cov_porec)

###############################################################################
# User defined function for inference of missing information
###############################################################################
def findCondPop(patient):
  # Find the conditional population based on known features ofthe patient
  # Args:
  #   patient: Patient data. NA is for unknown feature value.
  # Returns:
  #   Row index of summary table corresponding to conditional population
  idx = range(cohort_summary.shape[0])
  
  for i in range(len(feature_set)):
    f = feature_set[i]
    if patient[i] is not None:
      idx_f = cohort_summary[cohort_summary[f]==patient[i]].index.tolist()
      idx = [j for j in idx if j in idx_f]
  
  return idx

def initEstimate(pop_idx, patient):
  # Initialize the estimate from the conditional population mean
  # Args:
  #   pop_idx: Row index of cohort_summary table
  #   patient: Patient data. NA is for unknown feature value.
  # Returns:
  #   Range and estimates of features for clustering.
  
  if len(pop_idx) != 0:
    sub_cohort = cohort_summary.iloc[pop_idx]
    num_pop = sum(sub_cohort['num_patient'])
    cond_est = ((sub_cohort[features].mul(sub_cohort['num_patient'], axis=0)).sum(axis=0))/float(num_pop)
  
    bound = [z*math.sqrt(x) for x in cond_est*(1-cond_est)/num_pop]
  else:
    cond_est = pop_avg
    bound = [z*math.sqrt(x) for x in cond_est*(1-cond_est)/sum(cohort_summary['num_patient'])]
    
  bmi_idx = [i for i,v in enumerate(feature_set) if 'bmi' in v]
  f_hat = [v for i,v in enumerate(patient) if i in bmi_idx] + list(cond_est)
  f_range = [0.0]*4 + bound
    
  return f_hat, f_range

#cluster = cluster_centroid
def findSupportClusters(cluster, f_hat, f_range):
  # Compute the support clusters
  # Args:
  #   c: data frame for the centroid of clusters. Each row of the data frame
  #      contains the coordinate of the centorid of each cluster. The number
  #      of the row is the number of clusters and the number of columns is the
  #      number of dimensions
  #   patient_data: Feature vector of patient information used in clustering.
  #   range: 99% confidence interval of estimation of feature vector of the 
  #          patient. If 0, then corresponding feature is known.
  # Returns:
  #   Index of support clusters.

  # Divide known and unknown dimensions or attributes
  idx_known = [i for i,v in enumerate(f_range) if v == 0]
  idx_unknown = [i for i,v in enumerate(f_range) if v != 0]
  known = [v for i,v in enumerate(f_hat) if i in idx_known]
  unknown = [v for i,v in enumerate(f_hat) if i in idx_unknown]
  unknown_bound = [v for i,v in enumerate(f_range) if i in idx_unknown]
  
  #compute the distance with known features
  known_dim = cluster[[cluster_features[i] for i in idx_known]]
  cluster['known_dist'] = ((known_dim - known)**2).sum(axis=1)
  
  support_cluster = set()
  
  for i in range(len(idx_unknown)):
    min_f_hat = unknown[i] - unknown_bound[i]
    sc_min = ((cluster[cluster_features[idx_unknown[i]]] - min_f_hat)**2 + cluster['known_dist']).idxmin()
    support_cluster.add(sc_min)
    max_f_hat = unknown[i] + unknown_bound[i]
    sc_max = ((cluster[cluster_features[idx_unknown[i]]] - max_f_hat)**2 + cluster['known_dist']).idxmin()
    support_cluster.add(sc_max)

  return list(support_cluster)

# cluster = cluster_centroid
def findSupportBox(cluster, support_cluster, f_hat, f_range):
  # Compute the support box
  # Args:
  #   cluster: data frame for the centroid of clusters. Each row of the data frame
  #            contains the coordinate of the centorid of each cluster. The number
  #            of the row is the number of clusters and the number of columns is the
  #             number of dimensions
  #   support_cluster: Index of support clusters
  #   f_hat: Feature vector of patient information used in clustering.
  #   f_range: 99% confidence interval of estimation of feature vector of the 
  #            patient. If 0, then corresponding feature is known.
  # Returns:
  #   Coordinate of support boxes for features used for clustering.

  # select support clusters
  sc = cluster.iloc[support_cluster]
  max_est = [a+b for a,b in zip(f_hat, f_range)]
  min_est = [a-b for a,b in zip(f_hat, f_range)]
  
  box_min = []
  box_max = []

  for f in cluster_features:  
    box_min.append(min(min(list(sc[f])), min_est[cluster_features.index(f)]))
    box_max.append(max(max(list(sc[f])), max_est[cluster_features.index(f)]))
  
  return box_min, box_max

def findGroupCluster(cluster, box_min, box_max):
  # Find clusters inside the support box
  # Args:
  #   cluster: data frame for the centroid of clusters. Each row of the data frame
  #            contains the coordinate of the centorid of each cluster. The number
  #            of the row is the number of clusters and the number of columns is the
  #            number of dimensions
  #   box: Coordinate of support box. box$box_min has the minimum coordinate and 
  #        box$box_max has the maximum coordinate.
  # Returns:
  #   Index of clusters inside the support box.  

  group_cluster = []
  for c in range(cluster.shape[0]):
    if ((cluster.iloc[c][cluster_features] >= box_min).product() and 
        (cluster.iloc[c][cluster_features] <= box_max).product()):
      group_cluster.append(c)
  
  return group_cluster

def updateEstimates(cluster, cluster_size, group_cluster, f_hat, f_range):
  # Update the estimates of features for clustering from group clusters.
  # Args:
  #   cluster: data frame for the centroid of clusters. Each row of the data frame
  #            contains the coordinate of the centorid of each cluster. The number
  #            of the row is the number of clusters and the number of columns is the
  #             number of dimensions
  #   cluster_size: number of population in cluster.
  #   group_cluster: Index of group clusters.
  #   f_hat: Feature vector of patient information used in clustering.
  #   f_range: 99% confidence interval of estimation of feature vector of the 
  #            patient. If 0, then corresponding feature is known.
  # Returns:
  #   Range and estimates of features (probability and binary value) for clustering.

  import math
  
  num = sum(cluster_size.iloc[group_cluster].values)[0]
  pos_num = pd.DataFrame(cluster.iloc[group_cluster][cluster_features].values*cluster_size.iloc[group_cluster].values, columns=cluster_features).sum(axis=0)

  f_hat_new = pos_num / num  
  f_range_new = [z*math.sqrt(x) for x in f_hat_new*(1-f_hat_new)/num]
  f_hat_bin_new = f_hat[:]
  
  for i in range(len(f_range)):
    if f_range[i] == 0:
      f_range_new[i] = f_range[i]
      f_hat_new[cluster_features[i]] = f_hat[i]
    else:
      f_hat_bin_new[i] = int(f_hat_new[cluster_features[i]] >= pop_avg[cluster_features[i]])
  
  return f_hat_new, f_range_new, f_hat_bin_new



def logitT(x):
  #function to perform the log transform
  #x is the input value before applying the link function which is the log transform
  #return the output value after applying the link function
  return ( math.exp(x)/(1+math.exp(x)) )


def covf(x, cov):
  #function to calculate the standard error
  #x is the input vector, cov is the covariance matrix, x.T is to conduct the transpose for x
  #return the output value as standard error
  return (math.sqrt(x*cov*x.T))


def computePredCI2(patient_input, imputed, coe, cov):
  #function to compute the confidence interval
  #patient_input is users answers or designed test data
  #imputed is the imputed list after the process of imputing missings
  #return a dataframe having measures to present to users

  #assemble the users input
  udemo = patient_input[["age", "bmi_pre", "gender"]]
  udemo.loc[:,'gender'] = udemo.loc[:,'gender']-1
  to_compute = imputed[features]

  #append the intercept to model input
  inpt = pd.DataFrame({'intercept': [1]})
  to_compute = pd.concat([inpt, udemo, to_compute], axis=1)
  to_compute = to_compute.as_matrix()
  
  #compute the model formula
  s = np.dot(coe, to_compute.transpose())

  #compute the standard error
  to_compute = np.mat(to_compute)
  se = [covf(to_compute, cov) for x in to_compute]

  #apply logit transform for prediction, lower and upper bounds
  ls = logitT(s[0])
  lsupr = logitT(s[0]+cr*se[0])
  lslwr = logitT(s[0]-cr*se[0])

  #compute the range and its percentage
  rgp = None
  if(ls!=0):
    rgp = (lsupr-lslwr)/ls*100

  #assemble the results to a data frame
  td = pd.DataFrame({'pid':[patient_input.iloc[0]['pid']], 'pred_val':[ls], 'pred_upr':[lsupr],
                  'pred_lwr':[lslwr], 'pred_range':[lsupr-lslwr], 'range_pct': rgp})

  return (td)



In [30]:
#import the patient input structure
patient_input = pd.read_csv("patient_model.csv")
#assign outcome features to NaN
patient_input[['Length_of_Stay','LOS','revision','complication','post_recovery']]=np.nan
#randomly assign the pid
patient_input.pid = random.randint(1, 2000) 
demo_input = pd.read_csv("all_case_demographic.csv")
feature_input = pd.read_csv("all_case_preop_feature.csv")
del feature_input['active_feature'] 
#pick up cases
k=1
pat0=patient_input
#assemble the test data
pat0[list(demo_input.columns.values)]=demo_input.iloc[k].tolist()

#for kj in range(len(feature_input.index)):
kj=1
pat0[list(feature_input.columns.values)]=feature_input.iloc[kj].tolist()


#initial the question pool
feature_toask = features_inorder

In [31]:
pat0

Unnamed: 0,pid,Length_of_Stay,revision,complication,post_recovery,gender,age,age_60,age_60_70,age_70_80,...,antibiotics,ever_smoker,pre_comor_Group3_Endocrine,pre_comor_Anemia,pre_comor_Group5_MentalDisorder,pre_comor_Group6_Nervous,pre_comor_Hypertension,pre_comor_Group8_Respiratory,pre_comor_Rheumatism,LOS
0,715,,,,,2,52,1,0,0,...,1,0,0,0,0,0,0,0,0,


In [32]:
pat = pat0.copy()
for name in feature_toask:
      pat[name]=None

In [34]:
pat

Unnamed: 0,pid,Length_of_Stay,revision,complication,post_recovery,gender,age,age_60,age_60_70,age_70_80,...,antibiotics,ever_smoker,pre_comor_Group3_Endocrine,pre_comor_Anemia,pre_comor_Group5_MentalDisorder,pre_comor_Group6_Nervous,pre_comor_Hypertension,pre_comor_Group8_Respiratory,pre_comor_Rheumatism,LOS
0,715,,,,,2,52,1,0,0,...,1,,,,,,,,,


In [3]:
i=0
#assign the value to impute
patient=pat[feature_set].iloc[0].tolist()

#start to impute
cond_pop = findCondPop(patient)
f_hat, f_range = initEstimate(cond_pop, patient)
support_cluster = findSupportClusters(cluster_centroid, f_hat, f_range)
box_min, box_max = findSupportBox(cluster_centroid, support_cluster, f_hat, f_range)
group_cluster = findGroupCluster(cluster_centroid, box_min, box_max)

In [4]:
if len(group_cluster) == 0:
    feature_est = f_hat
    feature_range = f_range
    feature_est_bin = f_hat

for i in range(len(f_range)):
    if f_range[i] != 0:
        feature_est_bin[i] = int(f_hat[i] >= pop_avg[cluster_features[i]])
    else:
        feature_est, feature_range, feature_est_bin = updateEstimates(cluster_centroid, cluster_size, group_cluster, f_hat, f_range)

In [6]:
impute_output = pd.DataFrame(feature_est[features]).transpose()
impute_output

Unnamed: 0,antibiotics,ever_smoker,pre_comor_Group3_Endocrine,pre_comor_Anemia,pre_comor_Group5_MentalDisorder,pre_comor_Group6_Nervous,pre_comor_Hypertension,pre_comor_Group8_Respiratory,pre_comor_Rheumatism
0,0.78733,0.102247,0.367389,0.061364,0.04636,0.076129,0.057315,0.106771,0.073113


In [9]:
pd.__version__

'0.16.2'

In [93]:
patient_output = computePredCI2(patient_input, impute_output, coe_los, cov_los)
patient_output

Unnamed: 0,pid,pred_lwr,pred_range,pred_upr,pred_val,range_pct
0,715,0.070972,0.014052,0.085024,0.077708,18.082545


In [9]:
print "the value  " + str(round(patient_output.pred_val.values[0]*100,2)) + "%"

the value  3.14%


In [25]:
type(patient_output.pred_val)

pandas.core.series.Series

In [35]:
patient_input

Unnamed: 0,pid,Length_of_Stay,revision,complication,post_recovery,gender,age,age_60,age_60_70,age_70_80,...,antibiotics,ever_smoker,pre_comor_Group3_Endocrine,pre_comor_Anemia,pre_comor_Group5_MentalDisorder,pre_comor_Group6_Nervous,pre_comor_Hypertension,pre_comor_Group8_Respiratory,pre_comor_Rheumatism,LOS
0,1941,,,,,2,54,1,0,0,...,,,,,,,,,,


In [15]:
patient_input.age = raw_input("Please enter your age: ")

Please enter your age: 54


In [10]:
del feature_toask[0]

In [11]:
feature_toask

['pre_comor_Anemia',
 'pre_comor_Group6_Nervous',
 'pre_comor_Group5_MentalDisorder',
 'pre_comor_Rheumatism',
 'pre_comor_Group3_Endocrine',
 'pre_comor_Group8_Respiratory',
 'ever_smoker',
 'pre_comor_Hypertension']

In [14]:
pat0

Unnamed: 0,pid,Length_of_Stay,revision,complication,post_recovery,gender,age,age_60,age_60_70,age_70_80,...,antibiotics,ever_smoker,pre_comor_Group3_Endocrine,pre_comor_Anemia,pre_comor_Group5_MentalDisorder,pre_comor_Group6_Nervous,pre_comor_Hypertension,pre_comor_Group8_Respiratory,pre_comor_Rheumatism,LOS
0,963,,,,,2,52,1,0,0,...,,,,,,,,,,


In [35]:
demo_input.columns.values

array(['gender', 'age', 'age_60', 'age_60_70', 'age_70_80', 'age_80',
       'bmi_pre', 'bmi_30', 'bmi_30_35', 'bmi_35_40', 'bmi_40'], dtype=object)

In [None]:
patient_input[["age","bmi_pre","gender"]]=[52, 22, 2]
patient_input.loc[patient_input.age<60, 'age_60']=1

In [37]:
patient_input[required_features]

Unnamed: 0,gender,age_60,age_60_70,age_70_80,age_80,bmi_30,bmi_30_35,bmi_35_40,bmi_40
0,2,1,0,0,0,1,0,0,0


In [57]:
patient_input.iloc[0]['age']

'76'

In [61]:

patient_input[required_features]=0
#patient_input.age = eval(raw_input("Please enter your age: "))
patient_input.age = input("Please enter your age: ")
p_age= patient_input.iloc[0]['age']
if p_age < 60:
    patient_input['age_60']=1
elif p_age >= 60 and p_age < 70:
    patient_input['age_60_70']=1
elif p_age >= 70 and p_age < 80:
    patient_input['age_70_80']=1
else:
    patient_input['age_80']=1
patient_input[required_features]

Please enter your age: 84


Unnamed: 0,gender,age_60,age_60_70,age_70_80,age_80,bmi_30,bmi_30_35,bmi_35_40,bmi_40
0,0,0,0,0,1,0,0,0,0


In [64]:
udemo = patient_input[["age", "bmi_pre", "gender"]]
#udemo.loc[:,'gender'] = udemo.loc[:,'gender']-1
udemo.loc[:,'gender'] = udemo.iloc[0]['gender']-1
udemo

Unnamed: 0,age,bmi_pre,gender
0,84,22,-1


In [65]:
p_weight = input("Please enter your weight(LB): ")
p_height = input("Please enter your height(inch): ")

p_bmi = p_weight*730/math.pow(p_height,2)
patient_input.bmi_pre = p_bmi

if p_bmi < 30:
  patient_input['bmi_30']=1
elif p_age >= 30 and p_age < 35:
  patient_input['bmi_30_35']=1
elif p_age >= 35 and p_age < 40:
  patient_input['bmi_35_40']=1
else:
  patient_input['bmi_40']=1
patient_input[required_features]

Please enter your weight(LB): 150
Please enter your height(inch): 67


Unnamed: 0,gender,age_60,age_60_70,age_70_80,age_80,bmi_30,bmi_30_35,bmi_35_40,bmi_40
0,0,0,0,0,1,1,0,0,0


In [66]:
patient_input[['age','bmi_pre']]

Unnamed: 0,age,bmi_pre
0,84,24.392961


In [71]:
p_gender = raw_input("Please enter your gender(M or F): ")
if p_gender=='M':
    patient_input['gender']=1
else:
    patient_input['gender']=2
patient_input[['gender']]

Please enter your gender(M or F): M


Unnamed: 0,gender
0,1


In [82]:
feature_toask = features_inorder[:]
feature_toask

['antibiotics',
 'pre_comor_Anemia',
 'pre_comor_Group6_Nervous',
 'pre_comor_Group5_MentalDisorder',
 'pre_comor_Rheumatism',
 'pre_comor_Group3_Endocrine',
 'pre_comor_Group8_Respiratory',
 'ever_smoker',
 'pre_comor_Hypertension']

In [87]:
p_answer=raw_input("We'd like to ask you - do you have " + feature_toask[0] + "(Y/N)?")
if p_answer=='Y':
    patient_input[feature_toask[0]]=1
else:
    patient_input[feature_toask[0]]=0

del feature_toask[0]
patient_input[features_inorder]

We'd like to ask you - do you have pre_comor_Rheumatism(Y/N)?N


Unnamed: 0,antibiotics,pre_comor_Anemia,pre_comor_Group6_Nervous,pre_comor_Group5_MentalDisorder,pre_comor_Rheumatism,pre_comor_Group3_Endocrine,pre_comor_Group8_Respiratory,ever_smoker,pre_comor_Hypertension
0,1,0,1,1,0,0,0,0,0


In [94]:
pt = patient_output.copy()

In [95]:
pt.values

array([[  7.15000000e+02,   7.09722929e-02,   1.40515306e-02,
          8.50238235e-02,   7.77077058e-02,   1.80825446e+01]])

In [157]:
pt = patient_output.iloc[0]
type(pt)

pandas.core.series.Series

In [103]:
round(pt['pred_val'],3)*100

7.8

In [105]:
qr_los = pd.read_csv("quantile_fitted_LOS.csv")

In [122]:
qr_los[(qr_los['quantile']>=0.042)][:1].iloc[0]['percent']

'15%'

In [127]:
qr_los[(qr_los['quantile']>=0.042)].index.tolist()[0]*5

15

In [139]:
import time
start_time = time.time()
for i in xrange(0,1000000):
    y=i+i
print("--- %s seconds ---" % (time.time() - start_time))

--- 0.0979998111725 seconds ---


In [155]:
class glmout:
    #import lib and global vars
    import pandas as pd
    import numpy as np
    from scipy.stats import norm
    
    demo_features = ["age", "bmi_pre", "gender"]
    features_inorder = ["antibiotics", "pre_comor_Anemia", "pre_comor_Group6_Nervous", 
                       "pre_comor_Group5_MentalDisorder", "pre_comor_Rheumatism", "pre_comor_Group3_Endocrine", 
                       "pre_comor_Group8_Respiratory", "ever_smoker", "pre_comor_Hypertension"]

    predictor_set = demo_features + features_inorder

    # z value for confidence interval
    cr = norm.ppf(0.975)
    
    #import the coefficients, covariance matrix and quantile profiles
    coe_los = pd.read_csv("formula_glm_LOS.csv")
    coe_los = coe_los.coefficients.as_matrix()
    
    cov_los = pd.read_csv("covariance_glm_LOS.csv")
    del cov_los['predictor']
    cov_los = np.mat(cov_los)
    
    qr_los = pd.read_csv("quantile_fitted_LOS.csv")
    
    imputationCount = 0
    
   #initialize the class
    
    def __init__(self):
        glmout.imputationCount += 1
        
    def logitT(self, x):
        import math
        #function to perform the log transform
        #x is the input value before applying the link function which is the log transform
        #return the output value after applying the link function
        return ( math.exp(x)/(1+math.exp(x)) )

    def covf(self, x, cov):
        import math
        #function to calculate the standard error
        #x is the input vector, cov is the covariance matrix, x.T is to conduct the transpose for x
        #return the output value as standard error
        return (math.sqrt(x*cov*x.T))
     
    
    def computePredCI2(self, imputed, coe, cov):
    #function to compute the confidence interval
    #patient_input is users answers or designed test data
    #imputed is the imputed list after the process of imputing missings
    #return a dataframe having measures to present to users
    #assemble the users input
        pat = imputed.copy()
        pat.loc[:,'gender'] = pat.loc[:,'gender']-1
        to_compute = pat[glmout.predictor_set]
        
        #append the intercept to model input
        inpt = pd.DataFrame({'intercept': [1]})
        to_compute = pd.concat([inpt, to_compute], axis=1)
        to_compute = to_compute.as_matrix()
        
    #compute the model formula
        s = np.dot(coe, to_compute.transpose())
    #compute the standard error
        to_compute = np.mat(to_compute)
        se = [self.covf(to_compute, cov) for x in to_compute]
        
    #apply logit transform for prediction, lower and upper bounds
        ls = self.logitT(s[0])
        lsupr = self.logitT(s[0]+cr*se[0])
        lslwr = self.logitT(s[0]-cr*se[0])
        
    #compute the range and its percentage
    #assemble the results to a data frame
        rgp = None
        if(ls!=0):
            rgp = (lsupr-lslwr)/ls*100
        computed_val = pd.DataFrame({'pid':[patient_input.iloc[0]['pid']], 'pred_val':[ls], 'pred_upr':[lsupr],'pred_lwr':[lslwr], 'pred_range':[lsupr-lslwr], 'range_pct': rgp})
        return (computed_val)
    
    def compute_model(self, feature_est):
        #compute the model
        #input of the function
        #feature_est is the output from the class imputer - a series
        #patient_output is the output - a dataframe
        #start to compute the model
        
        imputed_output = pd.DataFrame(feature_est[glmout.predictor_set]).transpose()
        patient_output = self.computePredCI2(imputed_output, glmout.coe_los, glmout.cov_los)
        
        #return the results
        print patient_output
        return (patient_output)
    

In [160]:
patient_input.copy

<bound method DataFrame.copy of    pid  Length_of_Stay  revision  complication  post_recovery  gender  age  \
0  715             NaN       NaN           NaN            NaN       1   84   

   age_60  age_60_70  age_70_80 ...   antibiotics  ever_smoker  \
0       0          0          0 ...             1            0   

   pre_comor_Group3_Endocrine  pre_comor_Anemia  \
0                           0                 0   

   pre_comor_Group5_MentalDisorder  pre_comor_Group6_Nervous  \
0                                1                         1   

   pre_comor_Hypertension  pre_comor_Group8_Respiratory  pre_comor_Rheumatism  \
0                       0                             0                     0   

   LOS  
0  NaN  

[1 rows x 26 columns]>