In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import glob as glob
from datetime import datetime
def subset_permute(n_subset, n_total, seed=None):
    """Return ``n_total`` shuffled labels drawn as evenly as possible from ``0..n_subset-1``.

    The labels are built by repeating ``0, 1, ..., n_subset-1`` until
    ``n_total`` entries exist and then shuffling, so every label occurs either
    ``floor(n_total / n_subset)`` or ``ceil(n_total / n_subset)`` times. This is
    much more even than drawing each label independently with
    ``np.random.randint``.

    Parameters
    ----------
    n_subset : int
        Number of distinct labels (e.g. number of cross-validation folds).
    n_total : int
        Length of the returned array (e.g. number of samples).
    seed : int or None, optional
        Seed for the shuffle. ``None`` (the default) draws fresh entropy from
        the OS; pass an integer to get a reproducible assignment.

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``n_total`` with values in
        ``0..n_subset-1``.

    Raises
    ------
    ValueError
        If ``n_subset`` is smaller than 1 or ``n_total`` is negative.
    """
    if n_subset < 1:
        raise ValueError("n_subset must be a positive integer")
    if n_total < 0:
        raise ValueError("n_total must be non-negative")

    # Integer ceiling division: avoids float rounding and Python-2 style
    # truncating division.
    n_repeats = -(-n_total // n_subset)
    pm = np.tile(np.arange(n_subset), n_repeats)[:n_total]  # [0,1,2,3,4,0,1,2,... ]

    # A private generator keeps the caller's global NumPy random state intact;
    # a single Fisher-Yates shuffle is already a uniform permutation.
    rng = np.random.RandomState(seed)
    rng.shuffle(pm)
    return pm

def parse_datetime(raw_datetime):
    """Split a ``'M/D/Y H:MM[:SS]'`` timestamp string into scheduling features.

    Parameters
    ----------
    raw_datetime : str, float or None
        Timestamp text such as ``'1/5/2015 9:30'``. ``None`` and NaN (how
        ``pandas.read_csv`` represents an empty cell) are treated as missing.

    Returns
    -------
    tuple
        ``(weekday, timeofday, month, day)`` where ``weekday`` is
        ``datetime.weekday()`` (Monday == 0) and ``timeofday`` is ``'AM'`` for
        hours 8-11, ``'PM'`` for hours 12-17 and ``'OFF'`` otherwise. All four
        entries are ``np.nan`` when the value is missing or has no time part.

    Raises
    ------
    TypeError
        If ``raw_datetime`` is neither a string nor a missing value.
    ValueError, IndexError
        If the date or time part is malformed (non-numeric fields, too few
        fields, or an impossible calendar date).
    """
    missing = (np.nan, np.nan, np.nan, np.nan)

    # Empty CSV cells arrive as float NaN (or None); str.split would raise on them.
    if raw_datetime is None:
        return missing
    if isinstance(raw_datetime, float) and np.isnan(raw_datetime):
        return missing

    # Splitting on any whitespace tolerates padding and repeated blanks.
    datetime_temp = str.split(raw_datetime)
    if len(datetime_temp) < 2:
        return missing

    date_temp = datetime_temp[0].split('/')
    # NOTE(review): the year is used literally, so a two-digit year such as
    # '15' would be read as year 15 and give the wrong weekday -- confirm the
    # export always carries four-digit years.
    yy = int(date_temp[2])
    mm = int(date_temp[0])
    dd = int(date_temp[1])

    time_temp = datetime_temp[1].split(':')
    hh = int(time_temp[0])
    mn = int(time_temp[1])

    # Constructing the datetime also validates the calendar date and the time.
    datetime_obj = datetime(yy, mm, dd, hh, mn)
    weekday = datetime_obj.weekday()

    hour = datetime_obj.hour
    if 8 <= hour < 12:
        timeofday = 'AM'
    elif 12 <= hour <= 17:
        timeofday = 'PM'
    else:
        timeofday = 'OFF'
    return weekday, timeofday, mm, dd

# Sheets of the climate-normals workbook, in the order query_weather returns them.
_WEATHER_SHEETS = ('MinTemp', 'MaxTemp', 'AvgTemp', 'Precipitation')
# weather_file -> tuple of DataFrames (one per sheet); filled lazily on first use.
_weather_tables = {}


def _load_weather_tables(weather_file):
    """Read the four climate-normal sheets of ``weather_file`` once and memoize them."""
    if weather_file not in _weather_tables:
        # ExcelFile.parse takes the sheet name positionally, which sidesteps the
        # sheetname -> sheet_name keyword rename between pandas versions.
        with pd.ExcelFile(weather_file) as workbook:
            _weather_tables[weather_file] = tuple(
                workbook.parse(sheet) for sheet in _WEATHER_SHEETS
            )
    return _weather_tables[weather_file]


def query_weather(month, day, weather_file='CA045115.xlsx'):
    """Look up the climate normals for a calendar day.

    Source of the workbook:
    https://www.ncdc.noaa.gov/cgi-bin/climatenormals/climatenormals.pl?directive=prod_select2&prodtype=CLIM8101&subrnum=

    Parameters
    ----------
    month, day : number
        1-based month and day of month. If either is NaN the lookup is skipped.
    weather_file : str, optional
        Path of the Excel workbook holding the ``MinTemp``, ``MaxTemp``,
        ``AvgTemp`` and ``Precipitation`` sheets.

    Returns
    -------
    tuple
        ``(min_temp, max_temp, avg_temp, precipitation)`` taken from position
        ``[month-1, day-1]`` of each sheet, or four ``np.nan`` values when
        ``month`` or ``day`` is NaN.

    Notes
    -----
    The workbook is read once per ``weather_file`` and kept in memory for the
    rest of the session; changes made to the file on disk afterwards are not
    picked up.
    """
    if np.isnan(month) or np.isnan(day):
        return np.nan, np.nan, np.nan, np.nan

    # int() lets float-valued months/days (e.g. 3.0 from a float column) index iloc.
    row = int(month) - 1
    col = int(day) - 1
    MnT, MxT, AvgT, Prcp = _load_weather_tables(weather_file)
    return MnT.iloc[row, col], MxT.iloc[row, col], AvgT.iloc[row, col], Prcp.iloc[row, col]

def get_label(cancel_list, valid_reason):
    """Binary-encode appointment outcomes: 1 if the reason counts as a cancellation.

    Parameters
    ----------
    cancel_list : pandas.Series
        Reason text per appointment; missing values never match.
    valid_reason : list-like of str
        Reason strings that should be labelled 1.

    Returns
    -------
    numpy.ndarray
        Integer array with the same length and order as ``cancel_list``:
        1 where the reason is one of ``valid_reason``, otherwise 0.
    """
    # isin performs the "equals any of these reasons" test in a single pass.
    is_cancel = cancel_list.isin(valid_reason)
    return is_cancel.values.astype(int)  # Cancel == 1


In [2]:
# Build the raw and one-hot-encoded feature tables from the appointment export.
data_raw_fname = 'be223a_dataset.csv'
data_raw = pd.read_csv(data_raw_fname)

raw_datetime = data_raw['ScheduledDTTM_D']
num_samples = raw_datetime.shape[0]

derived_columns = ['Weekday', 'Timeofday', 'MinTemp', 'MaxTemp', 'AvgTemp', 'Precip']

# Collect one tuple per appointment and build the frame once; growing a Series
# element by element is quadratic and leaves the dtype to chance.
derived_rows = []
# (month, day) -> weather tuple. There are at most 366 distinct dates, so the
# weather lookup does not need to be repeated for every row.
weather_by_date = {}
for rd in raw_datetime:
    wd, tod, mm, dd = parse_datetime(rd)
    date_key = (mm, dd)
    if date_key not in weather_by_date:
        weather_by_date[date_key] = query_weather(mm, dd)
    derived_rows.append((wd, tod) + tuple(weather_by_date[date_key]))

derived = pd.DataFrame(derived_rows, columns=derived_columns, index=data_raw.index)

# Kept as module-level names in case other cells refer to them.
weekday, timeofday = derived['Weekday'], derived['Timeofday']
MinTemp, MaxTemp, AvgTemp, Precip = (
    derived['MinTemp'], derived['MaxTemp'], derived['AvgTemp'], derived['Precip'])

label = get_label(data_raw['ReasonDesc'], ['CANCELLED BY PT', 'PT NO SHOW'])
features = pd.concat([
    data_raw[['Gender','Age','OrgCode','Modality','Anatomy','SubSpecialty']],
    derived,
    pd.DataFrame({'Label': label}, index=data_raw.index),
], axis=1)
# Pin the column order explicitly so the CSV layout does not depend on how the
# pieces above were assembled.
features = features[['Gender','Age','OrgCode','Modality','Anatomy','SubSpecialty',
                     'Weekday','Timeofday','MinTemp','MaxTemp','AvgTemp','Precip','Label']]

features.to_csv('features_raw2.csv')
# get_dummies one-hot encodes the text columns and passes numeric ones through.
features_encoded = pd.get_dummies(features)
features_encoded.to_csv('features_encoded2.csv')





In [None]:
# Balance show / no-show rows and assign cross-validation folds.
data_raw_fname = 'be223a_dataset.csv'
data_raw = pd.read_csv(data_raw_fname)

cancel_list = data_raw['ReasonDesc']
toinclude = ['CANCELLED BY PT', 'PT NO SHOW']
# Reuse the shared helper instead of repeating its loop here.
labels = get_label(cancel_list, toinclude)  # Cancel == 1

# NOTE(review): the feature-building cell writes 'features_encoded2.csv' (which
# already contains a 'Label' column) while this cell reads
# 'features_encoded.csv' -- confirm which file is intended.
features_encoded = pd.read_csv('features_encoded.csv')
# The first CSV column is the index written by to_csv; keep it as 'orig_index'.
features_encoded = features_encoded.rename(columns={features_encoded.columns[0]: 'orig_index'})
# A length mismatch would make the axis=1 concat pad rows with NaN silently.
assert len(features_encoded) == len(labels), 'features and labels differ in length'
features_encoded = pd.concat([features_encoded, pd.DataFrame({'Labels': labels})], axis=1)
show = features_encoded[features_encoded['Labels'] == 0]
noshow = features_encoded[features_encoded['Labels'] == 1]

# Balancing the numbers: draw as many "show" rows (without replacement) as
# there are "no-show" rows. The draw is unseeded, so it differs between runs.
show_subset = show.iloc[np.random.choice(show.shape[0], noshow.shape[0], replace=False)]
features_final = pd.concat([noshow, show_subset], axis=0)
features_final = features_final.reset_index(drop=True)

n_fold = 5
foldlist = subset_permute(n_fold, features_final.shape[0])  # List of fold numbers
features_final = pd.concat([features_final, pd.DataFrame({'Folds': foldlist})], axis=1)
# Move the first column ('orig_index') to the end of the frame.
features_final = pd.concat([features_final.iloc[:, 1:], features_final.iloc[:, 0]], axis=1)
display(features_final)
features_final.to_csv('features_encoded_processed.csv', index=False)


In [11]:
# Tabulate how often each raw ICD-9 code string occurs, in two orderings.
data_raw_fname = 'be223a_dataset.csv'
data_raw = pd.read_csv(data_raw_fname)
ICD_list = data_raw.loc[:, 'icd9']

# Grouping the column by its own values lists the codes in sorted key order.
ICD_ct = ICD_list.groupby(by=ICD_list).count()
print(ICD_ct)

# value_counts reports the same tallies, most frequent code first.
ICD_ct2 = ICD_list.value_counts()
print(ICD_ct2)


icd9
011.90|                           4
011.90|440.0|                     1
011.90|V45.01|795.51|             1
018.90|                           2
038.9|                            1
038.9|453.6|                      1
038.9|486|995.91|                 1
038.9|518.0|                      2
038.9|518.0|514|                  1
038.9|518.0|780.60|               1
038.9|518.0|793.19|               5
038.9|518.0|799.02|               2
038.9|518.0|V58.81|               1
038.9|518.0|V58.81|514|           1
038.9|518.0|V58.81|793.19|        1
038.9|518.0|V58.81|995.91|        2
038.9|518.0|V58.82|514|           1
038.9|586|995.91|                 2
038.9|749.20|995.91|              1
038.9|959.12|959.19|              1
038.9|995.91|                    14
038.9|995.91|780.60|              1
038.9|995.91|780.97|              1
038.9|995.91|793.19|              3
038.9|995.91|959.11|793.19|       1
038.9|995.91|V58.44|              1
038.9|V12.54|348.89|              1
038.9|V42.7|998.12|453.

In [1]:
# Quick check of the ICD-9 lookup helper: resolve one code and print its description.
import sys
# Put the local ./icd9_copy directory on the import path so `icd9` can be imported.
sys.path.append('./icd9_copy')

from icd9 import ICD9
# presumably loads the ICD-9 code definitions from codes.json -- confirm against the icd9 package
tr=ICD9('codes.json')
# Look up a single code; the result exposes its text description as `.descr`.
b=tr.find('V58.82')
print(b.descr)

Fitting and adjustment of non-vascular catheter NEC
